summary refs log tree commit diff
diff options
context:
space:
mode:
authorBernardo Meurer <bernardo@meurer.org>2022-01-27 09:35:34 -0800
committerGitHub <noreply@github.com>2022-01-27 09:35:34 -0800
commit5f9b470ff07532d78c76c6f2a7ca5554e211ecac (patch)
tree6c4b3b688af1cfb283cb48c489e658917a5cb181
parent3cafa47a66a392b83eb1b5863348092acc8469fa (diff)
parentb52372675d75eac74312bb3adfb8fe6d1e4c702b (diff)
downloadnixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.tar
nixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.tar.gz
nixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.tar.bz2
nixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.tar.lz
nixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.tar.xz
nixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.tar.zst
nixpkgs-5f9b470ff07532d78c76c6f2a7ca5554e211ecac.zip
Merge pull request #154809 from helsinki-systems/feat/stc-proper-unit-file-parser
nixos/switch-to-configuration: Proper unit file parser and clean/fix lower part of the script
-rw-r--r--nixos/doc/manual/from_md/release-notes/rl-2205.section.xml48
-rw-r--r--nixos/doc/manual/release-notes/rl-2205.section.md6
-rw-r--r--nixos/modules/system/activation/switch-to-configuration.pl135
-rw-r--r--nixos/modules/system/activation/top-level.nix2
-rw-r--r--nixos/tests/switch-test.nix79
5 files changed, 220 insertions, 50 deletions
diff --git a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
index 8d8d70440e8..c945112ca7a 100644
--- a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
+++ b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
@@ -442,13 +442,49 @@
       </listitem>
       <listitem>
         <para>
-          The interface that allows activation scripts to restart units
-          has been reworked. Restarting and reloading is now done by a
-          single file
-          <literal>/run/nixos/activation-restart-list</literal> that
-          honors <literal>restartIfChanged</literal> and
-          <literal>reloadIfChanged</literal> of the units.
+          <literal>switch-to-configuration</literal> (the script that is
+          run when running <literal>nixos-rebuild switch</literal> for
+          example) has been reworked
         </para>
+        <itemizedlist spacing="compact">
+          <listitem>
+            <para>
+              The interface that allows activation scripts to restart
+              units has been streamlined. Restarting and reloading is
+              now done by a single file
+              <literal>/run/nixos/activation-restart-list</literal> that
+              honors <literal>restartIfChanged</literal> and
+              <literal>reloadIfChanged</literal> of the units.
+            </para>
+          </listitem>
+          <listitem>
+            <para>
+              The script now uses a proper ini-file parser to parse
+              systemd units. Some values are now only searched in one
+              section instead of in the entire unit. This is only
+              relevant for units that don’t use the NixOS systemd moule.
+            </para>
+            <itemizedlist spacing="compact">
+              <listitem>
+                <para>
+                  <literal>RefuseManualStop</literal>,
+                  <literal>X-OnlyManualStart</literal>,
+                  <literal>X-StopOnRemoval</literal>,
+                  <literal>X-StopOnReconfiguration</literal> are only
+                  searched in the <literal>[Unit]</literal> section
+                </para>
+              </listitem>
+              <listitem>
+                <para>
+                  <literal>X-ReloadIfChanged</literal>,
+                  <literal>X-RestartIfChanged</literal>,
+                  <literal>X-StopIfChanged</literal> are only searched
+                  in the <literal>[Service]</literal> section
+                </para>
+              </listitem>
+            </itemizedlist>
+          </listitem>
+        </itemizedlist>
       </listitem>
       <listitem>
         <para>
diff --git a/nixos/doc/manual/release-notes/rl-2205.section.md b/nixos/doc/manual/release-notes/rl-2205.section.md
index 61305fe70d6..e9ae1fa7ee7 100644
--- a/nixos/doc/manual/release-notes/rl-2205.section.md
+++ b/nixos/doc/manual/release-notes/rl-2205.section.md
@@ -141,7 +141,11 @@ In addition to numerous new and upgraded packages, this release has the followin
   `pkgs.noto-fonts-cjk` is currently an alias of `pkgs.noto-fonts-cjk-sans` and
   doesn't include serif fonts.
 
-- The interface that allows activation scripts to restart units has been reworked. Restarting and reloading is now done by a single file `/run/nixos/activation-restart-list` that honors `restartIfChanged` and `reloadIfChanged` of the units.
+- `switch-to-configuration` (the script that is run when running `nixos-rebuild switch` for example) has been reworked
+    * The interface that allows activation scripts to restart units has been streamlined. Restarting and reloading is now done by a single file `/run/nixos/activation-restart-list` that honors `restartIfChanged` and `reloadIfChanged` of the units.
+    * The script now uses a proper ini-file parser to parse systemd units. Some values are now only searched in one section instead of in the entire unit. This is only relevant for units that don't use the NixOS systemd moule.
+        * `RefuseManualStop`, `X-OnlyManualStart`, `X-StopOnRemoval`, `X-StopOnReconfiguration` are only searched in the `[Unit]` section
+        * `X-ReloadIfChanged`, `X-RestartIfChanged`, `X-StopIfChanged` are only searched in the `[Service]` section
 
 - The `services.bookstack.cacheDir` option has been removed, since the
   cache directory is now handled by systemd.
diff --git a/nixos/modules/system/activation/switch-to-configuration.pl b/nixos/modules/system/activation/switch-to-configuration.pl
index 93fff889d6b..1fe346114e4 100644
--- a/nixos/modules/system/activation/switch-to-configuration.pl
+++ b/nixos/modules/system/activation/switch-to-configuration.pl
@@ -2,6 +2,7 @@
 
 use strict;
 use warnings;
+use Config::IniFiles;
 use File::Path qw(make_path);
 use File::Basename;
 use File::Slurp;
@@ -113,26 +114,77 @@ sub parseFstab {
     return ($fss, $swaps);
 }
 
-sub parseUnit {
-    my ($filename) = @_;
-    my $info = {};
-    parseKeyValues($info, read_file($filename)) if -f $filename;
-    parseKeyValues($info, read_file("${filename}.d/overrides.conf")) if -f "${filename}.d/overrides.conf";
-    return $info;
+# This subroutine takes a single ini file that specified systemd configuration
+# like unit configuration and parses it into a hash where the keys are the sections
+# of the unit file and the values are hashes themselves. These hashes have the unit file
+# keys as their keys (left side of =) and an array of all values that were set as their
+# values. If a value is empty (for example `ExecStart=`), then all current definitions are
+# removed.
+#
+# Instead of returning the hash, this subroutine takes a hashref to return the data in. This
+# allows calling the subroutine multiple times with the same hash to parse override files.
+sub parseSystemdIni {
+    my ($unitContents, $path) = @_;
+    # Tie the ini file to a hash for easier access
+    my %fileContents;
+    tie %fileContents, "Config::IniFiles", (-file => $path, -allowempty => 1, -allowcontinue => 1);
+
+    # Copy over all sections
+    foreach my $sectionName (keys %fileContents) {
+        # Copy over all keys
+        foreach my $iniKey (keys %{$fileContents{$sectionName}}) {
+            # Ensure the value is an array so it's easier to work with
+            my $iniValue = $fileContents{$sectionName}{$iniKey};
+            my @iniValues;
+            if (ref($iniValue) eq "ARRAY") {
+                @iniValues = @{$iniValue};
+            } else {
+                @iniValues = $iniValue;
+            }
+            # Go over all values
+            for my $iniValue (@iniValues) {
+                # If a value is empty, it's an override that tells us to clean the value
+                if ($iniValue eq "") {
+                    delete $unitContents->{$sectionName}->{$iniKey};
+                    next;
+                }
+                push(@{$unitContents->{$sectionName}->{$iniKey}}, $iniValue);
+            }
+        }
+    }
+    return;
 }
 
-sub parseKeyValues {
-    my $info = shift;
-    foreach my $line (@_) {
-        # FIXME: not quite correct.
-        $line =~ /^([^=]+)=(.*)$/ or next;
-        $info->{$1} = $2;
-    }
+# This subroutine takes the path to a systemd configuration file (like a unit configuration),
+# parses it, and returns a hash that contains the contents. The contents of this hash are
+# explained in the `parseSystemdIni` subroutine. Neither the sections nor the keys inside
+# the sections are consistently sorted.
+#
+# If a directory with the same basename ending in .d exists next to the unit file, it will be
+# assumed to contain override files which will be parsed as well and handled properly.
+sub parseUnit {
+    my ($unitPath) = @_;
+
+    # Parse the main unit and all overrides
+    my %unitData;
+    parseSystemdIni(\%unitData, $_) for glob("${unitPath}{,.d/*.conf}");
+    return %unitData;
 }
 
-sub boolIsTrue {
-    my ($s) = @_;
-    return $s eq "yes" || $s eq "true";
+# Checks whether a specified boolean in a systemd unit is true
+# or false, with a default that is applied when the value is not set.
+sub parseSystemdBool {
+    my ($unitConfig, $sectionName, $boolName, $default) = @_;
+
+    my @values = @{$unitConfig->{$sectionName}{$boolName} // []};
+    # Return default if value is not set
+    if (scalar @values lt 1 || not defined $values[-1]) {
+        return $default;
+    }
+    # If value is defined multiple times, use the last definition
+    my $last = $values[-1];
+    # These are valid values as of systemd.syntax(7)
+    return $last eq "1" || $last eq "yes" || $last eq "true" || $last eq "on";
 }
 
 sub recordUnit {
@@ -167,17 +219,17 @@ sub handleModifiedUnit {
         # Revert of the attempt: https://github.com/NixOS/nixpkgs/pull/147609
         # More details: https://github.com/NixOS/nixpkgs/issues/74899#issuecomment-981142430
     } else {
-        my $unitInfo = parseUnit($newUnitFile);
-        if (boolIsTrue($unitInfo->{'X-ReloadIfChanged'} // "no")) {
+        my %unitInfo = parseUnit($newUnitFile);
+        if (parseSystemdBool(\%unitInfo, "Service", "X-ReloadIfChanged", 0)) {
             $unitsToReload->{$unit} = 1;
             recordUnit($reloadListFile, $unit);
         }
-        elsif (!boolIsTrue($unitInfo->{'X-RestartIfChanged'} // "yes") || boolIsTrue($unitInfo->{'RefuseManualStop'} // "no") || boolIsTrue($unitInfo->{'X-OnlyManualStart'} // "no")) {
+        elsif (!parseSystemdBool(\%unitInfo, "Service", "X-RestartIfChanged", 1) || parseSystemdBool(\%unitInfo, "Unit", "RefuseManualStop", 0) || parseSystemdBool(\%unitInfo, "Unit", "X-OnlyManualStart", 0)) {
             $unitsToSkip->{$unit} = 1;
         } else {
             # It doesn't make sense to stop and start non-services because
             # they can't have ExecStop=
-            if (!boolIsTrue($unitInfo->{'X-StopIfChanged'} // "yes") || $unit !~ /\.service$/) {
+            if (!parseSystemdBool(\%unitInfo, "Service", "X-StopIfChanged", 1) || $unit !~ /\.service$/) {
                 # This unit should be restarted instead of
                 # stopped and started.
                 $unitsToRestart->{$unit} = 1;
@@ -188,7 +240,7 @@ sub handleModifiedUnit {
                 # socket(s) instead of the service.
                 my $socketActivated = 0;
                 if ($unit =~ /\.service$/) {
-                    my @sockets = split / /, ($unitInfo->{Sockets} // "");
+                    my @sockets = split(/ /, join(" ", @{$unitInfo{Service}{Sockets} // []}));
                     if (scalar @sockets == 0) {
                         @sockets = ("$baseName.socket");
                     }
@@ -254,12 +306,12 @@ while (my ($unit, $state) = each %{$activePrev}) {
 
     if (-e $prevUnitFile && ($state->{state} eq "active" || $state->{state} eq "activating")) {
         if (! -e $newUnitFile || abs_path($newUnitFile) eq "/dev/null") {
-            my $unitInfo = parseUnit($prevUnitFile);
-            $unitsToStop{$unit} = 1 if boolIsTrue($unitInfo->{'X-StopOnRemoval'} // "yes");
+            my %unitInfo = parseUnit($prevUnitFile);
+            $unitsToStop{$unit} = 1 if parseSystemdBool(\%unitInfo, "Unit", "X-StopOnRemoval", 1);
         }
 
         elsif ($unit =~ /\.target$/) {
-            my $unitInfo = parseUnit($newUnitFile);
+            my %unitInfo = parseUnit($newUnitFile);
 
             # Cause all active target units to be restarted below.
             # This should start most changed units we stop here as
@@ -268,7 +320,7 @@ while (my ($unit, $state) = each %{$activePrev}) {
             # active after the system has resumed, which probably
             # should not be the case.  Just ignore it.
             if ($unit ne "suspend.target" && $unit ne "hibernate.target" && $unit ne "hybrid-sleep.target") {
-                unless (boolIsTrue($unitInfo->{'RefuseManualStart'} // "no") || boolIsTrue($unitInfo->{'X-OnlyManualStart'} // "no")) {
+                unless (parseSystemdBool(\%unitInfo, "Unit", "RefuseManualStart", 0) || parseSystemdBool(\%unitInfo, "Unit", "X-OnlyManualStart", 0)) {
                     $unitsToStart{$unit} = 1;
                     recordUnit($startListFile, $unit);
                     # Don't spam the user with target units that always get started.
@@ -287,7 +339,7 @@ while (my ($unit, $state) = each %{$activePrev}) {
             # Stopping a target generally has no effect on other units
             # (unless there is a PartOf dependency), so this is just a
             # bookkeeping thing to get systemd to do the right thing.
-            if (boolIsTrue($unitInfo->{'X-StopOnReconfiguration'} // "no")) {
+            if (parseSystemdBool(\%unitInfo, "Unit", "X-StopOnReconfiguration", 0)) {
                 $unitsToStop{$unit} = 1;
             }
         }
@@ -546,33 +598,36 @@ my $activeNew = getActiveUnits;
 while (my ($unit, $state) = each %{$activeNew}) {
     if ($state->{state} eq "failed") {
         push @failed, $unit;
+        next;
     }
-    elsif ($state->{state} eq "auto-restart") {
-        # A unit in auto-restart state is a failure *if* it previously failed to start
-        my $lines = `@systemd@/bin/systemctl show '$unit'`;
-        my $info = {};
-        parseKeyValues($info, split("\n", $lines));
 
-        if ($info->{ExecMainStatus} ne '0') {
+    if ($state->{substate} eq "auto-restart") {
+        # A unit in auto-restart substate is a failure *if* it previously failed to start
+        my $main_status = `@systemd@/bin/systemctl show --value --property=ExecMainStatus '$unit'`;
+        chomp($main_status);
+
+        if ($main_status ne "0") {
             push @failed, $unit;
+            next;
         }
     }
+
     # Ignore scopes since they are not managed by this script but rather
     # created and managed by third-party services via the systemd dbus API.
-    elsif ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/) {
+    # This only lists units that are not failed (including ones that are in auto-restart but have not failed previously)
+    if ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/msx) {
         push @new, $unit;
     }
 }
 
-print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
-    if scalar @new > 0;
+if (scalar @new > 0) {
+    print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
+}
 
 if (scalar @failed > 0) {
-    print STDERR "warning: the following units failed: ", join(", ", sort(@failed)), "\n";
-    foreach my $unit (@failed) {
-        print STDERR "\n";
-        system("COLUMNS=1000 @systemd@/bin/systemctl status --no-pager '$unit' >&2");
-    }
+    my @failed_sorted = sort @failed;
+    print STDERR "warning: the following units failed: ", join(", ", @failed_sorted), "\n\n";
+    system "@systemd@/bin/systemctl status --no-pager --full '" . join("' '", @failed_sorted) . "' >&2";
     $res = 4;
 }
 
diff --git a/nixos/modules/system/activation/top-level.nix b/nixos/modules/system/activation/top-level.nix
index 40afa551c7f..9e6ca75b9da 100644
--- a/nixos/modules/system/activation/top-level.nix
+++ b/nixos/modules/system/activation/top-level.nix
@@ -117,7 +117,7 @@ let
     configurationName = config.boot.loader.grub.configurationName;
 
     # Needed by switch-to-configuration.
-    perl = pkgs.perl.withPackages (p: with p; [ FileSlurp NetDBus XMLParser XMLTwig ]);
+    perl = pkgs.perl.withPackages (p: with p; [ FileSlurp NetDBus XMLParser XMLTwig ConfigIniFiles ]);
   };
 
   # Handle assertions and warnings
diff --git a/nixos/tests/switch-test.nix b/nixos/tests/switch-test.nix
index 1c32bf6beb9..8e425f0f877 100644
--- a/nixos/tests/switch-test.nix
+++ b/nixos/tests/switch-test.nix
@@ -45,6 +45,31 @@ import ./make-test-python.nix ({ pkgs, ...} : {
           systemd.services.test.restartIfChanged = false;
         };
 
+        simpleServiceFailing.configuration = {
+          imports = [ simpleServiceModified.configuration ];
+          systemd.services.test.serviceConfig.ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false";
+        };
+
+        autorestartService.configuration = {
+          # A service that immediately goes into restarting (but without failing)
+          systemd.services.autorestart = {
+            wantedBy = [ "multi-user.target" ];
+            serviceConfig = {
+              Type = "simple";
+              Restart = "always";
+              RestartSec = "20y"; # Should be long enough
+              ExecStart = "${pkgs.coreutils}/bin/true";
+            };
+          };
+        };
+
+        autorestartServiceFailing.configuration = {
+          imports = [ autorestartService.configuration ];
+          systemd.services.autorestart.serviceConfig = {
+            ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false";
+          };
+        };
+
         restart-and-reload-by-activation-script.configuration = {
           systemd.services = rec {
             simple-service = {
@@ -189,12 +214,13 @@ import ./make-test-python.nix ({ pkgs, ...} : {
       exec env -i "$@" | tee /dev/stderr
     '';
   in /* python */ ''
-    def switch_to_specialisation(system, name, action="test"):
+    def switch_to_specialisation(system, name, action="test", fail=False):
         if name == "":
             stc = f"{system}/bin/switch-to-configuration"
         else:
             stc = f"{system}/specialisation/{name}/bin/switch-to-configuration"
-        out = machine.succeed(f"{stc} {action} 2>&1")
+        out = machine.fail(f"{stc} {action} 2>&1") if fail \
+            else machine.succeed(f"{stc} {action} 2>&1")
         assert_lacks(out, "switch-to-configuration line")  # Perl warnings
         return out
 
@@ -305,7 +331,56 @@ import ./make-test-python.nix ({ pkgs, ...} : {
         assert_lacks(out, "as well:")
         assert_contains(out, "would start the following units: test.service\n")
 
+    with subtest("failing units"):
+        # Let the simple service fail
+        switch_to_specialisation("${machine}", "simpleServiceModified")
+        out = switch_to_specialisation("${machine}", "simpleServiceFailing", fail=True)
+        assert_contains(out, "stopping the following units: test.service\n")
+        assert_lacks(out, "NOT restarting the following changed units:")
+        assert_lacks(out, "reloading the following units:")
+        assert_lacks(out, "\nrestarting the following units:")
+        assert_contains(out, "\nstarting the following units: test.service\n")
+        assert_lacks(out, "the following new units were started:")
+        assert_contains(out, "warning: the following units failed: test.service\n")
+        assert_contains(out, "Main PID:")  # output of systemctl
+        assert_lacks(out, "as well:")
+
+        # A unit that gets into autorestart without failing is not treated as failed
+        out = switch_to_specialisation("${machine}", "autorestartService")
+        assert_lacks(out, "stopping the following units:")
+        assert_lacks(out, "NOT restarting the following changed units:")
+        assert_lacks(out, "reloading the following units:")
+        assert_lacks(out, "\nrestarting the following units:")
+        assert_lacks(out, "\nstarting the following units:")
+        assert_contains(out, "the following new units were started: autorestart.service\n")
+        assert_lacks(out, "as well:")
+        machine.systemctl('stop autorestart.service')  # cancel the 20y timer
+
+        # Switching to the same system should do nothing (especially not treat the unit as failed)
+        out = switch_to_specialisation("${machine}", "autorestartService")
+        assert_lacks(out, "stopping the following units:")
+        assert_lacks(out, "NOT restarting the following changed units:")
+        assert_lacks(out, "reloading the following units:")
+        assert_lacks(out, "\nrestarting the following units:")
+        assert_lacks(out, "\nstarting the following units:")
+        assert_contains(out, "the following new units were started: autorestart.service\n")
+        assert_lacks(out, "as well:")
+        machine.systemctl('stop autorestart.service')  # cancel the 20y timer
+
+        # If systemd thinks the unit has failed and is in autorestart, we should show it as failed
+        out = switch_to_specialisation("${machine}", "autorestartServiceFailing", fail=True)
+        assert_lacks(out, "stopping the following units:")
+        assert_lacks(out, "NOT restarting the following changed units:")
+        assert_lacks(out, "reloading the following units:")
+        assert_lacks(out, "\nrestarting the following units:")
+        assert_lacks(out, "\nstarting the following units:")
+        assert_lacks(out, "the following new units were started:")
+        assert_contains(out, "warning: the following units failed: autorestart.service\n")
+        assert_contains(out, "Main PID:")  # output of systemctl
+        assert_lacks(out, "as well:")
+
     with subtest("restart and reload by activation script"):
+        switch_to_specialisation("${machine}", "simpleServiceNorestart")
         out = switch_to_specialisation("${machine}", "restart-and-reload-by-activation-script")
         assert_contains(out, "stopping the following units: test.service\n")
         assert_lacks(out, "NOT restarting the following changed units:")