From 96d36b0c2e5a4be7c5538f7dea7c16154081af29 Mon Sep 17 00:00:00 2001 From: Janne Heß Date: Wed, 5 Jan 2022 12:59:47 +0100 Subject: nixos/switch-to-configuration: Proper unit file parser This replaces the naive K=V unit parser with a proper INI parser from a library and adds proper support for override files. Also adds a bunch of comments about parsing, I hope this makes it easier to understand and maintain in the future. There are multiple reasons to do so, the first one is just general correctness with is nice imo. But to get to more serious reasons (I didn't put in all that effort for nothing) is that this is the first step torwards more clever restart/reload handling. By using a library like Data::Compare a future PR could replace the current way of fingerprinting units (which is to compare store paths) by comparing the hashes. This is more precise because units won't get restarted because the order of the options change, comments are added, some dependency of writeText changes, .... Also this allows us to add a feature like `X-Reload-Triggers` so the unit can either be reloaded when these change or restarted when everything else changes, giving module authors the ability to have their services reloaded without having to fear that updates are not applied because the service doesn't get restarted. Another reason why this feature is nice is that now that the unit files are parsed correctly (and values are just extracted from one section), potential future rewrites can just rely on some INI library without having to implement their own weird parser that is compatible with this script. This also comes with a new subroutine to handle systemd booleans because I thought the current way of handling it was just ugly. This also allows overriding values this script reads in an override file. Apart from making this script more compatible with the world around it, this also fixes two issues I saw bugging exactly 0 (zero) people. First is that this script now supports multiple override files, also ones that are not called override.conf and the second one is that `1` and `on` are treated as bools by systemd but were previously not parsed as such by switch-to-configuration. --- .../from_md/release-notes/rl-2205.section.xml | 48 +++++++++-- nixos/doc/manual/release-notes/rl-2205.section.md | 6 +- .../system/activation/switch-to-configuration.pl | 98 +++++++++++++++++----- nixos/modules/system/activation/top-level.nix | 2 +- 4 files changed, 127 insertions(+), 27 deletions(-) diff --git a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml index 2875ea683f8..35a9c5d16b0 100644 --- a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml +++ b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml @@ -391,13 +391,49 @@ - The interface that allows activation scripts to restart units - has been reworked. Restarting and reloading is now done by a - single file - /run/nixos/activation-restart-list that - honors restartIfChanged and - reloadIfChanged of the units. + switch-to-configuration (the script that is + run when running nixos-rebuild switch for + example) has been reworked + + + + The interface that allows activation scripts to restart + units has been streamlined. Restarting and reloading is + now done by a single file + /run/nixos/activation-restart-list that + honors restartIfChanged and + reloadIfChanged of the units. + + + + + The script now uses a proper ini-file parser to parse + systemd units. Some values are now only searched in one + section instead of in the entire unit. This is only + relevant for units that don’t use the NixOS systemd moule. + + + + + RefuseManualStop, + X-OnlyManualStart, + X-StopOnRemoval, + X-StopOnReconfiguration are only + searched in the [Unit] section + + + + + X-ReloadIfChanged, + X-RestartIfChanged, + X-StopIfChanged are only searched + in the [Service] section + + + + + diff --git a/nixos/doc/manual/release-notes/rl-2205.section.md b/nixos/doc/manual/release-notes/rl-2205.section.md index a59513adfc9..078ebed5f13 100644 --- a/nixos/doc/manual/release-notes/rl-2205.section.md +++ b/nixos/doc/manual/release-notes/rl-2205.section.md @@ -124,7 +124,11 @@ In addition to numerous new and upgraded packages, this release has the followin `pkgs.noto-fonts-cjk` is currently an alias of `pkgs.noto-fonts-cjk-sans` and doesn't include serif fonts. -- The interface that allows activation scripts to restart units has been reworked. Restarting and reloading is now done by a single file `/run/nixos/activation-restart-list` that honors `restartIfChanged` and `reloadIfChanged` of the units. +- `switch-to-configuration` (the script that is run when running `nixos-rebuild switch` for example) has been reworked + * The interface that allows activation scripts to restart units has been streamlined. Restarting and reloading is now done by a single file `/run/nixos/activation-restart-list` that honors `restartIfChanged` and `reloadIfChanged` of the units. + * The script now uses a proper ini-file parser to parse systemd units. Some values are now only searched in one section instead of in the entire unit. This is only relevant for units that don't use the NixOS systemd moule. + * `RefuseManualStop`, `X-OnlyManualStart`, `X-StopOnRemoval`, `X-StopOnReconfiguration` are only searched in the `[Unit]` section + * `X-ReloadIfChanged`, `X-RestartIfChanged`, `X-StopIfChanged` are only searched in the `[Service]` section - The `services.bookstack.cacheDir` option has been removed, since the cache directory is now handled by systemd. diff --git a/nixos/modules/system/activation/switch-to-configuration.pl b/nixos/modules/system/activation/switch-to-configuration.pl index 93fff889d6b..c51ae5d54fc 100644 --- a/nixos/modules/system/activation/switch-to-configuration.pl +++ b/nixos/modules/system/activation/switch-to-configuration.pl @@ -2,6 +2,7 @@ use strict; use warnings; +use Config::IniFiles; use File::Path qw(make_path); use File::Basename; use File::Slurp; @@ -113,26 +114,85 @@ sub parseFstab { return ($fss, $swaps); } +# This subroutine takes a single ini file that specified systemd configuration +# like unit configuration and parses it into a hash where the keys are the sections +# of the unit file and the values are hashes themselves. These hashes have the unit file +# keys as their keys (left side of =) and an array of all values that were set as their +# values. If a value is empty (for example `ExecStart=`), then all current definitions are +# removed. +# +# Instead of returning the hash, this subroutine takes a hashref to return the data in. This +# allows calling the subroutine multiple times with the same hash to parse override files. +sub parseSystemdIni { + my ($unitContents, $path) = @_; + # Tie the ini file to a hash for easier access + my %fileContents; + tie %fileContents, "Config::IniFiles", (-file => $path, -allowempty => 1, -allowcontinue => 1); + + # Copy over all sections + foreach my $sectionName (keys %fileContents) { + # Copy over all keys + foreach my $iniKey (keys %{$fileContents{$sectionName}}) { + # Ensure the value is an array so it's easier to work with + my $iniValue = $fileContents{$sectionName}{$iniKey}; + my @iniValues; + if (ref($iniValue) eq "ARRAY") { + @iniValues = @{$iniValue}; + } else { + @iniValues = $iniValue; + } + # Go over all values + for my $iniValue (@iniValues) { + # If a value is empty, it's an override that tells us to clean the value + if ($iniValue eq "") { + delete $unitContents->{$sectionName}->{$iniKey}; + next; + } + push(@{$unitContents->{$sectionName}->{$iniKey}}, $iniValue); + } + } + } + return; +} + +# This subroutine takes the path to a systemd configuration file (like a unit configuration), +# parses it, and returns a hash that contains the contents. The contents of this hash are +# explained in the `parseSystemdIni` subroutine. Neither the sections nor the keys inside +# the sections are consistently sorted. +# +# If a directory with the same basename ending in .d exists next to the unit file, it will be +# assumed to contain override files which will be parsed as well and handled properly. sub parseUnit { - my ($filename) = @_; - my $info = {}; - parseKeyValues($info, read_file($filename)) if -f $filename; - parseKeyValues($info, read_file("${filename}.d/overrides.conf")) if -f "${filename}.d/overrides.conf"; - return $info; + my ($unitPath) = @_; + + # Parse the main unit and all overrides + my %unitData; + parseSystemdIni(\%unitData, $_) for glob("${unitPath}{,.d/*.conf}"); + return %unitData; } sub parseKeyValues { my $info = shift; foreach my $line (@_) { - # FIXME: not quite correct. $line =~ /^([^=]+)=(.*)$/ or next; $info->{$1} = $2; } } -sub boolIsTrue { - my ($s) = @_; - return $s eq "yes" || $s eq "true"; +# Checks whether a specified boolean in a systemd unit is true +# or false, with a default that is applied when the value is not set. +sub parseSystemdBool { + my ($unitConfig, $sectionName, $boolName, $default) = @_; + + my @values = @{$unitConfig->{$sectionName}{$boolName} // []}; + # Return default if value is not set + if (scalar @values lt 1 || not defined $values[-1]) { + return $default; + } + # If value is defined multiple times, use the last definition + my $last = $values[-1]; + # These are valid values as of systemd.syntax(7) + return $last eq "1" || $last eq "yes" || $last eq "true" || $last eq "on"; } sub recordUnit { @@ -167,17 +227,17 @@ sub handleModifiedUnit { # Revert of the attempt: https://github.com/NixOS/nixpkgs/pull/147609 # More details: https://github.com/NixOS/nixpkgs/issues/74899#issuecomment-981142430 } else { - my $unitInfo = parseUnit($newUnitFile); - if (boolIsTrue($unitInfo->{'X-ReloadIfChanged'} // "no")) { + my %unitInfo = parseUnit($newUnitFile); + if (parseSystemdBool(\%unitInfo, "Service", "X-ReloadIfChanged", 0)) { $unitsToReload->{$unit} = 1; recordUnit($reloadListFile, $unit); } - elsif (!boolIsTrue($unitInfo->{'X-RestartIfChanged'} // "yes") || boolIsTrue($unitInfo->{'RefuseManualStop'} // "no") || boolIsTrue($unitInfo->{'X-OnlyManualStart'} // "no")) { + elsif (!parseSystemdBool(\%unitInfo, "Service", "X-RestartIfChanged", 1) || parseSystemdBool(\%unitInfo, "Unit", "RefuseManualStop", 0) || parseSystemdBool(\%unitInfo, "Unit", "X-OnlyManualStart", 0)) { $unitsToSkip->{$unit} = 1; } else { # It doesn't make sense to stop and start non-services because # they can't have ExecStop= - if (!boolIsTrue($unitInfo->{'X-StopIfChanged'} // "yes") || $unit !~ /\.service$/) { + if (!parseSystemdBool(\%unitInfo, "Service", "X-StopIfChanged", 1) || $unit !~ /\.service$/) { # This unit should be restarted instead of # stopped and started. $unitsToRestart->{$unit} = 1; @@ -188,7 +248,7 @@ sub handleModifiedUnit { # socket(s) instead of the service. my $socketActivated = 0; if ($unit =~ /\.service$/) { - my @sockets = split / /, ($unitInfo->{Sockets} // ""); + my @sockets = split(/ /, join(" ", @{$unitInfo{Service}{Sockets} // []})); if (scalar @sockets == 0) { @sockets = ("$baseName.socket"); } @@ -254,12 +314,12 @@ while (my ($unit, $state) = each %{$activePrev}) { if (-e $prevUnitFile && ($state->{state} eq "active" || $state->{state} eq "activating")) { if (! -e $newUnitFile || abs_path($newUnitFile) eq "/dev/null") { - my $unitInfo = parseUnit($prevUnitFile); - $unitsToStop{$unit} = 1 if boolIsTrue($unitInfo->{'X-StopOnRemoval'} // "yes"); + my %unitInfo = parseUnit($prevUnitFile); + $unitsToStop{$unit} = 1 if parseSystemdBool(\%unitInfo, "Unit", "X-StopOnRemoval", 1); } elsif ($unit =~ /\.target$/) { - my $unitInfo = parseUnit($newUnitFile); + my %unitInfo = parseUnit($newUnitFile); # Cause all active target units to be restarted below. # This should start most changed units we stop here as @@ -268,7 +328,7 @@ while (my ($unit, $state) = each %{$activePrev}) { # active after the system has resumed, which probably # should not be the case. Just ignore it. if ($unit ne "suspend.target" && $unit ne "hibernate.target" && $unit ne "hybrid-sleep.target") { - unless (boolIsTrue($unitInfo->{'RefuseManualStart'} // "no") || boolIsTrue($unitInfo->{'X-OnlyManualStart'} // "no")) { + unless (parseSystemdBool(\%unitInfo, "Unit", "RefuseManualStart", 0) || parseSystemdBool(\%unitInfo, "Unit", "X-OnlyManualStart", 0)) { $unitsToStart{$unit} = 1; recordUnit($startListFile, $unit); # Don't spam the user with target units that always get started. @@ -287,7 +347,7 @@ while (my ($unit, $state) = each %{$activePrev}) { # Stopping a target generally has no effect on other units # (unless there is a PartOf dependency), so this is just a # bookkeeping thing to get systemd to do the right thing. - if (boolIsTrue($unitInfo->{'X-StopOnReconfiguration'} // "no")) { + if (parseSystemdBool(\%unitInfo, "Unit", "X-StopOnReconfiguration", 0)) { $unitsToStop{$unit} = 1; } } diff --git a/nixos/modules/system/activation/top-level.nix b/nixos/modules/system/activation/top-level.nix index 1c588ff9691..0f26aaf72ec 100644 --- a/nixos/modules/system/activation/top-level.nix +++ b/nixos/modules/system/activation/top-level.nix @@ -119,7 +119,7 @@ let configurationName = config.boot.loader.grub.configurationName; # Needed by switch-to-configuration. - perl = pkgs.perl.withPackages (p: with p; [ FileSlurp NetDBus XMLParser XMLTwig ]); + perl = pkgs.perl.withPackages (p: with p; [ FileSlurp NetDBus XMLParser XMLTwig ConfigIniFiles ]); }; # Handle assertions and warnings -- cgit 1.4.1 From b52372675d75eac74312bb3adfb8fe6d1e4c702b Mon Sep 17 00:00:00 2001 From: Janne Heß Date: Thu, 20 Jan 2022 15:32:32 +0100 Subject: nixos/switch-to-configuration: Clean up lower part of the script - Fully get rid of `parseKeyValues` and use systemctl features for that - Add some regex modifiers recommended by perlcritic - Get rid of a postfix if - Sort units when showing their status - Clean the logic for showing what failed from `elif` to `next` - Switch from `state` to `substate` for `auto-restart` because that's actually where the value is stored - Show status of units with one single systemctl call and get rid of COLUMNS in favor of --full - Add a test for failing units --- .../system/activation/switch-to-configuration.pl | 39 +++++------ nixos/tests/switch-test.nix | 79 +++++++++++++++++++++- 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/nixos/modules/system/activation/switch-to-configuration.pl b/nixos/modules/system/activation/switch-to-configuration.pl index c51ae5d54fc..1fe346114e4 100644 --- a/nixos/modules/system/activation/switch-to-configuration.pl +++ b/nixos/modules/system/activation/switch-to-configuration.pl @@ -171,14 +171,6 @@ sub parseUnit { return %unitData; } -sub parseKeyValues { - my $info = shift; - foreach my $line (@_) { - $line =~ /^([^=]+)=(.*)$/ or next; - $info->{$1} = $2; - } -} - # Checks whether a specified boolean in a systemd unit is true # or false, with a default that is applied when the value is not set. sub parseSystemdBool { @@ -606,33 +598,36 @@ my $activeNew = getActiveUnits; while (my ($unit, $state) = each %{$activeNew}) { if ($state->{state} eq "failed") { push @failed, $unit; + next; } - elsif ($state->{state} eq "auto-restart") { - # A unit in auto-restart state is a failure *if* it previously failed to start - my $lines = `@systemd@/bin/systemctl show '$unit'`; - my $info = {}; - parseKeyValues($info, split("\n", $lines)); - if ($info->{ExecMainStatus} ne '0') { + if ($state->{substate} eq "auto-restart") { + # A unit in auto-restart substate is a failure *if* it previously failed to start + my $main_status = `@systemd@/bin/systemctl show --value --property=ExecMainStatus '$unit'`; + chomp($main_status); + + if ($main_status ne "0") { push @failed, $unit; + next; } } + # Ignore scopes since they are not managed by this script but rather # created and managed by third-party services via the systemd dbus API. - elsif ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/) { + # This only lists units that are not failed (including ones that are in auto-restart but have not failed previously) + if ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/msx) { push @new, $unit; } } -print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n" - if scalar @new > 0; +if (scalar @new > 0) { + print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n" +} if (scalar @failed > 0) { - print STDERR "warning: the following units failed: ", join(", ", sort(@failed)), "\n"; - foreach my $unit (@failed) { - print STDERR "\n"; - system("COLUMNS=1000 @systemd@/bin/systemctl status --no-pager '$unit' >&2"); - } + my @failed_sorted = sort @failed; + print STDERR "warning: the following units failed: ", join(", ", @failed_sorted), "\n\n"; + system "@systemd@/bin/systemctl status --no-pager --full '" . join("' '", @failed_sorted) . "' >&2"; $res = 4; } diff --git a/nixos/tests/switch-test.nix b/nixos/tests/switch-test.nix index 1c32bf6beb9..8e425f0f877 100644 --- a/nixos/tests/switch-test.nix +++ b/nixos/tests/switch-test.nix @@ -45,6 +45,31 @@ import ./make-test-python.nix ({ pkgs, ...} : { systemd.services.test.restartIfChanged = false; }; + simpleServiceFailing.configuration = { + imports = [ simpleServiceModified.configuration ]; + systemd.services.test.serviceConfig.ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false"; + }; + + autorestartService.configuration = { + # A service that immediately goes into restarting (but without failing) + systemd.services.autorestart = { + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "simple"; + Restart = "always"; + RestartSec = "20y"; # Should be long enough + ExecStart = "${pkgs.coreutils}/bin/true"; + }; + }; + }; + + autorestartServiceFailing.configuration = { + imports = [ autorestartService.configuration ]; + systemd.services.autorestart.serviceConfig = { + ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false"; + }; + }; + restart-and-reload-by-activation-script.configuration = { systemd.services = rec { simple-service = { @@ -189,12 +214,13 @@ import ./make-test-python.nix ({ pkgs, ...} : { exec env -i "$@" | tee /dev/stderr ''; in /* python */ '' - def switch_to_specialisation(system, name, action="test"): + def switch_to_specialisation(system, name, action="test", fail=False): if name == "": stc = f"{system}/bin/switch-to-configuration" else: stc = f"{system}/specialisation/{name}/bin/switch-to-configuration" - out = machine.succeed(f"{stc} {action} 2>&1") + out = machine.fail(f"{stc} {action} 2>&1") if fail \ + else machine.succeed(f"{stc} {action} 2>&1") assert_lacks(out, "switch-to-configuration line") # Perl warnings return out @@ -305,7 +331,56 @@ import ./make-test-python.nix ({ pkgs, ...} : { assert_lacks(out, "as well:") assert_contains(out, "would start the following units: test.service\n") + with subtest("failing units"): + # Let the simple service fail + switch_to_specialisation("${machine}", "simpleServiceModified") + out = switch_to_specialisation("${machine}", "simpleServiceFailing", fail=True) + assert_contains(out, "stopping the following units: test.service\n") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_contains(out, "\nstarting the following units: test.service\n") + assert_lacks(out, "the following new units were started:") + assert_contains(out, "warning: the following units failed: test.service\n") + assert_contains(out, "Main PID:") # output of systemctl + assert_lacks(out, "as well:") + + # A unit that gets into autorestart without failing is not treated as failed + out = switch_to_specialisation("${machine}", "autorestartService") + assert_lacks(out, "stopping the following units:") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_lacks(out, "\nstarting the following units:") + assert_contains(out, "the following new units were started: autorestart.service\n") + assert_lacks(out, "as well:") + machine.systemctl('stop autorestart.service') # cancel the 20y timer + + # Switching to the same system should do nothing (especially not treat the unit as failed) + out = switch_to_specialisation("${machine}", "autorestartService") + assert_lacks(out, "stopping the following units:") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_lacks(out, "\nstarting the following units:") + assert_contains(out, "the following new units were started: autorestart.service\n") + assert_lacks(out, "as well:") + machine.systemctl('stop autorestart.service') # cancel the 20y timer + + # If systemd thinks the unit has failed and is in autorestart, we should show it as failed + out = switch_to_specialisation("${machine}", "autorestartServiceFailing", fail=True) + assert_lacks(out, "stopping the following units:") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_lacks(out, "\nstarting the following units:") + assert_lacks(out, "the following new units were started:") + assert_contains(out, "warning: the following units failed: autorestart.service\n") + assert_contains(out, "Main PID:") # output of systemctl + assert_lacks(out, "as well:") + with subtest("restart and reload by activation script"): + switch_to_specialisation("${machine}", "simpleServiceNorestart") out = switch_to_specialisation("${machine}", "restart-and-reload-by-activation-script") assert_contains(out, "stopping the following units: test.service\n") assert_lacks(out, "NOT restarting the following changed units:") -- cgit 1.4.1