From 82931ea446e5fde9ae4d2fe58c2988790d91a880 Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 25 Apr 2021 19:36:51 +0200 Subject: nixos/nix-containers: use SIGTERM to stop containers systemd-nspawn can react to SIGTERM and send a shutdown signal to the container init process. use that instead of going through dbus and machined to request nspawn sending the signal, since during host shutdown machined or dbus may have gone away by the point a container unit is stopped. to solve the issue that a container that is still starting cannot be stopped cleanly we must also handle this signal in containerInit/stage-2. --- nixos/modules/virtualisation/nixos-containers.nix | 28 ++++++++++++++--------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'nixos') diff --git a/nixos/modules/virtualisation/nixos-containers.nix b/nixos/modules/virtualisation/nixos-containers.nix index f15d5875841..43eca142e02 100644 --- a/nixos/modules/virtualisation/nixos-containers.nix +++ b/nixos/modules/virtualisation/nixos-containers.nix @@ -35,6 +35,9 @@ let '' #! ${pkgs.runtimeShell} -e + # Exit early if we're asked to shut down. + trap "exit 0" SIGRTMIN+3 + # Initialise the container side of the veth pair. if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] || @@ -60,8 +63,12 @@ let ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)} - # Start the regular stage 1 script. - exec "$1" + # Start the regular stage 2 script. + # We source instead of exec to not lose an early stop signal, which is + # also the only _reliable_ shutdown signal we have since early stop + # does not execute ExecStop* commands. + set +e + . "$1" '' ); @@ -127,12 +134,16 @@ let ''} # Run systemd-nspawn without startup notification (we'll - # wait for the container systemd to signal readiness). + # wait for the container systemd to signal readiness) + # Kill signal handling means systemd-nspawn will pass a system-halt signal + # to the container systemd when it receives SIGTERM for container shutdown; + # containerInit and stage2 have to handle this as well. exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ $EXTRA_NSPAWN_FLAGS \ --notify-ready=yes \ + --kill-signal=SIGRTMIN+3 \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ @@ -259,13 +270,10 @@ let Slice = "machine.slice"; Delegate = true; - # Hack: we don't want to kill systemd-nspawn, since we call - # "machinectl poweroff" in preStop to shut down the - # container cleanly. But systemd requires sending a signal - # (at least if we want remaining processes to be killed - # after the timeout). So send an ignored signal. + # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown + # signal (SIGRTMIN+3) for the inner container. KillMode = "mixed"; - KillSignal = "WINCH"; + KillSignal = "TERM"; DevicePolicy = "closed"; DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices; @@ -752,8 +760,6 @@ in postStart = postStartScript dummyConfig; - preStop = "machinectl poweroff $INSTANCE"; - restartIfChanged = false; serviceConfig = serviceDirectives dummyConfig; -- cgit 1.4.1 From 317a2c9f26d06ba79850f3866e8e04cd298211c4 Mon Sep 17 00:00:00 2001 From: pennae Date: Fri, 30 Apr 2021 15:43:27 +0200 Subject: nixos/nix-containers: add tests for early/no-machined container stop --- nixos/tests/containers-imperative.nix | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'nixos') diff --git a/nixos/tests/containers-imperative.nix b/nixos/tests/containers-imperative.nix index 0ff0d3f9545..bb207165a02 100644 --- a/nixos/tests/containers-imperative.nix +++ b/nixos/tests/containers-imperative.nix @@ -111,6 +111,26 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: { machine.succeed(f"nixos-container stop {id1}") machine.succeed(f"nixos-container start {id1}") + # clear serial backlog for next tests + machine.succeed("logger eat console backlog 3ea46eb2-7f82-4f70-b810-3f00e3dd4c4d") + machine.wait_for_console_text( + "eat console backlog 3ea46eb2-7f82-4f70-b810-3f00e3dd4c4d" + ) + + with subtest("Stop a container early"): + machine.succeed(f"nixos-container stop {id1}") + machine.succeed(f"nixos-container start {id1} &") + machine.wait_for_console_text("Stage 2") + machine.succeed(f"nixos-container stop {id1}") + machine.wait_for_console_text(f"Container {id1} exited successfully") + machine.succeed(f"nixos-container start {id1}") + + with subtest("Stop a container without machined (regression test for #109695)"): + machine.systemctl("stop systemd-machined") + machine.succeed(f"nixos-container stop {id1}") + machine.wait_for_console_text(f"Container {id1} has been shut down") + machine.succeed(f"nixos-container start {id1}") + with subtest("tmpfiles are present"): machine.log("creating container tmpfiles") machine.succeed( -- cgit 1.4.1