summary refs log tree commit diff
path: root/nixos/modules/virtualisation/nixos-containers.nix
diff options
context:
space:
mode:
author pennae <github@quasiparticle.net> 2021-04-25 19:36:51 +0200
committer pennae <github@quasiparticle.net> 2021-04-28 14:07:35 +0200
commit 82931ea446e5fde9ae4d2fe58c2988790d91a880 (patch)
tree a559693dbb19ac7cd9b5b329a618f820e9d2e172 /nixos/modules/virtualisation/nixos-containers.nix
parent 265d31bcbd6599c38499354bc5f111589814f101 (diff)
download nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.tar
nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.tar.gz
nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.tar.bz2
nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.tar.lz
nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.tar.xz
nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.tar.zst
nixpkgs-82931ea446e5fde9ae4d2fe58c2988790d91a880.zip
nixos/nix-containers: use SIGTERM to stop containers
systemd-nspawn can react to SIGTERM and send a shutdown signal to the container's
init process. Use that instead of going through dbus and machined to request
that nspawn send the signal, since during host shutdown machined or dbus may have
gone away by the time a container unit is stopped.

To solve the issue that a container that is still starting cannot be stopped
cleanly, we must also handle the shutdown signal (SIGRTMIN+3) in containerInit/stage-2.
Diffstat (limited to 'nixos/modules/virtualisation/nixos-containers.nix')
-rw-r--r-- nixos/modules/virtualisation/nixos-containers.nix 28
1 files changed, 17 insertions, 11 deletions
diff --git a/nixos/modules/virtualisation/nixos-containers.nix b/nixos/modules/virtualisation/nixos-containers.nix
index f15d5875841..43eca142e02 100644
--- a/nixos/modules/virtualisation/nixos-containers.nix
+++ b/nixos/modules/virtualisation/nixos-containers.nix
@@ -35,6 +35,9 @@ let
       ''
         #! ${pkgs.runtimeShell} -e
 
+        # Exit early if we're asked to shut down.
+        trap "exit 0" SIGRTMIN+3
+
         # Initialise the container side of the veth pair.
         if [ -n "$HOST_ADDRESS" ]   || [ -n "$HOST_ADDRESS6" ]  ||
            [ -n "$LOCAL_ADDRESS" ]  || [ -n "$LOCAL_ADDRESS6" ] ||
@@ -60,8 +63,12 @@ let
 
         ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
 
-        # Start the regular stage 1 script.
-        exec "$1"
+        # Start the regular stage 2 script.
+        # We source instead of exec to not lose an early stop signal, which is
+        # also the only _reliable_ shutdown signal we have since early stop
+        # does not execute ExecStop* commands.
+        set +e
+        . "$1"
       ''
     );
 
@@ -127,12 +134,16 @@ let
       ''}
 
       # Run systemd-nspawn without startup notification (we'll
-      # wait for the container systemd to signal readiness).
+      # wait for the container systemd to signal readiness)
+      # Kill signal handling means systemd-nspawn will pass a system-halt signal
+      # to the container systemd when it receives SIGTERM for container shutdown;
+      # containerInit and stage2 have to handle this as well.
       exec ${config.systemd.package}/bin/systemd-nspawn \
         --keep-unit \
         -M "$INSTANCE" -D "$root" $extraFlags \
         $EXTRA_NSPAWN_FLAGS \
         --notify-ready=yes \
+        --kill-signal=SIGRTMIN+3 \
         --bind-ro=/nix/store \
         --bind-ro=/nix/var/nix/db \
         --bind-ro=/nix/var/nix/daemon-socket \
@@ -259,13 +270,10 @@ let
     Slice = "machine.slice";
     Delegate = true;
 
-    # Hack: we don't want to kill systemd-nspawn, since we call
-    # "machinectl poweroff" in preStop to shut down the
-    # container cleanly. But systemd requires sending a signal
-    # (at least if we want remaining processes to be killed
-    # after the timeout). So send an ignored signal.
+    # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
+    # signal (SIGRTMIN+3) for the inner container.
     KillMode = "mixed";
-    KillSignal = "WINCH";
+    KillSignal = "TERM";
 
     DevicePolicy = "closed";
     DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
@@ -752,8 +760,6 @@ in
 
       postStart = postStartScript dummyConfig;
 
-      preStop = "machinectl poweroff $INSTANCE";
-
       restartIfChanged = false;
 
       serviceConfig = serviceDirectives dummyConfig;