summary refs log tree commit diff
path: root/nixos/tests/pacemaker.nix
diff options
context:
space:
mode:
author Astro <astro@spaceboyz.net> 2022-03-08 02:46:26 +0100
committer Astro <astro@spaceboyz.net> 2022-03-09 01:19:10 +0100
commit 70c1e849c0b5741e07e7d8d0d418764e2fdb4e24 (patch)
tree d68e3e477acc0424115b9d2f7e6fda725e360ef4 /nixos/tests/pacemaker.nix
parent a60ab35dd653b0969c014ced93396d212f1c460c (diff)
download nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.tar
nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.tar.gz
nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.tar.bz2
nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.tar.lz
nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.tar.xz
nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.tar.zst
nixpkgs-70c1e849c0b5741e07e7d8d0d418764e2fdb4e24.zip
nixos/tests/pacemaker: init
Diffstat (limited to 'nixos/tests/pacemaker.nix')
-rw-r--r-- nixos/tests/pacemaker.nix 110
1 files changed, 110 insertions, 0 deletions
diff --git a/nixos/tests/pacemaker.nix b/nixos/tests/pacemaker.nix
new file mode 100644
index 00000000000..68455761495
--- /dev/null
+++ b/nixos/tests/pacemaker.nix
@@ -0,0 +1,110 @@
+import ./make-test-python.nix  ({ pkgs, lib, ... }: rec {
+  name = "pacemaker";
+  meta = {
+    maintainers = with pkgs.lib.maintainers; [ astro ];
+  };
+
+  nodes =
+    let
+      # NixOS configuration shared by all members; `i` picks the eth1 address
+      node = i: {
+        networking.interfaces.eth1.ipv4.addresses = [ {
+          address = "192.168.0.${toString i}";
+          prefixLength = 24;
+        } ];
+
+        services.corosync = {
+          enable = true;
+          clusterName = "zentralwerk-network";
+          # imap1 counts from 1; the index is called nodeid so it does not shadow `i`
+          nodelist = lib.imap1 (nodeid: name: {
+            inherit nodeid name;
+            ring_addrs = [
+              (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
+            ];
+          }) (builtins.attrNames nodes);
+        };
+        environment.etc."corosync/authkey" = {
+          source = builtins.toFile "authkey"
+            # minimum length: 128 bytes
+            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
+          mode = "0400";
+        };
+        services.pacemaker.enable = true;
+
+        # used for pacemaker resource
+        systemd.services.ha-cat = {
+          description = "Highly available netcat";
+          serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
+        };
+      };
+    in {
+      node1 = node 1;
+      node2 = node 2;
+      node3 = node 3;
+    };
+
+  # loads a one-primitive CIB (the ha-cat systemd unit) into the cluster, then
+  # crashes the node running it and waits until it runs on a different node
+  testScript =
+    let
+      resources = builtins.toFile "cib-resources.xml" ''
+        <resources>
+          <primitive id="cat" class="systemd" type="ha-cat">
+            <operations>
+              <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
+              <op id="start-cat" name="start" interval="0" timeout="1s"/>
+              <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
+            </operations>
+          </primitive>
+        </resources>
+      '';
+    in ''
+      import re
+      import time
+
+      start_all()
+
+      ${lib.concatMapStrings (node: ''
+        ${node}.wait_until_succeeds("corosync-quorumtool")
+        ${node}.wait_for_unit("pacemaker.service")
+      '') (builtins.attrNames nodes)}
+
+      # No STONITH device
+      node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
+      # Configure the cat resource
+      node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")
+
+      # wait until the service is started
+      while True:
+        output = node1.succeed("crm_resource -r cat --locate")
+        match = re.search("is running on: (.+)", output)
+        if match:
+          for machine in machines:
+            if machine.name == match.group(1):
+              current_node = machine
+          break
+        time.sleep(1)
+
+      current_node.log("Service running here!")
+      current_node.crash()
+
+      # pick another node that's still up
+      for machine in machines:
+        if machine.booted:
+          check_node = machine
+      # find where the service has been started next
+      while True:
+        output = check_node.succeed("crm_resource -r cat --locate")
+        match = re.search("is running on: (.+)", output)
+        # output will remain the old current_node until the crash is detected by pacemaker
+        if match and match.group(1) != current_node.name:
+          for machine in machines:
+            if machine.name == match.group(1):
+              next_node = machine
+          break
+        time.sleep(1)
+
+      next_node.log("Service migrated here!")
+  '';
+})