summary refs log tree commit diff
path: root/nixos
diff options
context:
space:
mode:
authorRyan Mulligan <ryan@ryantm.com>2022-03-15 16:06:56 -0700
committerGitHub <noreply@github.com>2022-03-15 16:06:56 -0700
commit0ab73f9a3fe0d8a305db88650baef4a6bb792f29 (patch)
tree80b17b7be127a8e5eaaf85d1fdc8dfe9b9cad329 /nixos
parentcec02f35167a49490f3ee8e32673f22f87a8132a (diff)
parent70c1e849c0b5741e07e7d8d0d418764e2fdb4e24 (diff)
downloadnixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar
nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.gz
nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.bz2
nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.lz
nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.xz
nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.zst
nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.zip
Merge pull request #162535 from astro/pacemaker
pacemaker: init
Diffstat (limited to 'nixos')
-rw-r--r--nixos/doc/manual/from_md/release-notes/rl-2205.section.xml6
-rw-r--r--nixos/doc/manual/release-notes/rl-2205.section.md2
-rw-r--r--nixos/modules/module-list.nix2
-rw-r--r--nixos/modules/services/cluster/corosync/default.nix112
-rw-r--r--nixos/modules/services/cluster/pacemaker/default.nix52
-rw-r--r--nixos/tests/all-tests.nix1
-rw-r--r--nixos/tests/pacemaker.nix110
7 files changed, 285 insertions, 0 deletions
diff --git a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
index 396de8cd77c..a23b2489abe 100644
--- a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
+++ b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
@@ -306,6 +306,12 @@
           with many features.
         </para>
       </listitem>
+      <listitem>
+        <para>
+          <link xlink:href="https://clusterlabs.org/pacemaker/">pacemaker</link>
+          cluster resource manager
+        </para>
+      </listitem>
     </itemizedlist>
   </section>
   <section xml:id="sec-release-22.05-incompatibilities">
diff --git a/nixos/doc/manual/release-notes/rl-2205.section.md b/nixos/doc/manual/release-notes/rl-2205.section.md
index 2f730de737c..390ec7b2add 100644
--- a/nixos/doc/manual/release-notes/rl-2205.section.md
+++ b/nixos/doc/manual/release-notes/rl-2205.section.md
@@ -87,6 +87,8 @@ In addition to numerous new and upgraded packages, this release has the followin
 
 - [blocky](https://0xerr0r.github.io/blocky/), fast and lightweight DNS proxy as ad-blocker for local network with many features.
 
+- [pacemaker](https://clusterlabs.org/pacemaker/) cluster resource manager
+
 <!-- To avoid merge conflicts, consider adding your item at an arbitrary place in the list instead. -->
 
 ## Backward Incompatibilities {#sec-release-22.05-incompatibilities}
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 13703968167..68f9c6c1227 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -302,6 +302,7 @@
   ./services/backup/znapzend.nix
   ./services/blockchain/ethereum/geth.nix
   ./services/backup/zrepl.nix
+  ./services/cluster/corosync/default.nix
   ./services/cluster/hadoop/default.nix
   ./services/cluster/k3s/default.nix
   ./services/cluster/kubernetes/addons/dns.nix
@@ -314,6 +315,7 @@
   ./services/cluster/kubernetes/pki.nix
   ./services/cluster/kubernetes/proxy.nix
   ./services/cluster/kubernetes/scheduler.nix
+  ./services/cluster/pacemaker/default.nix
   ./services/cluster/spark/default.nix
   ./services/computing/boinc/client.nix
   ./services/computing/foldingathome/client.nix
diff --git a/nixos/modules/services/cluster/corosync/default.nix b/nixos/modules/services/cluster/corosync/default.nix
new file mode 100644
index 00000000000..b4144917fee
--- /dev/null
+++ b/nixos/modules/services/cluster/corosync/default.nix
@@ -0,0 +1,112 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.corosync;
+in
+{
+  # interface
+  options.services.corosync = {
+    enable = mkEnableOption "corosync";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.corosync;
+      defaultText = literalExpression "pkgs.corosync";
+      description = "Package that should be used for corosync.";
+    };
+
+    clusterName = mkOption {
+      type = types.str;
+      default = "nixcluster";
+      description = "Name of the corosync cluster.";
+    };
+
+    extraOptions = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = "Additional options with which to start corosync.";
+    };
+
+    nodelist = mkOption {
+      description = "Corosync nodelist: all cluster members.";
+      default = [];
+      type = with types; listOf (submodule {
+        options = {
+          nodeid = mkOption {
+            type = int;
+            description = "Node ID number";
+          };
+          name = mkOption {
+            type = str;
+            description = "Node name";
+          };
+          ring_addrs = mkOption {
+            type = listOf str;
+            description = "List of addresses, one for each ring.";
+          };
+        };
+      });
+    };
+  };
+
+  # implementation
+  config = mkIf cfg.enable {
+    environment.systemPackages = [ cfg.package ];
+
+    environment.etc."corosync/corosync.conf".text = ''
+      totem {
+        version: 2
+        secauth: on
+        cluster_name: ${cfg.clusterName}
+        transport: knet
+      }
+
+      nodelist {
+        ${concatMapStrings ({ nodeid, name, ring_addrs }: ''
+          node {
+            nodeid: ${toString nodeid}
+            name: ${name}
+            ${concatStrings (imap0 (i: addr: ''
+              ring${toString i}_addr: ${addr}
+            '') ring_addrs)}
+          }
+        '') cfg.nodelist}
+      }
+
+      quorum {
+        # only corosync_votequorum is supported
+        provider: corosync_votequorum
+        wait_for_all: 0
+        ${optionalString (builtins.length cfg.nodelist < 3) ''
+          two_node: 1
+        ''}
+      }
+
+      logging {
+        to_syslog: yes
+      }
+    '';
+
+    environment.etc."corosync/uidgid.d/root".text = ''
+      # allow pacemaker connection by root
+      uidgid {
+        uid: 0
+        gid: 0
+      }
+    '';
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.corosync = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "corosync";
+        StateDirectoryMode = "0700";
+      };
+    };
+
+    environment.etc."sysconfig/corosync".text = lib.optionalString (cfg.extraOptions != []) ''
+      COROSYNC_OPTIONS="${lib.escapeShellArgs cfg.extraOptions}"
+    '';
+  };
+}
diff --git a/nixos/modules/services/cluster/pacemaker/default.nix b/nixos/modules/services/cluster/pacemaker/default.nix
new file mode 100644
index 00000000000..7eeadffcc58
--- /dev/null
+++ b/nixos/modules/services/cluster/pacemaker/default.nix
@@ -0,0 +1,52 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.pacemaker;
+in
+{
+  # interface
+  options.services.pacemaker = {
+    enable = mkEnableOption "pacemaker";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.pacemaker;
+      defaultText = literalExpression "pkgs.pacemaker";
+      description = "Package that should be used for pacemaker.";
+    };
+  };
+
+  # implementation
+  config = mkIf cfg.enable {
+    assertions = [ {
+      assertion = config.services.corosync.enable;
+      message = ''
+        Enabling services.pacemaker requires a services.corosync configuration.
+      '';
+    } ];
+
+    environment.systemPackages = [ cfg.package ];
+
+    # required by pacemaker
+    users.users.hacluster = {
+      isSystemUser = true;
+      group = "pacemaker";
+      home = "/var/lib/pacemaker";
+    };
+    users.groups.pacemaker = {};
+
+    systemd.tmpfiles.rules = [
+      "d /var/log/pacemaker 0700 hacluster pacemaker -"
+    ];
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.pacemaker = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "pacemaker";
+        StateDirectoryMode = "0700";
+      };
+    };
+  };
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 98ca2e08108..eee99fb5e97 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -384,6 +384,7 @@ in
   os-prober = handleTestOn ["x86_64-linux"] ./os-prober.nix {};
   osrm-backend = handleTest ./osrm-backend.nix {};
   overlayfs = handleTest ./overlayfs.nix {};
+  pacemaker = handleTest ./pacemaker.nix {};
   packagekit = handleTest ./packagekit.nix {};
   pam-file-contents = handleTest ./pam/pam-file-contents.nix {};
   pam-oath-login = handleTest ./pam/pam-oath-login.nix {};
diff --git a/nixos/tests/pacemaker.nix b/nixos/tests/pacemaker.nix
new file mode 100644
index 00000000000..68455761495
--- /dev/null
+++ b/nixos/tests/pacemaker.nix
@@ -0,0 +1,110 @@
+import ./make-test-python.nix  ({ pkgs, lib, ... }: rec {
+  name = "pacemaker";
+  meta = with pkgs.lib.maintainers; {
+    maintainers = [ astro ];
+  };
+
+  nodes =
+    let
+      node = i: {
+        networking.interfaces.eth1.ipv4.addresses = [ {
+          address = "192.168.0.${toString i}";
+          prefixLength = 24;
+        } ];
+
+        services.corosync = {
+          enable = true;
+          clusterName = "zentralwerk-network";
+          nodelist = lib.imap (i: name: {
+            nodeid = i;
+            inherit name;
+            ring_addrs = [
+              (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
+            ];
+          }) (builtins.attrNames nodes);
+        };
+        environment.etc."corosync/authkey" = {
+          source = builtins.toFile "authkey"
+            # minimum length: 128 bytes
+            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
+          mode = "0400";
+        };
+
+        services.pacemaker.enable = true;
+
+        # used for pacemaker resource
+        systemd.services.ha-cat = {
+          description = "Highly available netcat";
+          serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
+        };
+      };
+    in {
+      node1 = node 1;
+      node2 = node 2;
+      node3 = node 3;
+    };
+
+  # sets up pacemaker with resources configuration, then crashes a
+  # node and waits for service restart on another node
+  testScript =
+    let
+      resources = builtins.toFile "cib-resources.xml" ''
+        <resources>
+          <primitive id="cat" class="systemd" type="ha-cat">
+            <operations>
+              <op id="stop-cat" name="start" interval="0" timeout="1s"/>
+              <op id="start-cat" name="start" interval="0" timeout="1s"/>
+              <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
+            </operations>
+          </primitive>
+        </resources>
+      '';
+    in ''
+      import re
+      import time
+
+      start_all()
+
+      ${lib.concatMapStrings (node: ''
+        ${node}.wait_until_succeeds("corosync-quorumtool")
+        ${node}.wait_for_unit("pacemaker.service")
+      '') (builtins.attrNames nodes)}
+
+      # No STONITH device
+      node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
+      # Configure the cat resource
+      node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")
+
+      # wait until the service is started
+      while True:
+        output = node1.succeed("crm_resource -r cat --locate")
+        match = re.search("is running on: (.+)", output)
+        if match:
+          for machine in machines:
+            if machine.name == match.group(1):
+              current_node = machine
+          break
+        time.sleep(1)
+
+      current_node.log("Service running here!")
+      current_node.crash()
+
+      # pick another node that's still up
+      for machine in machines:
+        if machine.booted:
+          check_node = machine
+      # find where the service has been started next
+      while True:
+        output = check_node.succeed("crm_resource -r cat --locate")
+        match = re.search("is running on: (.+)", output)
+        # output will remain the old current_node until the crash is detected by pacemaker
+        if match and match.group(1) != current_node.name:
+          for machine in machines:
+            if machine.name == match.group(1):
+              next_node = machine
+          break
+        time.sleep(1)
+
+      next_node.log("Service migrated here!")
+  '';
+})