diff options
author | Ryan Mulligan <ryan@ryantm.com> | 2022-03-15 16:06:56 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-15 16:06:56 -0700 |
commit | 0ab73f9a3fe0d8a305db88650baef4a6bb792f29 (patch) | |
tree | 80b17b7be127a8e5eaaf85d1fdc8dfe9b9cad329 /nixos | |
parent | cec02f35167a49490f3ee8e32673f22f87a8132a (diff) | |
parent | 70c1e849c0b5741e07e7d8d0d418764e2fdb4e24 (diff) | |
download | nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.gz nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.bz2 nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.lz nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.xz nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.tar.zst nixpkgs-0ab73f9a3fe0d8a305db88650baef4a6bb792f29.zip |
Merge pull request #162535 from astro/pacemaker
pacemaker: init
Diffstat (limited to 'nixos')
-rw-r--r-- | nixos/doc/manual/from_md/release-notes/rl-2205.section.xml | 6 | ||||
-rw-r--r-- | nixos/doc/manual/release-notes/rl-2205.section.md | 2 | ||||
-rw-r--r-- | nixos/modules/module-list.nix | 2 | ||||
-rw-r--r-- | nixos/modules/services/cluster/corosync/default.nix | 112 | ||||
-rw-r--r-- | nixos/modules/services/cluster/pacemaker/default.nix | 52 | ||||
-rw-r--r-- | nixos/tests/all-tests.nix | 1 | ||||
-rw-r--r-- | nixos/tests/pacemaker.nix | 110 |
7 files changed, 285 insertions, 0 deletions
diff --git a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
index 396de8cd77c..a23b2489abe 100644
--- a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
+++ b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
@@ -306,6 +306,12 @@
           with many features.
         </para>
       </listitem>
+      <listitem>
+        <para>
+          <link xlink:href="https://clusterlabs.org/pacemaker/">pacemaker</link>
+          cluster resource manager
+        </para>
+      </listitem>
     </itemizedlist>
   </section>
   <section xml:id="sec-release-22.05-incompatibilities">
diff --git a/nixos/doc/manual/release-notes/rl-2205.section.md b/nixos/doc/manual/release-notes/rl-2205.section.md
index 2f730de737c..390ec7b2add 100644
--- a/nixos/doc/manual/release-notes/rl-2205.section.md
+++ b/nixos/doc/manual/release-notes/rl-2205.section.md
@@ -87,6 +87,8 @@ In addition to numerous new and upgraded packages, this release has the followin
 
 - [blocky](https://0xerr0r.github.io/blocky/), fast and lightweight DNS proxy as ad-blocker for local network with many features.
 
+- [pacemaker](https://clusterlabs.org/pacemaker/) cluster resource manager
+
 <!-- To avoid merge conflicts, consider adding your item at an arbitrary place in the list instead. -->
 
 ## Backward Incompatibilities {#sec-release-22.05-incompatibilities}
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 13703968167..68f9c6c1227 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -302,6 +302,7 @@
   ./services/backup/znapzend.nix
   ./services/blockchain/ethereum/geth.nix
   ./services/backup/zrepl.nix
+  ./services/cluster/corosync/default.nix
   ./services/cluster/hadoop/default.nix
   ./services/cluster/k3s/default.nix
   ./services/cluster/kubernetes/addons/dns.nix
@@ -314,6 +315,7 @@
   ./services/cluster/kubernetes/pki.nix
   ./services/cluster/kubernetes/proxy.nix
   ./services/cluster/kubernetes/scheduler.nix
+  ./services/cluster/pacemaker/default.nix
   ./services/cluster/spark/default.nix
   ./services/computing/boinc/client.nix
   ./services/computing/foldingathome/client.nix
diff --git a/nixos/modules/services/cluster/corosync/default.nix b/nixos/modules/services/cluster/corosync/default.nix
new file mode 100644
index 00000000000..b4144917fee
--- /dev/null
+++ b/nixos/modules/services/cluster/corosync/default.nix
@@ -0,0 +1,112 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.corosync;
+in
+{
+  # interface: options exposed under services.corosync
+  options.services.corosync = {
+    enable = mkEnableOption "corosync";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.corosync;
+      defaultText = literalExpression "pkgs.corosync";
+      description = "Package that should be used for corosync.";
+    };
+
+    clusterName = mkOption {
+      type = types.str;
+      default = "nixcluster";
+      description = "Name of the corosync cluster.";
+    };
+
+    extraOptions = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = "Additional options with which to start corosync.";
+    };
+
+    nodelist = mkOption {
+      description = "Corosync nodelist: all cluster members.";
+      default = [];
+      type = with types; listOf (submodule {
+        options = {
+          nodeid = mkOption {
+            type = int;
+            description = "Node ID number";
+          };
+          name = mkOption {
+            type = str;
+            description = "Node name";
+          };
+          ring_addrs = mkOption {
+            type = listOf str;
+            description = "List of addresses, one for each ring.";
+          };
+        };
+      });
+    };
+  };
+
+  # implementation: writes /etc/corosync/* and enables the packaged systemd unit
+  config = mkIf cfg.enable {
+    environment.systemPackages = [ cfg.package ];
+
+    environment.etc."corosync/corosync.conf".text = ''
+      totem {
+        version: 2
+        secauth: on
+        cluster_name: ${cfg.clusterName}
+        transport: knet
+      }
+
+      nodelist {
+        ${concatMapStrings ({ nodeid, name, ring_addrs }: ''
+          node {
+            nodeid: ${toString nodeid}
+            name: ${name}
+            ${concatStrings (imap0 (i: addr: ''
+              ring${toString i}_addr: ${addr}
+            '') ring_addrs)}
+          }
+        '') cfg.nodelist}
+      }
+
+      quorum {
+        # only corosync_votequorum is supported
+        provider: corosync_votequorum
+        wait_for_all: 0
+        ${optionalString (builtins.length cfg.nodelist < 3) ''
+          two_node: 1
+        ''}
+      }
+
+      logging {
+        to_syslog: yes
+      }
+    '';
+
+    environment.etc."corosync/uidgid.d/root".text = ''
+      # allow pacemaker connection by root
+      uidgid {
+        uid: 0
+        gid: 0
+      }
+    '';
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.corosync = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "corosync";
+        StateDirectoryMode = "0700";
+      };
+    };
+
+    environment.etc."sysconfig/corosync".text = lib.optionalString (cfg.extraOptions != []) ''
+      COROSYNC_OPTIONS="${lib.escapeShellArgs cfg.extraOptions}"
+    '';
+  };
+}
diff --git a/nixos/modules/services/cluster/pacemaker/default.nix b/nixos/modules/services/cluster/pacemaker/default.nix
new file mode 100644
index 00000000000..7eeadffcc58
--- /dev/null
+++ b/nixos/modules/services/cluster/pacemaker/default.nix
@@ -0,0 +1,52 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.pacemaker;
+in
+{
+  # interface: options exposed under services.pacemaker
+  options.services.pacemaker = {
+    enable = mkEnableOption "pacemaker";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.pacemaker;
+      defaultText = literalExpression "pkgs.pacemaker";
+      description = "Package that should be used for pacemaker.";
+    };
+  };
+
+  # implementation: refuses to evaluate unless services.corosync is enabled
+  config = mkIf cfg.enable {
+    assertions = [ {
+      assertion = config.services.corosync.enable;
+      message = ''
+        Enabling services.pacemaker requires a services.corosync configuration.
+      '';
+    } ];
+
+    environment.systemPackages = [ cfg.package ];
+
+    # required by pacemaker
+    users.users.hacluster = {
+      isSystemUser = true;
+      group = "pacemaker";
+      home = "/var/lib/pacemaker";
+    };
+    users.groups.pacemaker = {};
+
+    systemd.tmpfiles.rules = [
+      "d /var/log/pacemaker 0700 hacluster pacemaker -"
+    ];
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.pacemaker = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "pacemaker";
+        StateDirectoryMode = "0700";
+      };
+    };
+  };
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 98ca2e08108..eee99fb5e97 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -384,6 +384,7 @@ in
   os-prober = handleTestOn ["x86_64-linux"] ./os-prober.nix {};
   osrm-backend = handleTest ./osrm-backend.nix {};
   overlayfs = handleTest ./overlayfs.nix {};
+  pacemaker = handleTest ./pacemaker.nix {};
   packagekit = handleTest ./packagekit.nix {};
   pam-file-contents = handleTest ./pam/pam-file-contents.nix {};
   pam-oath-login = handleTest ./pam/pam-oath-login.nix {};
diff --git a/nixos/tests/pacemaker.nix b/nixos/tests/pacemaker.nix
new file mode 100644
index 00000000000..68455761495
--- /dev/null
+++ b/nixos/tests/pacemaker.nix
@@ -0,0 +1,110 @@
+import ./make-test-python.nix ({ pkgs, lib, ... }: rec {
+  name = "pacemaker";
+  meta = with pkgs.lib.maintainers; {
+    maintainers = [ astro ];
+  };
+
+  nodes =
+    let
+      node = i: {
+        networking.interfaces.eth1.ipv4.addresses = [ {
+          address = "192.168.0.${toString i}";
+          prefixLength = 24;
+        } ];
+
+        services.corosync = {
+          enable = true;
+          clusterName = "zentralwerk-network";
+          nodelist = lib.imap1 (i: name: {
+            nodeid = i;
+            inherit name;
+            ring_addrs = [
+              (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
+            ];
+          }) (builtins.attrNames nodes);
+        };
+        environment.etc."corosync/authkey" = {
+          source = builtins.toFile "authkey"
+            # minimum length: 128 bytes
+            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
+          mode = "0400";
+        };
+
+        services.pacemaker.enable = true;
+
+        # systemd unit that pacemaker manages as the "cat" resource (type="ha-cat" below)
+        systemd.services.ha-cat = {
+          description = "Highly available netcat";
+          serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
+        };
+      };
+    in {
+      node1 = node 1;
+      node2 = node 2;
+      node3 = node 3;
+    };
+
+  # sets up pacemaker with resources configuration, then crashes a
+  # node and waits for service restart on another node
+  testScript =
+    let
+      resources = builtins.toFile "cib-resources.xml" ''
+        <resources>
+          <primitive id="cat" class="systemd" type="ha-cat">
+            <operations>
+              <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
+              <op id="start-cat" name="start" interval="0" timeout="1s"/>
+              <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
+            </operations>
+          </primitive>
+        </resources>
+      '';
+    in ''
+      import re
+      import time
+
+      start_all()
+
+      ${lib.concatMapStrings (node: ''
+        ${node}.wait_until_succeeds("corosync-quorumtool")
+        ${node}.wait_for_unit("pacemaker.service")
+      '') (builtins.attrNames nodes)}
+
+      # No STONITH device
+      node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
+      # Configure the cat resource
+      node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")
+
+      # wait until the service is started
+      while True:
+          output = node1.succeed("crm_resource -r cat --locate")
+          match = re.search("is running on: (.+)", output)
+          if match:
+              for machine in machines:
+                  if machine.name == match.group(1):
+                      current_node = machine
+              break
+          time.sleep(1)
+
+      current_node.log("Service running here!")
+      current_node.crash()
+
+      # pick another node that's still up
+      for machine in machines:
+          if machine.booted:
+              check_node = machine
+      # find where the service has been started next
+      while True:
+          output = check_node.succeed("crm_resource -r cat --locate")
+          match = re.search("is running on: (.+)", output)
+          # output will remain the old current_node until the crash is detected by pacemaker
+          if match and match.group(1) != current_node.name:
+              for machine in machines:
+                  if machine.name == match.group(1):
+                      next_node = machine
+              break
+          time.sleep(1)
+
+      next_node.log("Service migrated here!")
+    '';
+})