summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- nixos/modules/misc/ids.nix | 8
-rw-r--r-- nixos/modules/module-list.nix | 1
-rw-r--r-- nixos/modules/services/cluster/hadoop/conf.nix | 31
-rw-r--r-- nixos/modules/services/cluster/hadoop/default.nix | 63
-rw-r--r-- nixos/modules/services/cluster/hadoop/hdfs.nix | 73
-rw-r--r-- nixos/modules/services/cluster/hadoop/yarn.nix | 74
-rw-r--r-- nixos/release.nix | 2
-rw-r--r-- nixos/tests/hadoop/hdfs.nix | 54
-rw-r--r-- nixos/tests/hadoop/yarn.nix | 46
9 files changed, 352 insertions, 0 deletions
diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix
index 73231edf077..aac86087f9e 100644
--- a/nixos/modules/misc/ids.nix
+++ b/nixos/modules/misc/ids.nix
@@ -317,6 +317,10 @@
       restic = 291;
       openvpn = 292;
       meguca = 293;
+      yarn = 294;
+      hdfs = 295;
+      mapred = 296;
+      hadoop = 297;
 
       # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
 
@@ -594,6 +598,10 @@
       restic = 291;
       openvpn = 292;
       meguca = 293;
+      yarn = 294;
+      hdfs = 295;
+      mapred = 296;
+      hadoop = 297;
 
       # When adding a gid, make sure it doesn't match an existing
       # uid. Users and groups with the same name should have equal
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index c425f3c6507..5ed01a8da40 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -178,6 +178,7 @@
   ./services/backup/rsnapshot.nix
   ./services/backup/tarsnap.nix
   ./services/backup/znapzend.nix
+  ./services/cluster/hadoop/default.nix
   ./services/cluster/kubernetes/default.nix
   ./services/cluster/kubernetes/dns.nix
   ./services/cluster/kubernetes/dashboard.nix
diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
new file mode 100644
index 00000000000..38db10406b9
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -0,0 +1,31 @@
+{ hadoop, pkgs }:
+let
+  propertyXml = name: value: ''
+    <property>
+      <name>${name}</name>
+      <value>${if builtins.isBool value then (if value then "true" else "false") else builtins.toString value}</value>
+    </property>
+  ''; # booleans are spelled true/false because builtins.toString would yield "1" / ""
+  siteXml = fileName: properties: pkgs.writeTextDir fileName ''
+    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
+    <!-- generated by NixOS -->
+    <configuration>
+      ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
+    </configuration>
+  '';
+  userFunctions = ''
+    hadoop_verify_logdir() {
+      echo Skipping verification of log directory
+    }
+  '';
+in
+pkgs.buildEnv {
+  name = "hadoop-conf";
+  paths = [
+    (siteXml "core-site.xml" hadoop.coreSite)
+    (siteXml "hdfs-site.xml" hadoop.hdfsSite)
+    (siteXml "mapred-site.xml" hadoop.mapredSite)
+    (siteXml "yarn-site.xml" hadoop.yarnSite)
+    (pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
+  ];
+}
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
new file mode 100644
index 00000000000..53c13fd0603
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -0,0 +1,63 @@
+{ config, lib, pkgs, ...}:
+let 
+  cfg = config.services.hadoop;
+  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+in
+with lib;
+{
+  imports = [ ./yarn.nix ./hdfs.nix ];
+
+  options.services.hadoop = {
+    coreSite = mkOption {
+      default = {};
+      example = {
+        "fs.defaultFS" = "hdfs://localhost";
+      };
+      description = "Hadoop core-site.xml definition";
+    };
+
+    hdfsSite = mkOption {
+      default = {};
+      example = {
+        "dfs.nameservices" = "namenode1";
+      };
+      description = "Hadoop hdfs-site.xml definition";
+    };
+
+    mapredSite = mkOption {
+      default = {};
+      example = {
+        "mapreduce.map.cpu.vcores" = "1";
+      };
+      description = "Hadoop mapred-site.xml definition";
+    };
+
+    yarnSite = mkOption {
+      default = {};
+      example = {
+        "yarn.resourcemanager.ha.id" = "resourcemanager1";
+      };
+      description = "Hadoop yarn-site.xml definition";
+    };
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.hadoop;
+      defaultText = "pkgs.hadoop";
+      example = literalExample "pkgs.hadoop_3_1";
+      description =
+        "Hadoop package providing the bin/hdfs and bin/yarn launchers run by the HDFS and YARN services.";
+    };
+  };
+
+
+  config = mkMerge [
+    (mkIf (builtins.hasAttr "yarn" config.users.extraUsers ||
+           builtins.hasAttr "hdfs" config.users.extraUsers ) {
+      users.extraGroups.hadoop = {
+        gid = config.ids.gids.hadoop;
+      };
+    })
+
+  ];
+}
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
new file mode 100644
index 00000000000..48020e6139c
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -0,0 +1,73 @@
+{ config, lib, pkgs, ...}:
+let
+  cfg = config.services.hadoop;
+  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+in
+with lib;
+{
+  options.services.hadoop.hdfs = {
+    namenode.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop HDFS NameNode
+      '';
+    };
+    datanode.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop HDFS DataNode
+      '';
+    };
+  };
+
+  config = mkMerge [
+    (mkIf cfg.hdfs.namenode.enabled {
+      systemd.services."hdfs-namenode" = {
+        description = "Hadoop HDFS NameNode";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        preStart = ''
+          ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
+        '';
+
+        serviceConfig = {
+          User = "hdfs";
+          SyslogIdentifier = "hdfs-namenode";
+          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
+        };
+      };
+    })
+    (mkIf cfg.hdfs.datanode.enabled {
+      systemd.services."hdfs-datanode" = {
+        description = "Hadoop HDFS DataNode";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        serviceConfig = {
+          User = "hdfs";
+          SyslogIdentifier = "hdfs-datanode";
+          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
+        };
+      };
+    })
+    (mkIf (
+        cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
+    ) {
+      users.extraUsers.hdfs = {
+        description = "Hadoop HDFS user";
+        group = "hadoop";
+        uid = config.ids.uids.hdfs;
+      };
+    })
+
+  ];
+}
diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix
new file mode 100644
index 00000000000..ce5b04a331c
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/yarn.nix
@@ -0,0 +1,74 @@
+{ config, lib, pkgs, ...}:
+let
+  cfg = config.services.hadoop;
+  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+in
+with lib;
+{
+  options.services.hadoop.yarn = {
+    resourcemanager.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop YARN ResourceManager
+      '';
+    };
+    nodemanager.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop YARN NodeManager
+      '';
+    };
+  };
+
+  config = mkMerge [
+    (mkIf (
+        cfg.yarn.resourcemanager.enabled || cfg.yarn.nodemanager.enabled
+    ) {
+
+      users.extraUsers.yarn = {
+        description = "Hadoop YARN user";
+        group = "hadoop";
+        uid = config.ids.uids.yarn;
+      };
+    })
+
+    (mkIf cfg.yarn.resourcemanager.enabled {
+      systemd.services."yarn-resourcemanager" = {
+        description = "Hadoop YARN ResourceManager";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        serviceConfig = {
+          User = "yarn";
+          SyslogIdentifier = "yarn-resourcemanager";
+          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
+                      " resourcemanager";
+        };
+      };
+    })
+
+    (mkIf cfg.yarn.nodemanager.enabled {
+      systemd.services."yarn-nodemanager" = {
+        description = "Hadoop YARN NodeManager";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        serviceConfig = {
+          User = "yarn";
+          SyslogIdentifier = "yarn-nodemanager";
+          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
+                      " nodemanager";
+        };
+      };
+    })
+
+  ];
+}
diff --git a/nixos/release.nix b/nixos/release.nix
index e494fa35029..93566762b3a 100644
--- a/nixos/release.nix
+++ b/nixos/release.nix
@@ -299,6 +299,8 @@ in rec {
   tests.gnome3-gdm = callTest tests/gnome3-gdm.nix {};
   tests.grafana = callTest tests/grafana.nix {};
   tests.graphite = callTest tests/graphite.nix {};
+  tests.hadoop.hdfs = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/hdfs.nix {};
+  tests.hadoop.yarn = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/yarn.nix {};
   tests.hardened = callTest tests/hardened.nix { };
   tests.haproxy = callTest tests/haproxy.nix {};
   tests.hibernate = callTest tests/hibernate.nix {};
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
new file mode 100644
index 00000000000..4206c940c1a
--- /dev/null
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -0,0 +1,54 @@
+import ../make-test.nix ({pkgs, ...}: {
+  nodes = {
+    namenode = {pkgs, config, ...}: {
+      services.hadoop = {
+        package = pkgs.hadoop_3_1;
+        hdfs.namenode.enabled = true;
+        coreSite = {
+          "fs.defaultFS" = "hdfs://namenode:8020";
+        };
+        hdfsSite = {
+          "dfs.replication" = 1;
+          "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+          "dfs.namenode.http-bind-host" = "0.0.0.0";
+        };
+      };
+      networking.firewall.allowedTCPPorts = [
+        9870 # namenode.http-address
+        8020 # namenode.rpc-address
+      ];
+    };
+    datanode = {pkgs, config, ...}: {
+      services.hadoop = {
+        package = pkgs.hadoop_3_1;
+        hdfs.datanode.enabled = true;
+        coreSite = {
+          "fs.defaultFS" = "hdfs://namenode:8020";
+        };
+      };
+      networking.firewall.allowedTCPPorts = [
+        9864 # datanode.http.address
+        9866 # datanode.address
+        9867 # datanode.ipc.address
+      ];
+    };
+  };
+
+  testScript = ''
+    startAll;
+    # NameNode: wait for the unit, then for its RPC (8020) and HTTP (9870) ports.
+    $namenode->waitForUnit("hdfs-namenode");
+    $namenode->waitForUnit("network.target");
+    $namenode->waitForOpenPort(8020);
+    $namenode->waitForOpenPort(9870);
+    # DataNode: wait for the unit, then for the three ports opened in its firewall.
+    $datanode->waitForUnit("hdfs-datanode");
+    $datanode->waitForUnit("network.target");
+    $datanode->waitForOpenPort(9864);
+    $datanode->waitForOpenPort(9866);
+    $datanode->waitForOpenPort(9867);
+    # Finally, fetch each daemon's HTTP endpoint from its own node.
+    $namenode->succeed("curl http://namenode:9870");
+    $datanode->succeed("curl http://datanode:9864");
+  '';
+})
diff --git a/nixos/tests/hadoop/yarn.nix b/nixos/tests/hadoop/yarn.nix
new file mode 100644
index 00000000000..e97cc1acc90
--- /dev/null
+++ b/nixos/tests/hadoop/yarn.nix
@@ -0,0 +1,46 @@
+import ../make-test.nix ({pkgs, ...}: {
+  nodes = {
+    resourcemanager = {pkgs, config, ...}: {
+      services.hadoop.package = pkgs.hadoop_3_1;
+      services.hadoop.yarn.resourcemanager.enabled = true;
+      services.hadoop.yarnSite = {
+        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
+      };
+      networking.firewall.allowedTCPPorts = [
+        8088 # resourcemanager.webapp.address
+        8031 # resourcemanager.resource-tracker.address
+      ];
+    };
+    nodemanager = {pkgs, config, ...}: {
+      services.hadoop.package = pkgs.hadoop_3_1;
+      services.hadoop.yarn.nodemanager.enabled = true;
+      services.hadoop.yarnSite = {
+        "yarn.resourcemanager.hostname" = "resourcemanager";
+        "yarn.nodemanager.log-dirs" = "/tmp/userlogs";
+        "yarn.nodemanager.address" = "0.0.0.0:8041";
+      };
+      networking.firewall.allowedTCPPorts = [
+        8042 # nodemanager.webapp.address
+        8041 # nodemanager.address
+      ];
+    };
+
+  };
+
+  testScript = ''
+    startAll;
+
+    $resourcemanager->waitForUnit("yarn-resourcemanager");
+    $resourcemanager->waitForUnit("network.target");
+    $resourcemanager->waitForOpenPort(8031);
+    $resourcemanager->waitForOpenPort(8088);
+
+    $nodemanager->waitForUnit("yarn-nodemanager");
+    $nodemanager->waitForUnit("network.target");
+    $nodemanager->waitForOpenPort(8042);
+    $nodemanager->waitForOpenPort(8041);
+
+    $resourcemanager->succeed("curl http://localhost:8088");
+    $nodemanager->succeed("curl http://localhost:8042");
+  '';
+})