summary refs log tree commit diff
diff options
context:
space:
mode:
author illustris <rharikrishnan95@gmail.com> 2022-03-02 12:50:01 +0530
committer illustris <rharikrishnan95@gmail.com> 2022-03-11 14:26:57 +0530
commit e1017adb328da98e7942ec08354dd686653370a4 (patch)
tree 68fd5d43f6119bb9198b10cc4d1cb1dcb9a7a9f3
parent bef71d7c530aeecdcfb66290e6c0b4948d1fb223 (diff)
download nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.tar
nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.tar.gz
nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.tar.bz2
nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.tar.lz
nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.tar.xz
nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.tar.zst
nixpkgs-e1017adb328da98e7942ec08354dd686653370a4.zip
nixos/hadoop: add module options for commonly used service configs
-rw-r--r-- nixos/modules/services/cluster/hadoop/conf.nix 22
-rw-r--r-- nixos/modules/services/cluster/hadoop/default.nix 67
-rw-r--r-- nixos/modules/services/cluster/hadoop/hdfs.nix 52
-rw-r--r-- nixos/modules/services/cluster/hadoop/yarn.nix 77
-rw-r--r-- nixos/tests/hadoop/hadoop.nix 21
-rw-r--r-- nixos/tests/hadoop/hdfs.nix 4
6 files changed, 194 insertions, 49 deletions
diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
index 0caec5cfc20..e3c26a0d550 100644
--- a/nixos/modules/services/cluster/hadoop/conf.nix
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -1,6 +1,6 @@
 { cfg, pkgs, lib }:
 let
-  propertyXml = name: value: ''
+  propertyXml = name: value: lib.optionalString (value != null) ''
     <property>
       <name>${name}</name>
       <value>${builtins.toString value}</value>
@@ -29,16 +29,16 @@ let
     export HADOOP_LOG_DIR=/tmp/hadoop/$USER
   '';
 in
-pkgs.runCommand "hadoop-conf" {} ''
+pkgs.runCommand "hadoop-conf" {} (with cfg; ''
   mkdir -p $out/
-  cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
-  cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
-  cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
-  cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
-  cp ${siteXml "httpfs-site.xml" cfg.httpfsSite}/* $out/
-  cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
+  cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
+  cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
+  cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
+  cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
+  cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
+  cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/
   cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
   cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
-  cp ${cfg.log4jProperties} $out/log4j.properties
-  ${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
-''
+  cp ${log4jProperties} $out/log4j.properties
+  ${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs}
+'')
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
index 57b1d7a90d7..a4fdea81037 100644
--- a/nixos/modules/services/cluster/hadoop/default.nix
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -21,25 +21,50 @@ with lib;
         <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/>
       '';
     };
+    coreSiteInternal = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to core-site.xml based on module options
+      '';
+    };
 
-    hdfsSite = mkOption {
+    hdfsSiteDefault = mkOption {
       default = {
         "dfs.namenode.rpc-bind-host" = "0.0.0.0";
         "dfs.namenode.http-address" = "0.0.0.0:9870";
+        "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
+        "dfs.namenode.http-bind-host" = "0.0.0.0";
       };
       type = types.attrsOf types.anything;
+      description = ''
+        Default options for hdfs-site.xml
+      '';
+    };
+    hdfsSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
       example = literalExpression ''
         {
           "dfs.nameservices" = "namenode1";
         }
       '';
       description = ''
-        Hadoop hdfs-site.xml definition
+        Additional options and overrides for hdfs-site.xml
         <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/>
       '';
     };
+    hdfsSiteInternal = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to hdfs-site.xml based on module options
+      '';
+    };
 
-    mapredSite = mkOption {
+    mapredSiteDefault = mkOption {
       default = {
         "mapreduce.framework.name" = "yarn";
         "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
@@ -55,18 +80,25 @@ with lib;
         }
       '';
       type = types.attrsOf types.anything;
+      description = ''
+        Default options for mapred-site.xml
+      '';
+    };
+    mapredSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
       example = literalExpression ''
-        options.services.hadoop.mapredSite.default // {
+        {
           "mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
         }
       '';
       description = ''
-        Hadoop mapred-site.xml definition
+        Additional options and overrides for mapred-site.xml
         <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/>
       '';
     };
 
-    yarnSite = mkOption {
+    yarnSiteDefault = mkOption {
       default = {
         "yarn.nodemanager.admin-env" = "PATH=$PATH";
         "yarn.nodemanager.aux-services" = "mapreduce_shuffle";
@@ -79,22 +111,33 @@ with lib;
         "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
         "yarn.resourcemanager.bind-host" = "0.0.0.0";
         "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
-        "yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
-        "yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
-        "yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
-        "yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
       };
       type = types.attrsOf types.anything;
+      description = ''
+        Default options for yarn-site.xml
+      '';
+    };
+    yarnSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
       example = literalExpression ''
-        options.services.hadoop.yarnSite.default // {
+        {
           "yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
         }
       '';
       description = ''
-        Hadoop yarn-site.xml definition
+        Additional options and overrides for yarn-site.xml
         <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/>
       '';
     };
+    yarnSiteInternal = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to yarn-site.xml based on module options
+      '';
+    };
 
     httpfsSite = mkOption {
       default = { };
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
index 1725dc62d0c..325a002ad32 100644
--- a/nixos/modules/services/cluster/hadoop/hdfs.nix
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -7,7 +7,7 @@ let
   hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
 
   # Generator for HDFS service options
-  hadoopServiceOption = { serviceName, firewallOption ? true }: {
+  hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
     enable = mkEnableOption serviceName;
     restartIfChanged = mkOption {
       type = types.bool;
@@ -19,13 +19,27 @@ let
       '';
       default = false;
     };
+    extraFlags = mkOption{
+      type = with types; listOf str;
+      default = [];
+      description = "Extra command line flags to pass to ${serviceName}";
+      example = [
+        "-Dcom.sun.management.jmxremote"
+        "-Dcom.sun.management.jmxremote.port=8010"
+      ];
+    };
+    extraEnv = mkOption{
+      type = with types; attrsOf str;
+      default = {};
+      description = "Extra environment variables for ${serviceName}";
+    };
   } // (optionalAttrs firewallOption {
     openFirewall = mkOption {
       type = types.bool;
       default = false;
       description = "Open firewall ports for ${serviceName}.";
     };
-  });
+  }) // (optionalAttrs (extraOpts != null) extraOpts);
 
   # Generator for HDFS service configs
   hadoopServiceConfig =
@@ -36,17 +50,19 @@ let
     , allowedTCPPorts ? [ ]
     , preStart ? ""
     , environment ? { }
+    , extraConfig ? { }
     }: (
 
-      mkIf serviceOptions.enable {
+      mkIf serviceOptions.enable ( mkMerge [{
         systemd.services."hdfs-${toLower name}" = {
-          inherit description preStart environment;
+          inherit description preStart;
+          environment = environment // serviceOptions.extraEnv;
           wantedBy = [ "multi-user.target" ];
           inherit (serviceOptions) restartIfChanged;
           serviceConfig = {
             inherit User;
             SyslogIdentifier = "hdfs-${toLower name}";
-            ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name}";
+            ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
             Restart = "always";
           };
         };
@@ -56,7 +72,7 @@ let
         networking.firewall.allowedTCPPorts = mkIf
           ((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
           allowedTCPPorts;
-      }
+      } extraConfig])
     );
 
 in
@@ -77,7 +93,27 @@ in
       };
     };
 
-    datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; };
+    datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // { # common service options plus datanode-only ones
+      dataDirs = mkOption {
+        default = null; # null leaves dfs.datanode.data.dir unset (null values are skipped when the site XML is rendered)
+        description = "Tier and path definitions for datanode storage.";
+        type = with types; nullOr (listOf (submodule {
+          options = {
+            type = mkOption {
+              type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
+              description = ''
+                Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
+              '';
+            };
+            path = mkOption {
+              type = path;
+              example = "/var/lib/hadoop/hdfs/dn"; # a single path, matching the option type (was a list)
+              description = "Determines where on the local filesystem a data node should store its blocks.";
+            };
+          };
+        }));
+      };
+    };
 
     journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };
 
@@ -122,6 +158,8 @@ in
         50010 # datanode.address
         50020 # datanode.ipc.address
       ];
+      extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in
+        if (d!= null) then (concatMapStringsSep "," (x: "["+x.type+"]file://"+x.path) cfg.hdfs.datanode.dataDirs) else d;
     })
 
     (hadoopServiceConfig {
diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix
index 373d8a70a12..74e16bdec68 100644
--- a/nixos/modules/services/cluster/hadoop/yarn.nix
+++ b/nixos/modules/services/cluster/hadoop/yarn.nix
@@ -13,12 +13,27 @@ let
     '';
     default = false;
   };
+  extraFlags = mkOption{
+    type = with types; listOf str;
+    default = [];
+    description = "Extra command line flags to pass to the service";
+    example = [
+      "-Dcom.sun.management.jmxremote"
+      "-Dcom.sun.management.jmxremote.port=8010"
+    ];
+  };
+  extraEnv = mkOption{
+    type = with types; attrsOf str;
+    default = {};
+    description = "Extra environment variables";
+  };
 in
 {
   options.services.hadoop.yarn = {
     resourcemanager = {
       enable = mkEnableOption "Hadoop YARN ResourceManager";
-      inherit restartIfChanged;
+      inherit restartIfChanged extraFlags extraEnv;
+
       openFirewall = mkOption {
         type = types.bool;
         default = false;
@@ -29,7 +44,46 @@ in
     };
     nodemanager = {
       enable = mkEnableOption "Hadoop YARN NodeManager";
-      inherit restartIfChanged;
+      inherit restartIfChanged extraFlags extraEnv;
+
+      resource = {
+        cpuVCores = mkOption {
+          description = "Number of vcores that can be allocated for containers.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+        maximumAllocationVCores = mkOption {
+          description = "The maximum virtual CPU cores any container can be allocated.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+        memoryMB = mkOption {
+          description = "Amount of physical memory, in MB, that can be allocated for containers.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+        maximumAllocationMB = mkOption {
+          description = "The maximum physical memory any container can be allocated.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+      };
+
+      useCGroups = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Use cgroups to enforce resource limits on containers
+        '';
+      };
+
+      localDir = mkOption {
+        description = "List of directories to store localized files in.";
+        type = with types; nullOr (listOf path);
+        example = [ "/var/lib/hadoop/yarn/nm" ];
+        default = null;
+      };
+
       addBinBash = mkOption {
         type = types.bool;
         default = true;
@@ -62,12 +116,13 @@ in
         description = "Hadoop YARN ResourceManager";
         wantedBy = [ "multi-user.target" ];
         inherit (cfg.yarn.resourcemanager) restartIfChanged;
+        environment = cfg.yarn.resourcemanager.extraEnv;
 
         serviceConfig = {
           User = "yarn";
           SyslogIdentifier = "yarn-resourcemanager";
           ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
-                      " resourcemanager";
+                      " resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}";
           Restart = "always";
         };
       };
@@ -94,6 +149,7 @@ in
         description = "Hadoop YARN NodeManager";
         wantedBy = [ "multi-user.target" ];
         inherit (cfg.yarn.nodemanager) restartIfChanged;
+        environment = cfg.yarn.nodemanager.extraEnv;
 
         preStart = ''
           # create log dir
@@ -115,13 +171,26 @@ in
           SyslogIdentifier = "yarn-nodemanager";
           PermissionsStartOnly = true;
           ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
-                      " nodemanager";
+                      " nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
           Restart = "always";
         };
       };
 
       services.hadoop.gatewayRole.enable = true;
 
+      services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; mkMerge [{ # mkMerge, not `//`: `//` would splice mkIf's wrapper attrs into this set and drop these keys
+        "yarn.nodemanager.local-dirs" = if localDir != null then concatStringsSep "," localDir else null; # Hadoop expects a comma-separated list; null keeps the property unset
+        "yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
+        "yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
+        "yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
+        "yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
+      } (mkIf useCGroups { # cgroup settings are only emitted when useCGroups is enabled
+        "yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
+        "yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
+        "yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
+        "yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
+      })];
+
       networking.firewall.allowedTCPPortRanges = [
         (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
       ];
diff --git a/nixos/tests/hadoop/hadoop.nix b/nixos/tests/hadoop/hadoop.nix
index 42c238ef853..b132f4fa58b 100644
--- a/nixos/tests/hadoop/hadoop.nix
+++ b/nixos/tests/hadoop/hadoop.nix
@@ -10,15 +10,10 @@ import ../make-test-python.nix ({ package, ... }: {
         "fs.defaultFS" = "hdfs://ns1";
       };
       hdfsSite = {
-        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
-        "dfs.namenode.http-bind-host" = "0.0.0.0";
-        "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
-
         # HA Quorum Journal Manager configuration
         "dfs.nameservices" = "ns1";
         "dfs.ha.namenodes.ns1" = "nn1,nn2";
-        "dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
-        "dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
+        "dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
         "dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
         "dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
         "dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
@@ -32,7 +27,7 @@ import ../make-test-python.nix ({ package, ... }: {
         "dfs.ha.fencing.methods" = "shell(true)";
         "ha.zookeeper.quorum" = "zk1:2181";
       };
-      yarnSiteHA = {
+      yarnSite = {
         "yarn.resourcemanager.zk-address" = "zk1:2181";
         "yarn.resourcemanager.ha.enabled" = "true";
         "yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
@@ -116,8 +111,7 @@ import ../make-test-python.nix ({ package, ... }: {
       # YARN cluster
       rm1 = { options, ... }: {
         services.hadoop = {
-          inherit package coreSite hdfsSite;
-          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          inherit package coreSite hdfsSite yarnSite;
           yarn.resourcemanager = {
             enable = true;
             openFirewall = true;
@@ -126,8 +120,7 @@ import ../make-test-python.nix ({ package, ... }: {
       };
       rm2 = { options, ... }: {
         services.hadoop = {
-          inherit package coreSite hdfsSite;
-          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          inherit package coreSite hdfsSite yarnSite;
           yarn.resourcemanager = {
             enable = true;
             openFirewall = true;
@@ -137,8 +130,7 @@ import ../make-test-python.nix ({ package, ... }: {
       nm1 = { options, ... }: {
         virtualisation.memorySize = 2048;
         services.hadoop = {
-          inherit package coreSite hdfsSite;
-          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          inherit package coreSite hdfsSite yarnSite;
           yarn.nodemanager = {
             enable = true;
             openFirewall = true;
@@ -148,8 +140,7 @@ import ../make-test-python.nix ({ package, ... }: {
       client = { options, ... }: {
         services.hadoop = {
           gatewayRole.enable = true;
-          inherit package coreSite hdfsSite;
-          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          inherit package coreSite hdfsSite yarnSite;
         };
       };
   };
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
index cc70fb8ecaf..9415500463d 100644
--- a/nixos/tests/hadoop/hdfs.nix
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -35,6 +35,10 @@ with lib;
         hdfs.datanode = {
           enable = true;
           openFirewall = true;
+          dataDirs = [{
+            type = "DISK";
+            path = "/tmp/dn1";
+          }];
         };
         inherit coreSite;
       };