# path: root/nixos/modules/services/cluster/hadoop/yarn.nix
# blob: 74e16bdec687a1dacd18d7e9454468c890ea5eab
{ config, lib, pkgs, ...}:
with lib;
let
  # Shorthand for the top-level Hadoop module configuration.
  cfg = config.services.hadoop;
  # Result of evaluating ./conf.nix for this configuration. It is interpolated
  # into a string below, so it has to be coercible to a string.
  renderedConf = import ./conf.nix { inherit lib pkgs cfg; };
  # Configuration directory (with a trailing slash) handed to `yarn --config`
  # and used as the source of container-executor.cfg.
  hadoopConf = "${renderedConf}/";
  # Option declaration shared by both YARN services: it is inherited into each
  # service's option set and forwarded to the systemd unit's `restartIfChanged`.
  # Note that the default is `false`, i.e. restarts are deferred unless a user
  # explicitly opts in.
  restartIfChanged = mkOption {
    default = false;
    type = with types; bool;
    description = ''
      Automatically restart the service on config change.
      This can be set to false to defer restarts on clusters running critical applications.
      Please consider the security implications of inadvertently running an older version,
      and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
    '';
  };
  # Option declaration shared by both YARN services: the flags are shell-escaped
  # and appended to the service's `yarn ...` command line.
  extraFlags = mkOption {
    description = "Extra command line flags to pass to the service";
    type = types.listOf types.str;
    default = [ ];
    example = [
      "-Dcom.sun.management.jmxremote"
      "-Dcom.sun.management.jmxremote.port=8010"
    ];
  };
  # Option declaration shared by both YARN services: the attribute set becomes
  # the `environment` of the corresponding systemd unit.
  extraEnv = mkOption {
    description = "Extra environment variables";
    type = types.attrsOf types.str;
    default = { };
  };
in
{
  options.services.hadoop.yarn = {
    # Options for the YARN ResourceManager service.
    resourcemanager = {
      # Option declarations shared with the NodeManager (bound in the `let` above).
      inherit extraEnv extraFlags restartIfChanged;

      enable = mkEnableOption "Hadoop YARN ResourceManager";

      # When set, the ResourceManager TCP ports are added to the firewall's
      # allowed ports by the config section of this module.
      openFirewall = mkOption {
        description = ''
          Open firewall ports for resourcemanager
        '';
        default = false;
        type = with types; bool;
      };
    };
    # Options for the YARN NodeManager service.
    nodemanager =
      let
        # All resource limits share one shape: an optional positive integer
        # that is left unset (null) by default.
        mkLimitOption = description: mkOption {
          inherit description;
          default = null;
          type = types.nullOr types.ints.positive;
        };
      in
      {
        # Option declarations shared with the ResourceManager.
        inherit extraEnv extraFlags restartIfChanged;

        enable = mkEnableOption "Hadoop YARN NodeManager";

        # Container resource limits; each maps to one yarn-site property in
        # the config section of this module.
        resource = {
          cpuVCores = mkLimitOption "Number of vcores that can be allocated for containers.";
          maximumAllocationVCores = mkLimitOption "The maximum virtual CPU cores any container can be allocated.";
          memoryMB = mkLimitOption "Amount of physical memory, in MB, that can be allocated for containers.";
          maximumAllocationMB = mkLimitOption "The maximum physical memory any container can be allocated.";
        };

        useCGroups = mkOption {
          description = ''
            Use cgroups to enforce resource limits on containers
          '';
          default = true;
          type = with types; bool;
        };

        localDir = mkOption {
          default = null;
          example = [ "/var/lib/hadoop/yarn/nm" ];
          type = types.nullOr (types.listOf types.path);
          description = "List of directories to store localized files in.";
        };

        # Controls the tmpfiles rule that symlinks /bin/bash into place.
        addBinBash = mkOption {
          description = ''
            Add /bin/bash. This is needed by the linux container executor's launch script.
          '';
          default = true;
          type = with types; bool;
        };

        openFirewall = mkOption {
          description = ''
            Open firewall ports for nodemanager.
            Because containers can listen on any ephemeral port, TCP ports 1024–65535 will be opened.
          '';
          default = false;
          type = with types; bool;
        };
      };
  };

  config = mkMerge [
    # Hosts with the Hadoop gateway role get the `yarn` account. Both YARN
    # services in this module switch the gateway role on, so the user they run
    # as is declared here.
    (mkIf cfg.gatewayRole.enable {
      users.users.yarn.uid = config.ids.uids.yarn;
      users.users.yarn.group = "hadoop";
      users.users.yarn.description = "Hadoop YARN user";
    })

    # ResourceManager: systemd unit, gateway role and optional firewall ports.
    (mkIf cfg.yarn.resourcemanager.enable (
      let
        # Local alias for the ResourceManager option values.
        rmCfg = cfg.yarn.resourcemanager;
      in
      {
        # Enabling the gateway role also declares the `yarn` user used below.
        services.hadoop.gatewayRole.enable = true;

        # Only contributes ports when the user opted in via openFirewall.
        networking.firewall.allowedTCPPorts = mkIf rmCfg.openFirewall [
          8088 # resourcemanager.webapp.address
          8030 # resourcemanager.scheduler.address
          8031 # resourcemanager.resource-tracker.address
          8032 # resourcemanager.address
          8033 # resourcemanager.admin.address
        ];

        systemd.services.yarn-resourcemanager = {
          description = "Hadoop YARN ResourceManager";
          wantedBy = [ "multi-user.target" ];
          restartIfChanged = rmCfg.restartIfChanged;
          environment = rmCfg.extraEnv;

          serviceConfig = {
            User = "yarn";
            SyslogIdentifier = "yarn-resourcemanager";
            Restart = "always";
            # yarn is pointed at the generated configuration directory; the
            # user-supplied extra flags are shell-escaped and appended last.
            ExecStart =
              "${cfg.package}/bin/yarn --config ${hadoopConf}"
              + "  resourcemanager "
              + escapeShellArgs rmCfg.extraFlags;
          };
        };
      }
    ))

    # NodeManager: /bin/bash symlink, systemd unit, gateway role, yarn-site
    # properties derived from the module options, and optional firewall range.
    (mkIf cfg.yarn.nodemanager.enable {
      # Needed because yarn hardcodes /bin/bash in container start scripts
      # These scripts can't be patched, they are generated at runtime
      systemd.tmpfiles.rules = [
        (mkIf cfg.yarn.nodemanager.addBinBash "L /bin/bash - - - - /run/current-system/sw/bin/bash")
      ];

      systemd.services.yarn-nodemanager = {
        description = "Hadoop YARN NodeManager";
        wantedBy = [ "multi-user.target" ];
        inherit (cfg.yarn.nodemanager) restartIfChanged;
        environment = cfg.yarn.nodemanager.extraEnv;

        # Runs before the daemon starts. With PermissionsStartOnly (see
        # serviceConfig) systemd applies `User` only to ExecStart, so the
        # chown/chgrp/chmod/umount calls here are not run as `yarn`.
        preStart = ''
          # create log dir
          mkdir -p /var/log/hadoop/yarn/nodemanager
          chown yarn:hadoop /var/log/hadoop/yarn/nodemanager

          # set up setuid container executor binary
          umount /run/wrappers/yarn-nodemanager/cgroup/cpu || true
          rm -rf /run/wrappers/yarn-nodemanager/ || true
          mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop,cgroup/cpu}
          cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
          chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
          chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
          cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/
        '';

        serviceConfig = {
          User = "yarn";
          SyslogIdentifier = "yarn-nodemanager";
          PermissionsStartOnly = true;
          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
                      " nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
          Restart = "always";
        };
      };

      # Enabling the gateway role also declares the `yarn` user used above.
      services.hadoop.gatewayRole.enable = true;

      # The two groups of properties must be combined with mkMerge, not `//`.
      # `mkIf` returns an attribute set of the form
      # `{ _type = "if"; condition; content; }`, so `{ ... } // mkIf c { ... }`
      # is itself interpreted as an mkIf by the module system: only `content`
      # survives and every property of the left-hand set is silently discarded.
      services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; mkMerge [
        # Properties derived from module options. Options left unset (null) are
        # filtered out so that they do not emit a property at all.
        (filterAttrs (_name: value: value != null) {
          # Hadoop expects a comma-separated list of directories here.
          "yarn.nodemanager.local-dirs" =
            if localDir == null then null else concatMapStringsSep "," toString localDir;
          "yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
          "yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
          "yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
          "yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
        })
        # cgroup-based resource enforcement; the mount path matches the
        # directory created in preStart.
        (mkIf useCGroups {
          "yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
          "yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
          "yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
          "yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
        })
      ];

      # Only contributes the range when the user opted in via openFirewall.
      networking.firewall.allowedTCPPortRanges = [
        (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
      ];
    })

  ];
}