From 9ca4363191f05147d0bd23ffb61838d9c1ee890e Mon Sep 17 00:00:00 2001
From: illustris
Date: Tue, 2 Nov 2021 18:34:04 +0530
Subject: nixos/hadoop: add HTTPFS

---
 nixos/modules/services/cluster/hadoop/conf.nix    |  1 +
 nixos/modules/services/cluster/hadoop/default.nix | 14 ++++++-
 nixos/modules/services/cluster/hadoop/hdfs.nix    | 57 +++++++++++++++++++++-
 nixos/tests/hadoop/hdfs.nix                       | 23 +++++++----
 4 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
index 69472408cab..0caec5cfc20 100644
--- a/nixos/modules/services/cluster/hadoop/conf.nix
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -35,6 +35,7 @@ pkgs.runCommand "hadoop-conf" {} ''
   cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
   cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
   cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
+  cp ${siteXml "httpfs-site.xml" cfg.httpfsSite}/* $out/
   cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
   cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
   cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
index da3e47b95d4..e4dcfde80fb 100644
--- a/nixos/modules/services/cluster/hadoop/default.nix
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -70,6 +70,17 @@ with lib;
       description = "Hadoop yarn-site.xml definition";
     };
 
+    httpfsSite = mkOption {
+      default = { };
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        {
+          "hadoop.http.max.threads" = 500;
+        }
+      '';
+      description = "Hadoop httpfs-site.xml definition";
+    };
+
     log4jProperties = mkOption {
       default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
       type = types.path;
@@ -118,7 +129,8 @@ with lib;
 
   config = mkMerge [
     (mkIf (builtins.hasAttr "yarn" config.users.users ||
-      builtins.hasAttr "hdfs" config.users.users) {
+      builtins.hasAttr "hdfs" config.users.users ||
+      builtins.hasAttr "httpfs" config.users.users) {
       users.groups.hadoop = {
         gid = config.ids.gids.hadoop;
       };
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
index 961aa35a4b1..11b855b0c71 100644
--- a/nixos/modules/services/cluster/hadoop/hdfs.nix
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -86,6 +86,30 @@ in
       };
       inherit restartIfChanged;
     };
+    httpfs = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the HDFS HTTPFS server
+        '';
+      };
+      tempPath = mkOption {
+        type = types.path;
+        default = "/tmp/hadoop/httpfs";
+        description = ''
+          HTTPFS_TEMP path used by HTTPFS
+        '';
+      };
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for HTTPFS
+        '';
+      };
+      inherit restartIfChanged;
+    };
   };
 
   config = mkMerge [
@@ -166,6 +190,31 @@ in
         };
       };
     })
+    (mkIf cfg.hdfs.httpfs.enabled {
+      systemd.services.hdfs-httpfs = {
+        description = "Hadoop httpfs";
+        wantedBy = [ "multi-user.target" ];
+        inherit (cfg.hdfs.httpfs) restartIfChanged;
+
+        environment = {
+          HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
+        };
+
+        preStart = ''
+          mkdir -p $HTTPFS_TEMP
+        '';
+
+        serviceConfig = {
+          User = "httpfs";
+          SyslogIdentifier = "hdfs-httpfs";
+          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} httpfs";
+          Restart = "always";
+        };
+      };
+      networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.httpfs.openFirewall [
+        14000 # httpfs.http.port
+      ]);
+    })
     (mkIf (
         cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled || cfg.hdfs.journalnode.enabled || cfg.hdfs.zkfc.enabled
       ) {
@@ -175,6 +224,12 @@ in
         uid = config.ids.uids.hdfs;
       };
     })
-
+    (mkIf cfg.hdfs.httpfs.enabled {
+      users.users.httpfs = {
+        description = "Hadoop HTTPFS user";
+        group = "hadoop";
+        isSystemUser = true;
+      };
+    })
   ];
 }
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
index 9bc32cc7f8a..e2cef32d05d 100644
--- a/nixos/tests/hadoop/hdfs.nix
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -2,19 +2,20 @@ import ../make-test-python.nix ({...}: {
   nodes = {
     namenode = {pkgs, ...}: {
+      virtualisation.memorySize = 1024;
       services.hadoop = {
         package = pkgs.hadoop;
-        hdfs.namenode = {
-          enabled = true;
-          formatOnInit = true;
+        hdfs = {
+          namenode = {
+            enabled = true;
+            formatOnInit = true;
+          };
+          httpfs.enabled = true;
         };
         coreSite = {
           "fs.defaultFS" = "hdfs://namenode:8020";
-        };
-        hdfsSite = {
-          "dfs.replication" = 1;
-          "dfs.namenode.rpc-bind-host" = "0.0.0.0";
-          "dfs.namenode.http-bind-host" = "0.0.0.0";
+          "hadoop.proxyuser.httpfs.groups" = "*";
+          "hadoop.proxyuser.httpfs.hosts" = "*";
         };
       };
     };
@@ -24,6 +25,8 @@ import ../make-test-python.nix ({...}: {
       hdfs.datanode.enabled = true;
       coreSite = {
         "fs.defaultFS" = "hdfs://namenode:8020";
+        "hadoop.proxyuser.httpfs.groups" = "*";
+        "hadoop.proxyuser.httpfs.hosts" = "*";
       };
     };
   };
@@ -49,5 +52,9 @@ import ../make-test-python.nix ({...}: {
     datanode.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
     datanode.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
     assert "testfilecontents" in datanode.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
+
+    namenode.wait_for_unit("hdfs-httpfs")
+    namenode.wait_for_open_port(14000)
+    assert "testfilecontents" in datanode.succeed("curl -f \"http://namenode:14000/webhdfs/v1/testfile?user.name=hdfs&op=OPEN\" 2>&1")
   '';
 })
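
For anyone who wants to try the feature, here is a minimal sketch of a
configuration exercising the options this patch adds. It mirrors the test
above; the hostname "namenode" and the wide-open proxyuser values are
illustrative placeholders, not production settings:

  services.hadoop = {
    package = pkgs.hadoop;
    hdfs = {
      namenode = {
        enabled = true;
        formatOnInit = true;
      };
      # starts the hdfs-httpfs unit; with the default openFirewall = true
      # this also opens TCP port 14000 (httpfs.http.port)
      httpfs.enabled = true;
    };
    coreSite = {
      "fs.defaultFS" = "hdfs://namenode:8020";
      # let the httpfs service user act on behalf of requesting users;
      # the namenode refuses impersonation without proxyuser settings
      "hadoop.proxyuser.httpfs.groups" = "*";
      "hadoop.proxyuser.httpfs.hosts" = "*";
    };
  };

A file written through the ordinary HDFS client can then be read back over
the WebHDFS-compatible REST endpoint, as the test does:

  curl "http://namenode:14000/webhdfs/v1/testfile?user.name=hdfs&op=OPEN"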