From 74c684c5b3daf8b6bf79ecd30b3ca27631a4ab79 Mon Sep 17 00:00:00 2001 From: illustris Date: Mon, 28 Mar 2022 23:18:11 +0530 Subject: hadoop and spark: improve interoperability --- pkgs/applications/networking/cluster/hadoop/default.nix | 5 +++++ pkgs/applications/networking/cluster/spark/default.nix | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pkgs/applications/networking/cluster/hadoop/default.nix b/pkgs/applications/networking/cluster/hadoop/default.nix index adb46540cba..202b9fd5e68 100644 --- a/pkgs/applications/networking/cluster/hadoop/default.nix +++ b/pkgs/applications/networking/cluster/hadoop/default.nix @@ -17,6 +17,8 @@ , openssl , glibc , nixosTests +, sparkSupport ? true +, spark }: with lib; @@ -52,6 +54,9 @@ let --prefix PATH : "${makeBinPath [ bash coreutils which]}"\ --prefix JAVA_LIBRARY_PATH : "${makeLibraryPath buildInputs}" done + '' + optionalString sparkSupport '' + # Add the spark shuffle service jar to YARN + cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/lib/${untarDir}/share/hadoop/yarn/ '' + libPatches; passthru = { inherit tests; }; diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix index 057be75aeb3..a3ca42dff34 100644 --- a/pkgs/applications/networking/cluster/spark/default.nix +++ b/pkgs/applications/networking/cluster/spark/default.nix @@ -6,6 +6,7 @@ , python3Packages , extraPythonPackages ? [ ] , coreutils +, hadoopSupport ? true , hadoop , RSupport ? true , R @@ -17,12 +18,13 @@ let spark = { pname, version, sha256, extraMeta ? {} }: stdenv.mkDerivation rec { inherit pname version; + jdk = if hadoopSupport then hadoop.jdk else jdk8; src = fetchzip { url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz"; sha256 = sha256; }; nativeBuildInputs = [ makeWrapper ]; - buildInputs = [ jdk8 python3Packages.python ] + buildInputs = [ jdk python3Packages.python ] ++ extraPythonPackages ++ optional RSupport R; @@ -34,9 +36,11 @@ let cp $out/lib/${untarDir}/conf/log4j.properties{.template,} cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF - export JAVA_HOME="${jdk8}" + export JAVA_HOME="${jdk}" export SPARK_HOME="$out/lib/${untarDir}" + '' + optionalString hadoopSupport '' export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath) + '' + '' export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}" export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" ${optionalString RSupport '' -- cgit 1.4.1