summary refs log tree commit diff
diff options
context:
space:
mode:
authorillustris <rharikrishnan95@gmail.com>2021-09-17 20:49:21 +0530
committerillustris <rharikrishnan95@gmail.com>2021-09-17 20:52:54 +0530
commit682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e (patch)
treeabde07e0b8c4dff9797c6168cf24c1cf24917f1e
parentf023c4710137586b1b443b0100c8d8bce74f5437 (diff)
downloadnixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.tar
nixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.tar.gz
nixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.tar.bz2
nixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.tar.lz
nixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.tar.xz
nixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.tar.zst
nixpkgs-682bbeb2ca4e43d5528f1e25ef61d88422d8ee3e.zip
spark: 2.4.4 -> 2.4.8, init 3.1.2
The "spark" package now points to 3.1.2, and "spark2" points to spark 2.4.8
-rw-r--r--pkgs/applications/networking/cluster/spark/default.nix109
-rw-r--r--pkgs/top-level/all-packages.nix5
2 files changed, 68 insertions, 46 deletions
diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index 76230b8e100..af194afafa9 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -1,56 +1,75 @@
-{ lib, stdenv, fetchzip, makeWrapper, jre, pythonPackages, coreutils, hadoop
+{ lib, stdenv, fetchzip, makeWrapper, jdk8, python3Packages, extraPythonPackages ? [], coreutils, hadoop
 , RSupport? true, R
 }:
 
 with lib;
 
-stdenv.mkDerivation rec {
+let
+  spark = { pname, version, src }:
+    stdenv.mkDerivation rec {
+      inherit pname version src;
+      nativeBuildInputs = [ makeWrapper ];
+      buildInputs = [ jdk8 python3Packages.python ]
+        ++ extraPythonPackages
+        ++ optional RSupport R;
 
-  pname = "spark";
-  version = "2.4.4";
+      untarDir = "${pname}-${version}";
+      installPhase = ''
+        mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
+        mv * $out/lib/${untarDir}
 
-  src = fetchzip {
-    url    = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
-    sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
+        cp $out/lib/${untarDir}/conf/log4j.properties{.template,}
+
+        cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
+        export JAVA_HOME="${jdk8}"
+        export SPARK_HOME="$out/lib/${untarDir}"
+        export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
+        export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
+        export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
+        ${optionalString RSupport ''
+          export SPARKR_R_SHELL="${R}/bin/R"
+          export PATH="\$PATH:${R}/bin"''}
+        EOF
+
+        for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
+          makeWrapper "$n" "$out/bin/$(basename $n)"
+          substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
+        done
+        for n in $(find $out/lib/${untarDir}/sbin -type f); do
+          # Spark deprecated scripts with "slave" in the name.
+          # This line adds forward compatibility with the nixos spark module for
+          # older versions of spark that don't have the new "worker" scripts.
+          ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
+        done
+        ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
+      '';
+
+      meta = {
+        description      = "Apache Spark is a fast and general engine for large-scale data processing";
+        homepage         = "http://spark.apache.org";
+        license          = lib.licenses.asl20;
+        platforms        = lib.platforms.all;
+        maintainers      = with maintainers; [ thoughtpolice offline kamilchm illustris ];
+        repositories.git = "git://git.apache.org/spark.git";
+      };
+    };
+in {
+  spark3 = spark rec {
+    pname = "spark";
+    version = "3.1.2";
+
+    src = fetchzip {
+      url    = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
+      sha256 = "1bgh2y6jm7wqy6yc40rx68xkki31i3jiri2yixb1bm0i9pvsj9yf";
+    };
   };
+  spark2 = spark rec {
+    pname = "spark";
+    version = "2.4.8";
 
-  nativeBuildInputs = [ makeWrapper ];
-  buildInputs = [ jre pythonPackages.python pythonPackages.numpy ]
-    ++ optional RSupport R;
-
-  untarDir = "${pname}-${version}-bin-without-hadoop";
-  installPhase = ''
-    mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
-    mv * $out/lib/${untarDir}
-
-    sed -e 's/INFO, console/WARN, console/' < \
-       $out/lib/${untarDir}/conf/log4j.properties.template > \
-       $out/lib/${untarDir}/conf/log4j.properties
-
-    cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
-    export JAVA_HOME="${jre}"
-    export SPARK_HOME="$out/lib/${untarDir}"
-    export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
-    export PYSPARK_PYTHON="${pythonPackages.python}/bin/${pythonPackages.python.executable}"
-    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
-    ${optionalString RSupport
-      ''export SPARKR_R_SHELL="${R}/bin/R"
-        export PATH=$PATH:"${R}/bin/R"''}
-    EOF
-
-    for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
-      makeWrapper "$n" "$out/bin/$(basename $n)"
-      substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
-    done
-    ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
-  '';
-
-  meta = {
-    description      = "Apache Spark is a fast and general engine for large-scale data processing";
-    homepage         = "http://spark.apache.org";
-    license          = lib.licenses.asl20;
-    platforms        = lib.platforms.all;
-    maintainers      = with maintainers; [ thoughtpolice offline kamilchm ];
-    repositories.git = "git://git.apache.org/spark.git";
+    src = fetchzip {
+      url    = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
+      sha256 = "1mkyq0gz9fiav25vr0dba5ivp0wh0mh7kswwnx8pvsmb6wbwyfxv";
+    };
   };
 }
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index e7fbd365386..7e38b1aaa40 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -13140,7 +13140,10 @@ with pkgs;
 
   self = pkgsi686Linux.callPackage ../development/interpreters/self { };
 
-  spark = callPackage ../applications/networking/cluster/spark { };
+  inherit (callPackages ../applications/networking/cluster/spark { hadoop = hadoop_3_1; })
+    spark3
+    spark2;
+  spark = spark3;
 
   sparkleshare = callPackage ../applications/version-management/sparkleshare { };