summary refs log tree commit diff
path: root/pkgs/applications/networking/cluster/spark/default.nix
blob: 057be75aeb343cc52e53c717b1c20eaf99447cef (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Apache Spark releases, installed from the upstream "bin-without-hadoop"
# tarballs. Evaluates to an attribute set with one derivation per release.
{ lib
, stdenv
, fetchzip
, makeWrapper
  # Exported as JAVA_HOME in the generated conf/spark-env.sh.
, jdk8
  # Its interpreter is exported as PYSPARK_PYTHON.
, python3Packages
  # Extra packages appended to buildInputs after the Python interpreter.
, extraPythonPackages ? [ ]
  # Supplies the absolute `dirname` path substituted into the bin/ scripts.
, coreutils
  # The output of `hadoop classpath` is recorded as SPARK_DIST_CLASSPATH.
, hadoop
  # When true, R is added to buildInputs and SPARKR_R_SHELL / PATH are set
  # in spark-env.sh.
, RSupport ? true
, R
}:

with lib;

let
  # Builds one Spark release from the upstream "bin-without-hadoop" tarball.
  #
  # Arguments:
  #   pname, version - select the tarball and name the install directory.
  #   sha256         - hash handed to fetchzip for the downloaded archive.
  #   extraMeta      - attributes merged over the default meta (optional).
  spark = { pname, version, sha256, extraMeta ? {} }:
    stdenv.mkDerivation rec {
      inherit pname version;
      src = fetchzip {
        url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
        inherit sha256;
      };
      nativeBuildInputs = [ makeWrapper ];
      buildInputs = [ jdk8 python3Packages.python ]
        ++ extraPythonPackages
        ++ lib.optional RSupport R;

      # Directory name under $out/lib holding the unpacked distribution; kept
      # as a derivation attribute so it stays readable from outside.
      untarDir = "${pname}-${version}";
      installPhase = ''
        runHook preInstall

        mkdir -p $out/{lib/${untarDir}/conf,bin,share/java}
        mv * $out/lib/${untarDir}

        cp $out/lib/${untarDir}/conf/log4j.properties{.template,}

        # The heredoc delimiter is unquoted on purpose: the hadoop classpath
        # and the build-time PYTHONPATH are expanded now and baked into the
        # file, while the escaped variables are left for runtime.
        cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
        export JAVA_HOME="${jdk8}"
        export SPARK_HOME="$out/lib/${untarDir}"
        export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
        export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
        export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
        ${lib.optionalString RSupport ''
          export SPARKR_R_SHELL="${R}/bin/R"
          export PATH="\$PATH:${R}/bin"''}
        EOF

        # Expose every extension-less script from bin/ through $out/bin and
        # point its dirname calls at coreutils.
        for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
          makeWrapper "$n" "$out/bin/$(basename "$n")"
          substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
        done
        for n in $(find $out/lib/${untarDir}/sbin -type f); do
          # Spark deprecated scripts with "slave" in the name.
          # This adds forward compatibility with the nixos spark module for
          # older versions of spark that don't have the new "worker" scripts.
          target=$(echo "$n" | sed -r 's/slave(s?)\.sh$/worker\1.sh/')
          # Link only when the name really changed and the worker script is
          # not already present, so genuine ln failures are no longer hidden.
          if [ "$target" != "$n" ] && [ ! -e "$target" ]; then
            ln -s "$n" "$target"
          fi
        done
        # Link assembly jars only when the glob matched; an unmatched glob
        # would otherwise yield a dangling link with a literal "*" in its name.
        for jar in $out/lib/${untarDir}/lib/spark-assembly-*.jar; do
          if [ -e "$jar" ]; then
            ln -s "$jar" $out/share/java
          fi
        done

        runHook postInstall
      '';

      meta = {
        description = "Apache Spark is a fast and general engine for large-scale data processing";
        homepage = "https://spark.apache.org/";
        license = lib.licenses.asl20;
        platforms = lib.platforms.all;
        maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
      } // extraMeta;
    };
in
# One attribute per packaged Spark release; each calls the `spark` builder
# with the release's version and source hash.
{
  spark_3_2 = spark {
    pname = "spark";
    version = "3.2.1";
    sha256 = "0kxdqczwmj6pray0h8h1qhygni9m82jzznw5fbv9hrxrkq1v182d";
  };
  spark_3_1 = spark {
    pname = "spark";
    version = "3.1.3";
    sha256 = "sha256-RIQyN5YjxFLfNIrETR3Vv99zsHxt77rhOXHIThCI2Y8=";
  };
  spark_2_4 = spark {
    pname = "spark";
    version = "2.4.8";
    sha256 = "1mkyq0gz9fiav25vr0dba5ivp0wh0mh7kswwnx8pvsmb6wbwyfxv";
    # Merged into meta by the builder to flag this release's known CVE.
    extraMeta.knownVulnerabilities = [ "CVE-2021-38296" ];
  };
}