summary refs log tree commit diff
path: root/pkgs/development/python-modules/datafusion
diff options
context:
space:
mode:
Diffstat (limited to 'pkgs/development/python-modules/datafusion')
-rw-r--r-- pkgs/development/python-modules/datafusion/Cargo.lock.patch 78
-rw-r--r-- pkgs/development/python-modules/datafusion/default.nix 90
2 files changed, 168 insertions, 0 deletions
diff --git a/pkgs/development/python-modules/datafusion/Cargo.lock.patch b/pkgs/development/python-modules/datafusion/Cargo.lock.patch
new file mode 100644
index 00000000000..e4e5eca8af4
--- /dev/null
+++ b/pkgs/development/python-modules/datafusion/Cargo.lock.patch
@@ -0,0 +1,78 @@
+diff --git a/Cargo.lock b/Cargo.lock
+index fa84a54c..3d790e1c 100644
+--- a/Cargo.lock
++++ b/Cargo.lock
+@@ -57,9 +57,9 @@ checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd"
+ 
+ [[package]]
+ name = "arrow"
+-version = "6.0.0"
++version = "6.5.0"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+-checksum = "337e668497751234149fd607f5cb41a6ae7b286b6329589126fe67f0ac55d637"
++checksum = "216c6846a292bdd93c2b93c1baab58c32ff50e2ab5e8d50db333ab518535dd8b"
+ dependencies = [
+  "bitflags",
+  "chrono",
+@@ -212,9 +212,9 @@ dependencies = [
+ 
+ [[package]]
+ name = "comfy-table"
+-version = "4.1.1"
++version = "5.0.0"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+-checksum = "11e95a3e867422fd8d04049041f5671f94d53c32a9dcd82e2be268714942f3f3"
++checksum = "c42350b81f044f576ff88ac750419f914abb46a03831bb1747134344ee7a4e64"
+ dependencies = [
+  "strum",
+  "strum_macros",
+@@ -279,7 +279,7 @@ dependencies = [
+ 
+ [[package]]
+ name = "datafusion"
+-version = "5.1.0"
++version = "6.0.0"
+ dependencies = [
+  "ahash",
+  "arrow",
+@@ -310,7 +310,7 @@ dependencies = [
+ 
+ [[package]]
+ name = "datafusion-python"
+-version = "0.3.0"
++version = "0.4.0"
+ dependencies = [
+  "datafusion",
+  "pyo3",
+@@ -877,9 +877,9 @@ dependencies = [
+ 
+ [[package]]
+ name = "parquet"
+-version = "6.0.0"
++version = "6.5.0"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+-checksum = "d263b9b59ba260518de9e57bd65931c3f765fea0fabacfe84f40d6fde38e841a"
++checksum = "788d9953f4cfbe9db1beff7bebd54299d105e34680d78b82b1ddc85d432cac9d"
+ dependencies = [
+  "arrow",
+  "base64",
+@@ -1228,15 +1228,15 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+ 
+ [[package]]
+ name = "strum"
+-version = "0.21.0"
++version = "0.22.0"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+-checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2"
++checksum = "f7ac893c7d471c8a21f31cfe213ec4f6d9afeed25537c772e08ef3f005f8729e"
+ 
+ [[package]]
+ name = "strum_macros"
+-version = "0.21.1"
++version = "0.22.0"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+-checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec"
++checksum = "339f799d8b549e3744c7ac7feb216383e4005d94bdb22561b3ab8f3b808ae9fb"
+ dependencies = [
+  "heck",
+  "proc-macro2",
diff --git a/pkgs/development/python-modules/datafusion/default.nix b/pkgs/development/python-modules/datafusion/default.nix
new file mode 100644
index 00000000000..4b36df22d18
--- /dev/null
+++ b/pkgs/development/python-modules/datafusion/default.nix
@@ -0,0 +1,90 @@
+{ lib
+, stdenv
+, fetchurl
+, buildPythonPackage
+, fetchPypi
+, fetchFromGitHub # NOTE(review): not referenced anywhere in this expression
+, rustPlatform
+, maturin # NOTE(review): not referenced directly; maturinBuildHook below comes from rustPlatform
+, pytestCheckHook # NOTE(review): not referenced; checkPhase below calls pytest by hand
+, libiconv
+, numpy
+, pandas
+, pyarrow
+, pytest
+}:
+let
+  # The Python bindings (0.4.0, below) live in the python/ subdirectory of the
+  # arrow-datafusion repo, which carries its own, unrelated version (6.0.0).
+  # There is no obvious way to map one version to the other, so the upstream
+  # ref is hard-coded in the URL below.
+  #
+  # A commit hash would work as the ref too; either way the sha256 has the
+  # final say on what content is used when building.
+  cargoLock = fetchurl {
+    url = "https://raw.githubusercontent.com/apache/arrow-datafusion/6.0.0/python/Cargo.lock";
+    sha256 = "sha256-xiv3drEU5jOGsEIh0U01ZQ1NBKobxO2ctp4mxy9iigw=";
+  };
+  # Copy the fetched lock file into the unpacked source and make it writable; shared by the package and cargoDeps.
+  postUnpack = ''
+    cp "${cargoLock}" $sourceRoot/Cargo.lock
+    chmod u+w $sourceRoot/Cargo.lock
+  '';
+in
+buildPythonPackage rec { # rec: pname/version are reused by fetchPypi and checkPhase
+  pname = "datafusion";
+  version = "0.4.0";
+  format = "pyproject";
+
+  src = fetchPypi {
+    inherit pname version;
+    sha256 = "sha256-+YqogteKfNhtI2QbVXv/5CIWm3PcOH653dwONm5ZcL8=";
+  };
+
+  inherit postUnpack; # same hook is also passed to cargoDeps below
+
+  # TODO: remove the patch and the postUnpack hook once
+  # https://github.com/apache/arrow-datafusion/pull/1508 is merged.
+  #
+  # The upstream lock file is out of date as of 6.0.0, so the same patch is
+  # applied to both the package source and the vendored cargo deps source.
+  patches = [ ./Cargo.lock.patch ];
+  cargoDeps = rustPlatform.fetchCargoTarball {
+    inherit src pname version postUnpack;
+    sha256 = "sha256-JGyDxpfBXzduJaMF1sbmRm7KJajHYdVSj+WbiSETiY0=";
+    patches = [ ./Cargo.lock.patch ]; # keep in sync with `patches` above
+  };
+
+  nativeBuildInputs = with rustPlatform; [ # both hooks are attributes of rustPlatform
+    cargoSetupHook
+    maturinBuildHook
+  ];
+
+  buildInputs = lib.optionals stdenv.isDarwin [ libiconv ]; # libiconv is only added on Darwin
+
+  propagatedBuildInputs = [
+    numpy
+    pandas
+    pyarrow
+  ];
+
+  checkInputs = [ pytest ]; # plain pytest; it is invoked explicitly in checkPhase
+  pythonImportsCheck = [ "datafusion" ];
+  # Custom check phase: run pytest with --pyargs on the package name, keeping the pre/post check hooks.
+  checkPhase = ''
+    runHook preCheck
+    pytest --pyargs "${pname}"
+    runHook postCheck
+  '';
+
+  meta = with lib; {
+    description = "Extensible query execution framework";
+    longDescription = ''
+      DataFusion is an extensible query execution framework, written in Rust,
+      that uses Apache Arrow as its in-memory format.
+    '';
+    homepage = "https://arrow.apache.org/datafusion/";
+    license = with licenses; [ asl20 ];
+    maintainers = with maintainers; [ cpcloud ];
+  };
+}