summary refs log tree commit diff
path: root/pkgs/development/python-modules/apache-beam
diff options
context:
space:
mode:
authorAlexander Tsvyashchenko <ndl@endl.ch>2022-01-06 21:55:12 +0100
committerAlexander Tsvyashchenko <ndl@endl.ch>2022-01-07 18:01:35 +0100
commit0341138ad72a1900ee347c6804da9212be443e08 (patch)
tree52855b9b03758802329b2e89ed360f5cae7a4426 /pkgs/development/python-modules/apache-beam
parentdd93ef8e7e1e0426adb60ee71043f120cdec9ecd (diff)
downloadnixpkgs-0341138ad72a1900ee347c6804da9212be443e08.tar
nixpkgs-0341138ad72a1900ee347c6804da9212be443e08.tar.gz
nixpkgs-0341138ad72a1900ee347c6804da9212be443e08.tar.bz2
nixpkgs-0341138ad72a1900ee347c6804da9212be443e08.tar.lz
nixpkgs-0341138ad72a1900ee347c6804da9212be443e08.tar.xz
nixpkgs-0341138ad72a1900ee347c6804da9212be443e08.tar.zst
nixpkgs-0341138ad72a1900ee347c6804da9212be443e08.zip
python3Packages.apache-beam: init at 2.35.0
Diffstat (limited to 'pkgs/development/python-modules/apache-beam')
-rw-r--r--pkgs/development/python-modules/apache-beam/default.nix145
-rw-r--r--pkgs/development/python-modules/apache-beam/fix-cython.patch41
-rw-r--r--pkgs/development/python-modules/apache-beam/relax-deps.patch20
3 files changed, 206 insertions, 0 deletions
diff --git a/pkgs/development/python-modules/apache-beam/default.nix b/pkgs/development/python-modules/apache-beam/default.nix
new file mode 100644
index 00000000000..44748f2fdb0
--- /dev/null
+++ b/pkgs/development/python-modules/apache-beam/default.nix
@@ -0,0 +1,145 @@
+{ buildPythonPackage
+, cloudpickle
+, crcmod
+, cython
+, dill
+, fastavro
+, fetchFromGitHub
+, freezegun
+, grpcio
+, grpcio-tools
+, hdfs
+, httplib2
+, lib
+, mock
+, mypy-protobuf
+, numpy
+, oauth2client
+, orjson
+, pandas
+, parameterized
+, proto-plus
+, protobuf
+, psycopg2
+, pyarrow
+, pydot
+, pyhamcrest
+, pymongo
+, pytest-timeout
+, pytest-xdist
+, pytestCheckHook
+, python
+, python-dateutil
+, pytz
+, pyyaml
+, requests
+, requests-mock
+, setuptools
+, sqlalchemy
+, tenacity
+, typing-extensions
+}:
+
+buildPythonPackage rec {
+  pname = "apache-beam";
+  version = "2.35.0";
+
+  src = fetchFromGitHub {
+    owner = "apache";
+    repo = "beam";
+    rev = "v${version}";
+    sha256 = "0qxkas33d8i6yj133plnadbfm74ak7arn7ldpziyiwdav3hj68sy";
+  };
+
+  patches = [
+    ./relax-deps.patch
+    # Fixes https://issues.apache.org/jira/browse/BEAM-9324
+    ./fix-cython.patch
+  ];
+
+  sourceRoot = "source/sdks/python";
+
+  nativeBuildInputs = [
+    cython
+    grpcio-tools
+    mypy-protobuf
+  ];
+
+  propagatedBuildInputs = [
+    cloudpickle
+    crcmod
+    cython
+    dill
+    fastavro
+    grpcio
+    hdfs
+    httplib2
+    numpy
+    oauth2client
+    orjson
+    proto-plus
+    protobuf
+    pyarrow
+    pydot
+    pymongo
+    python-dateutil
+    pytz
+    requests
+    setuptools
+    typing-extensions
+  ];
+
+  pythonImportsCheck = [
+    "apache_beam"
+  ];
+
+  checkInputs = [
+    freezegun
+    mock
+    pandas
+    parameterized
+    psycopg2
+    pyhamcrest
+    pytest-timeout
+    pytest-xdist
+    pytestCheckHook
+    pyyaml
+    requests-mock
+    sqlalchemy
+    tenacity
+  ];
+
+  # Make sure we're running the tests for the actually installed
+  # package, so that cython's .so files are available.
+  preCheck = "cd $out/lib/${python.libPrefix}/site-packages";
+
+  disabledTestPaths = [
+    # These tests depend on the availability of specific servers backends.
+    "apache_beam/runners/portability/flink_runner_test.py"
+    "apache_beam/runners/portability/samza_runner_test.py"
+    "apache_beam/runners/portability/spark_runner_test.py"
+  ];
+
+  disabledTests = [
+    # The reasons of failures for these tests are unclear.
+    # They reproduce in Docker with Ubuntu 22.04
+    # (= they're not `nixpkgs`-specific) but given the upstream uses
+    # quite elaborate testing infra with containers and multiple
+    # different runners - I don't expect them to help debugging these
+    # when running via our (= custom from their PoV) testing infra.
+    "testBuildListUnpack"
+    "testBuildTupleUnpack"
+    "testBuildTupleUnpackWithCall"
+    "test_convert_bare_types"
+    "test_incomparable_default"
+    "test_pardo_type_inference"
+    "test_with_main_session"
+  ];
+
+  meta = with lib; {
+    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
+    homepage = "https://beam.apache.org/";
+    license = licenses.asl20;
+    maintainers = with maintainers; [ ndl ];
+  };
+}
diff --git a/pkgs/development/python-modules/apache-beam/fix-cython.patch b/pkgs/development/python-modules/apache-beam/fix-cython.patch
new file mode 100644
index 00000000000..f73d75b4b84
--- /dev/null
+++ b/pkgs/development/python-modules/apache-beam/fix-cython.patch
@@ -0,0 +1,41 @@
+diff --git a/apache_beam/runners/worker/operations.py b/apache_beam/runners/worker/operations.py
+index 3464c5750c..5921c72b90 100644
+--- a/apache_beam/runners/worker/operations.py
++++ b/apache_beam/runners/worker/operations.py
+@@ -69,18 +69,6 @@ if TYPE_CHECKING:
+   from apache_beam.runners.worker.statesampler import StateSampler
+   from apache_beam.transforms.userstate import TimerSpec
+ 
+-# Allow some "pure mode" declarations.
+-try:
+-  import cython
+-except ImportError:
+-
+-  class FakeCython(object):
+-    @staticmethod
+-    def cast(type, value):
+-      return value
+-
+-  globals()['cython'] = FakeCython()
+-
+ _globally_windowed_value = GlobalWindows.windowed_value(None)
+ _global_window_type = type(_globally_windowed_value.windows[0])
+ 
+@@ -149,7 +137,7 @@ class ConsumerSet(Receiver):
+     # type: (WindowedValue) -> None
+     self.update_counters_start(windowed_value)
+     for consumer in self.consumers:
+-      cython.cast(Operation, consumer).process(windowed_value)
++      consumer.process(windowed_value)
+     self.update_counters_finish()
+ 
+   def try_split(self, fraction_of_remainder):
+@@ -345,7 +333,7 @@ class Operation(object):
+ 
+   def output(self, windowed_value, output_index=0):
+     # type: (WindowedValue, int) -> None
+-    cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
++    self.receivers[output_index].receive(windowed_value)
+ 
+   def add_receiver(self, operation, output_index=0):
+     # type: (Operation, int) -> None
diff --git a/pkgs/development/python-modules/apache-beam/relax-deps.patch b/pkgs/development/python-modules/apache-beam/relax-deps.patch
new file mode 100644
index 00000000000..67bd38f5e7c
--- /dev/null
+++ b/pkgs/development/python-modules/apache-beam/relax-deps.patch
@@ -0,0 +1,20 @@
+diff --git a/setup.py b/setup.py
+index 9429459622..2727b3becb 100644
+--- a/setup.py
++++ b/setup.py
+@@ -136,12 +136,12 @@ REQUIRED_PACKAGES = [
+     # version of dill. It is best to use the same version of dill on client and
+     # server, therefore list of allowed versions is very narrow.
+     # See: https://github.com/uqfoundation/dill/issues/341.
+-    'dill>=0.3.1.1,<0.3.2',
++    'dill>=0.3.1.1',
+     'fastavro>=0.21.4,<2',
+     'grpcio>=1.29.0,<2',
+     'hdfs>=2.1.0,<3.0.0',
+-    'httplib2>=0.8,<0.20.0',
+-    'numpy>=1.14.3,<1.21.0',
++    'httplib2>=0.8',
++    'numpy>=1.14.3',
+     'pymongo>=3.8.0,<4.0.0',
+     'oauth2client>=2.0.1,<5',
+     'protobuf>=3.12.2,<4',