summary refs log tree commit diff
diff options
context:
space:
mode:
author happysalada <raphael@megzari.com> 2023-08-15 00:07:16 +0800
committer Yt <happysalada@tuta.io> 2023-08-15 10:51:41 +0800
commit ea95c0917609e5c48023cc7c6141bea2fdf13970 (patch)
tree ce0195cb8a53f851db65abd4a6331d00a1cf15cf
parent 01c84dba9232995ed758ee1de0fc26f56e77bd56 (diff)
download nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.tar
nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.tar.gz
nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.tar.bz2
nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.tar.lz
nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.tar.xz
nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.tar.zst
nixpkgs-ea95c0917609e5c48023cc7c6141bea2fdf13970.zip
nltk-data: init at unstable-2023-02-02
-rw-r--r-- pkgs/tools/text/nltk_data/default.nix 50
-rw-r--r-- pkgs/top-level/all-packages.nix 2
2 files changed, 52 insertions, 0 deletions
diff --git a/pkgs/tools/text/nltk_data/default.nix b/pkgs/tools/text/nltk_data/default.nix
new file mode 100644
index 00000000000..1e2d803a21c
--- /dev/null
+++ b/pkgs/tools/text/nltk_data/default.nix
@@ -0,0 +1,50 @@
+{ lib, newScope, fetchFromGitHub, unzip, stdenvNoCC }: # yields a scope of NLTK data packages, one derivation per zip from nltk/nltk_data
+let
+  base = { # attributes shared by every data package; merged into each derivation below
+    version = "unstable-2023-02-02";
+    nativeBuildInputs = [ unzip ]; # installPhase below invokes unzip
+    dontBuild = true; # no build step; the data is only unpacked and copied
+    meta = with lib; {
+      description = "NLTK Data";
+      homepage = "https://github.com/nltk/nltk_data";
+      license = licenses.asl20;
+      platforms = platforms.all;
+      maintainers = with maintainers; [ happysalada ];
+    };
+  };
+  makeNltkDataPackage = {pname, location, hash}: # pname: zip basename and derivation pname; location: directory of the zip inside the repo; hash: passed through to fetchFromGitHub
+    let
+      src = fetchFromGitHub {
+        owner = "nltk";
+        repo = "nltk_data";
+        rev = "5db857e6f7df11eabb5e5665836db9ec8df07e28"; # pinned nltk_data commit, shared by all packages
+        inherit hash;
+        sparseCheckout = [ "${location}/${pname}.zip" ]; # restrict the checkout to this package's zip
+      };
+    in
+    stdenvNoCC.mkDerivation (base // { # attributes listed here take precedence over same-named ones in base
+      inherit pname src;
+      version = base.version; # same value base already contributes through the // merge
+      installPhase = ''
+        runHook preInstall
+
+        mkdir -p $out
+        unzip ${src}/${location}/${pname}.zip
+        cp -R ${pname}/ $out/
+
+        runHook postInstall
+      ''; # script: unzip the archive from the fetched source, then copy the extracted ${pname} directory into $out
+    });
+in
+lib.makeScope newScope (self: { # self is not referenced by the entries below
+  punkt = makeNltkDataPackage ({ # packages/tokenizers/punkt.zip
+    pname = "punkt";
+    location = "packages/tokenizers";
+    hash = "sha256-rMkgn3xzmSJNv8//kqbPF2Xq3Gf16lgA1Wx8FPYbaQo=";
+  });
+  averaged_perceptron_tagger = makeNltkDataPackage ({ # packages/taggers/averaged_perceptron_tagger.zip
+    pname = "averaged_perceptron_tagger";
+    location = "packages/taggers";
+    hash = "sha256-ilTs4HWPUoHxQb4kWEy3wJ6QsE/98+EQya44gtV2inw=";
+  });
+})
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 0ce71f67768..93af8d9b683 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -5971,6 +5971,8 @@ with pkgs;
     inherit (darwin.apple_sdk.frameworks) Security;
   };
 
+  nltk-data = callPackage ../tools/text/nltk_data { };
+
   nodepy-runtime = with python3.pkgs; toPythonApplication nodepy-runtime;
 
   nixpkgs-pytools = with python3.pkgs; toPythonApplication nixpkgs-pytools;