summary refs log tree commit diff
diff options
context:
space:
mode:
author Robert Schütz <nix@dotlambda.de> 2021-09-18 14:08:04 -0700
committer Robert Schütz <nix@dotlambda.de> 2021-09-18 14:15:36 -0700
commit 4c268ee2ccb7d4eaec3830bee16fe67cfc39f269 (patch)
tree 319e759b0d350684a24bdfe790e6fe06646aa8b6
parent fc1c501f4c6e5ca4a90b48ce5bd5b9d631ce5772 (diff)
download nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar
nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.gz
nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.bz2
nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.lz
nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.xz
nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.zst
nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.zip
ocrmypdf: move to python3Packages
According to https://ocrmypdf.readthedocs.io/en/latest/api.html, it also
provides a Python API.
-rw-r--r-- pkgs/applications/office/paperless-ng/default.nix 1
-rw-r--r-- pkgs/development/python-modules/ocrmypdf/default.nix (renamed from pkgs/tools/text/ocrmypdf/default.nix) 77
-rw-r--r-- pkgs/development/python-modules/ocrmypdf/paths.patch 160
-rw-r--r-- pkgs/tools/text/ocrmypdf/liblept.patch 13
-rw-r--r-- pkgs/top-level/all-packages.nix 2
-rw-r--r-- pkgs/top-level/python-packages.nix 2
6 files changed, 199 insertions, 56 deletions
diff --git a/pkgs/applications/office/paperless-ng/default.nix b/pkgs/applications/office/paperless-ng/default.nix
index e84b3c79443..0fd33573850 100644
--- a/pkgs/applications/office/paperless-ng/default.nix
+++ b/pkgs/applications/office/paperless-ng/default.nix
@@ -5,7 +5,6 @@
 , ghostscript
 , imagemagick
 , jbig2enc
-, ocrmypdf
 , optipng
 , pngquant
 , qpdf
diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/development/python-modules/ocrmypdf/default.nix
index 4292c275a7f..531e042c5b9 100644
--- a/pkgs/tools/text/ocrmypdf/default.nix
+++ b/pkgs/development/python-modules/ocrmypdf/default.nix
@@ -1,34 +1,32 @@
-{ fetchFromGitHub
+{ lib
+, buildPythonPackage
+, cffi
+, coloredlogs
+, fetchFromGitHub
 , ghostscript
 , img2pdf
+, importlib-resources
 , jbig2enc
 , leptonica
+, pdfminer
+, pikepdf
+, pillow
+, pluggy
 , pngquant
-, python3
-, python3Packages
-, qpdf
-, lib
+, pytest-xdist
+, pytestCheckHook
+, reportlab
+, setuptools
+, setuptools-scm
+, setuptools-scm-git-archive
 , stdenv
+, substituteAll
 , tesseract4
+, tqdm
 , unpaper
-, substituteAll
 }:
-let
-  inherit (python3Packages) buildPythonApplication;
-
-  runtimeDeps = with python3Packages; [
-    ghostscript
-    jbig2enc
-    leptonica
-    pngquant
-    qpdf
-    tesseract4
-    unpaper
-    pillow
-  ];
 
-in
-buildPythonApplication rec {
+buildPythonPackage rec {
   pname = "ocrmypdf";
   version = "12.5.0";
 
@@ -39,51 +37,48 @@ buildPythonApplication rec {
     sha256 = "sha256-g80WedX+TGHE9EJ/RSgOc53PM17V3WZslUNaHoqKTo0=";
   };
 
-  nativeBuildInputs = with python3Packages; [
-    setuptools
+  patches = [
+    (substituteAll {
+      src = ./paths.patch;
+      gs = "${lib.getBin ghostscript}/bin/gs";
+      jbig2 = "${lib.getBin jbig2enc}/bin/jbig2";
+      liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
+      pngquant = "${lib.getBin pngquant}/bin/pngquant";
+      tesseract = "${lib.getBin tesseract4}/bin/tesseract";
+      unpaper = "${lib.getBin unpaper}/bin/unpaper";
+    })
+  ];
+
+  nativeBuildInputs = [
     setuptools-scm-git-archive
     setuptools-scm
   ];
 
-  propagatedBuildInputs = with python3Packages; [
+  propagatedBuildInputs = [
     cffi
     coloredlogs
     img2pdf
     importlib-resources
     pdfminer
-    pluggy
     pikepdf
     pillow
+    pluggy
     reportlab
     setuptools
     tqdm
   ];
 
-  checkInputs = with python3Packages; [
-    pypdf2
-    pytest
-    pytest-helpers-namespace
+  checkInputs = [
     pytest-xdist
-    pytest-cov
-    python-xmp-toolkit
     pytestCheckHook
-  ] ++ runtimeDeps;
-
-  patches = [
-    (substituteAll {
-      src = ./liblept.patch;
-      liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
-    })
   ];
 
-  makeWrapperArgs = [ "--prefix PATH : ${lib.makeBinPath [ ghostscript jbig2enc pngquant qpdf tesseract4 unpaper ]}" ];
-
   meta = with lib; {
     homepage = "https://github.com/jbarlow83/OCRmyPDF";
     description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
     license = with licenses; [ mpl20 mit ];
     platforms = platforms.linux;
-    maintainers = [ maintainers.kiwi ];
+    maintainers = with maintainers; [ kiwi dotlambda ];
     changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst";
   };
 }
diff --git a/pkgs/development/python-modules/ocrmypdf/paths.patch b/pkgs/development/python-modules/ocrmypdf/paths.patch
new file mode 100644
index 00000000000..9bfcc728554
--- /dev/null
+++ b/pkgs/development/python-modules/ocrmypdf/paths.patch
@@ -0,0 +1,160 @@
+diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
+index 5c357f1b..f459763a 100644
+--- a/src/ocrmypdf/_exec/ghostscript.py
++++ b/src/ocrmypdf/_exec/ghostscript.py
+@@ -25,28 +25,7 @@ from ocrmypdf.subprocess import get_version, run, run_polling_stderr
+ 
+ log = logging.getLogger(__name__)
+ 
+-missing_gs_error = """
+----------------------------------------------------------------------
+-This error normally occurs when ocrmypdf find can't Ghostscript.
+-Please ensure Ghostscript is installed and its location is added to
+-the system PATH environment variable.
+-
+-For details see:
+-    https://ocrmypdf.readthedocs.io/en/latest/installation.html
+----------------------------------------------------------------------
+-"""
+-
+-_gswin = None
+-if os.name == 'nt':
+-    _gswin = which('gswin64c')
+-    if not _gswin:
+-        _gswin = which('gswin32c')
+-        if not _gswin:
+-            raise MissingDependencyError(missing_gs_error)
+-    _gswin = Path(_gswin).stem
+-
+-GS = _gswin if _gswin else 'gs'
+-del _gswin
++GS = '@gs@'
+ 
+ 
+ def version():
+diff --git a/src/ocrmypdf/_exec/jbig2enc.py b/src/ocrmypdf/_exec/jbig2enc.py
+index 2e8a058b..65a09088 100644
+--- a/src/ocrmypdf/_exec/jbig2enc.py
++++ b/src/ocrmypdf/_exec/jbig2enc.py
+@@ -14,7 +14,7 @@ from ocrmypdf.subprocess import get_version, run
+ 
+ 
+ def version():
+-    return get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*')
++    return get_version('@jbig2@', regex=r'jbig2enc (\d+(\.\d+)*).*')
+ 
+ 
+ def available():
+@@ -27,7 +27,7 @@ def available():
+ 
+ def convert_group(*, cwd, infiles, out_prefix):
+     args = [
+-        'jbig2',
++        '@jbig2@',
+         '-b',
+         out_prefix,
+         '-s',  # symbol mode (lossy)
+@@ -46,7 +46,7 @@ def convert_group_mp(args):
+ 
+ 
+ def convert_single(*, cwd, infile, outfile):
+-    args = ['jbig2', '-p', infile]
++    args = ['@jbig2@', '-p', infile]
+     with open(outfile, 'wb') as fstdout:
+         proc = run(args, cwd=cwd, stdout=fstdout, stderr=PIPE)
+     proc.check_returncode()
+diff --git a/src/ocrmypdf/_exec/pngquant.py b/src/ocrmypdf/_exec/pngquant.py
+index ca8a4542..d0544174 100644
+--- a/src/ocrmypdf/_exec/pngquant.py
++++ b/src/ocrmypdf/_exec/pngquant.py
+@@ -19,7 +19,7 @@ from ocrmypdf.subprocess import get_version, run
+ 
+ 
+ def version():
+-    return get_version('pngquant', regex=r'(\d+(\.\d+)*).*')
++    return get_version('@pngquant@', regex=r'(\d+(\.\d+)*).*')
+ 
+ 
+ def available():
+@@ -46,7 +46,7 @@ def input_as_png(input_file: Path):
+ def quantize(input_file: Path, output_file: Path, quality_min: int, quality_max: int):
+     with input_as_png(input_file) as input_stream:
+         args = [
+-            'pngquant',
++            '@pngquant@',
+             '--force',
+             '--skip-if-larger',
+             '--quality',
+diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
+index 33ead41e..5840f7c1 100644
+--- a/src/ocrmypdf/_exec/tesseract.py
++++ b/src/ocrmypdf/_exec/tesseract.py
+@@ -78,7 +78,7 @@ class TesseractVersion(StrictVersion):
+ 
+ 
+ def version():
+-    return get_version('tesseract', regex=r'tesseract\s(.+)')
++    return get_version('@tesseract@', regex=r'tesseract\s(.+)')
+ 
+ 
+ def has_user_words():
+@@ -100,7 +100,7 @@ def get_languages():
+         msg += output
+         return msg
+ 
+-    args_tess = ['tesseract', '--list-langs']
++    args_tess = ['@tesseract@', '--list-langs']
+     try:
+         proc = run(
+             args_tess,
+@@ -122,7 +122,7 @@ def get_languages():
+ 
+ 
+ def tess_base_args(langs: List[str], engine_mode: Optional[int]) -> List[str]:
+-    args = ['tesseract']
++    args = ['@tesseract@']
+     if langs:
+         args.extend(['-l', '+'.join(langs)])
+     if engine_mode is not None:
+diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
+index 3c3ae72c..d269966a 100644
+--- a/src/ocrmypdf/_exec/unpaper.py
++++ b/src/ocrmypdf/_exec/unpaper.py
+@@ -31,7 +31,7 @@ log = logging.getLogger(__name__)
+ 
+ 
+ def version() -> str:
+-    return get_version('unpaper')
++    return get_version('@unpaper@')
+ 
+ 
+ def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]:
+@@ -71,7 +71,7 @@ def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]:
+ def run(
+     input_file: Path, output_file: Path, *, dpi: DecFloat, mode_args: List[str]
+ ) -> None:
+-    args_unpaper = ['unpaper', '-v', '--dpi', str(round(dpi, 6))] + mode_args
++    args_unpaper = ['@unpaper@', '-v', '--dpi', str(round(dpi, 6))] + mode_args
+ 
+     with TemporaryDirectory() as tmpdir:
+         input_pnm, output_pnm = _setup_unpaper_io(Path(tmpdir), input_file)
+diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
+index e4814f1a..fdaf7ea4 100644
+--- a/src/ocrmypdf/leptonica.py
++++ b/src/ocrmypdf/leptonica.py
+@@ -33,14 +33,7 @@ from ocrmypdf.lib._leptonica import ffi
+ 
+ logger = logging.getLogger(__name__)
+ 
+-if os.name == 'nt':
+-    from ocrmypdf.subprocess._windows import shim_env_path
+-
+-    libname = 'liblept-5'
+-    os.environ['PATH'] = shim_env_path()
+-else:
+-    libname = 'lept'
+-_libpath = find_library(libname)
++_libpath = '@liblept@'
+ if not _libpath:
+     raise MissingDependencyError(
+         """
diff --git a/pkgs/tools/text/ocrmypdf/liblept.patch b/pkgs/tools/text/ocrmypdf/liblept.patch
deleted file mode 100644
index ed413a8b37b..00000000000
--- a/pkgs/tools/text/ocrmypdf/liblept.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
-index 328b063..b993cc9 100644
---- a/src/ocrmypdf/leptonica.py
-+++ b/src/ocrmypdf/leptonica.py
-@@ -46,7 +46,7 @@ if os.name == 'nt':
-     os.environ['PATH'] = shim_paths_with_program_files()
- else:
-     libname = 'lept'
--_libpath = find_library(libname)
-+_libpath = '@liblept@'
- if not _libpath:
-     raise MissingDependencyError(
-         """
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 1b6fe883918..acfb8d72529 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -3228,7 +3228,7 @@ with pkgs;
 
   oci-cli = callPackage ../tools/admin/oci-cli { };
 
-  ocrmypdf = callPackage ../tools/text/ocrmypdf { };
+  ocrmypdf = with python3.pkgs; toPythonApplication ocrmypdf;
 
   ocrfeeder = callPackage ../applications/graphics/ocrfeeder { };
 
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 19ee538be2d..4be0eb03c5e 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -5059,6 +5059,8 @@ in {
 
   oci = callPackage ../development/python-modules/oci { };
 
+  ocrmypdf = callPackage ../development/python-modules/ocrmypdf { };
+
   od = callPackage ../development/python-modules/od { };
 
   odfpy = callPackage ../development/python-modules/odfpy { };