diff options
author | Robert Schütz <nix@dotlambda.de> | 2021-09-18 14:08:04 -0700 |
---|---|---|
committer | Robert Schütz <nix@dotlambda.de> | 2021-09-18 14:15:36 -0700 |
commit | 4c268ee2ccb7d4eaec3830bee16fe67cfc39f269 (patch) | |
tree | 319e759b0d350684a24bdfe790e6fe06646aa8b6 | |
parent | fc1c501f4c6e5ca4a90b48ce5bd5b9d631ce5772 (diff) | |
download | nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.gz nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.bz2 nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.lz nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.xz nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.tar.zst nixpkgs-4c268ee2ccb7d4eaec3830bee16fe67cfc39f269.zip |
ocrmypdf: move to python3Packages
According to https://ocrmypdf.readthedocs.io/en/latest/api.html, ocrmypdf provides a Python API in addition to its command-line tool, so it is packaged as a Python module under python3Packages.
-rw-r--r-- | pkgs/applications/office/paperless-ng/default.nix | 1 | ||||
-rw-r--r-- | pkgs/development/python-modules/ocrmypdf/default.nix (renamed from pkgs/tools/text/ocrmypdf/default.nix) | 77 | ||||
-rw-r--r-- | pkgs/development/python-modules/ocrmypdf/paths.patch | 160 | ||||
-rw-r--r-- | pkgs/tools/text/ocrmypdf/liblept.patch | 13 | ||||
-rw-r--r-- | pkgs/top-level/all-packages.nix | 2 | ||||
-rw-r--r-- | pkgs/top-level/python-packages.nix | 2 |
6 files changed, 199 insertions, 56 deletions
diff --git a/pkgs/applications/office/paperless-ng/default.nix b/pkgs/applications/office/paperless-ng/default.nix index e84b3c79443..0fd33573850 100644 --- a/pkgs/applications/office/paperless-ng/default.nix +++ b/pkgs/applications/office/paperless-ng/default.nix @@ -5,7 +5,6 @@ , ghostscript , imagemagick , jbig2enc -, ocrmypdf , optipng , pngquant , qpdf diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/development/python-modules/ocrmypdf/default.nix index 4292c275a7f..531e042c5b9 100644 --- a/pkgs/tools/text/ocrmypdf/default.nix +++ b/pkgs/development/python-modules/ocrmypdf/default.nix @@ -1,34 +1,32 @@ -{ fetchFromGitHub +{ lib +, buildPythonPackage +, cffi +, coloredlogs +, fetchFromGitHub , ghostscript , img2pdf +, importlib-resources , jbig2enc , leptonica +, pdfminer +, pikepdf +, pillow +, pluggy , pngquant -, python3 -, python3Packages -, qpdf -, lib +, pytest-xdist +, pytestCheckHook +, reportlab +, setuptools +, setuptools-scm +, setuptools-scm-git-archive , stdenv +, substituteAll , tesseract4 +, tqdm , unpaper -, substituteAll }: -let - inherit (python3Packages) buildPythonApplication; - - runtimeDeps = with python3Packages; [ - ghostscript - jbig2enc - leptonica - pngquant - qpdf - tesseract4 - unpaper - pillow - ]; -in -buildPythonApplication rec { +buildPythonPackage rec { pname = "ocrmypdf"; version = "12.5.0"; @@ -39,51 +37,48 @@ buildPythonApplication rec { sha256 = "sha256-g80WedX+TGHE9EJ/RSgOc53PM17V3WZslUNaHoqKTo0="; }; - nativeBuildInputs = with python3Packages; [ - setuptools + patches = [ + (substituteAll { + src = ./paths.patch; + gs = "${lib.getBin ghostscript}/bin/gs"; + jbig2 = "${lib.getBin jbig2enc}/bin/jbig2"; + liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}"; + pngquant = "${lib.getBin pngquant}/bin/pngquant"; + tesseract = "${lib.getBin tesseract4}/bin/tesseract"; + unpaper = "${lib.getBin unpaper}/bin/unpaper"; + }) + ]; + + nativeBuildInputs = [ setuptools-scm-git-archive 
setuptools-scm ]; - propagatedBuildInputs = with python3Packages; [ + propagatedBuildInputs = [ cffi coloredlogs img2pdf importlib-resources pdfminer - pluggy pikepdf pillow + pluggy reportlab setuptools tqdm ]; - checkInputs = with python3Packages; [ - pypdf2 - pytest - pytest-helpers-namespace + checkInputs = [ pytest-xdist - pytest-cov - python-xmp-toolkit pytestCheckHook - ] ++ runtimeDeps; - - patches = [ - (substituteAll { - src = ./liblept.patch; - liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}"; - }) ]; - makeWrapperArgs = [ "--prefix PATH : ${lib.makeBinPath [ ghostscript jbig2enc pngquant qpdf tesseract4 unpaper ]}" ]; - meta = with lib; { homepage = "https://github.com/jbarlow83/OCRmyPDF"; description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched"; license = with licenses; [ mpl20 mit ]; platforms = platforms.linux; - maintainers = [ maintainers.kiwi ]; + maintainers = with maintainers; [ kiwi dotlambda ]; changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst"; }; } diff --git a/pkgs/development/python-modules/ocrmypdf/paths.patch b/pkgs/development/python-modules/ocrmypdf/paths.patch new file mode 100644 index 00000000000..9bfcc728554 --- /dev/null +++ b/pkgs/development/python-modules/ocrmypdf/paths.patch @@ -0,0 +1,160 @@ +diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py +index 5c357f1b..f459763a 100644 +--- a/src/ocrmypdf/_exec/ghostscript.py ++++ b/src/ocrmypdf/_exec/ghostscript.py +@@ -25,28 +25,7 @@ from ocrmypdf.subprocess import get_version, run, run_polling_stderr + + log = logging.getLogger(__name__) + +-missing_gs_error = """ +---------------------------------------------------------------------- +-This error normally occurs when ocrmypdf find can't Ghostscript. +-Please ensure Ghostscript is installed and its location is added to +-the system PATH environment variable. 
+- +-For details see: +- https://ocrmypdf.readthedocs.io/en/latest/installation.html +---------------------------------------------------------------------- +-""" +- +-_gswin = None +-if os.name == 'nt': +- _gswin = which('gswin64c') +- if not _gswin: +- _gswin = which('gswin32c') +- if not _gswin: +- raise MissingDependencyError(missing_gs_error) +- _gswin = Path(_gswin).stem +- +-GS = _gswin if _gswin else 'gs' +-del _gswin ++GS = '@gs@' + + + def version(): +diff --git a/src/ocrmypdf/_exec/jbig2enc.py b/src/ocrmypdf/_exec/jbig2enc.py +index 2e8a058b..65a09088 100644 +--- a/src/ocrmypdf/_exec/jbig2enc.py ++++ b/src/ocrmypdf/_exec/jbig2enc.py +@@ -14,7 +14,7 @@ from ocrmypdf.subprocess import get_version, run + + + def version(): +- return get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*') ++ return get_version('@jbig2@', regex=r'jbig2enc (\d+(\.\d+)*).*') + + + def available(): +@@ -27,7 +27,7 @@ def available(): + + def convert_group(*, cwd, infiles, out_prefix): + args = [ +- 'jbig2', ++ '@jbig2@', + '-b', + out_prefix, + '-s', # symbol mode (lossy) +@@ -46,7 +46,7 @@ def convert_group_mp(args): + + + def convert_single(*, cwd, infile, outfile): +- args = ['jbig2', '-p', infile] ++ args = ['@jbig2@', '-p', infile] + with open(outfile, 'wb') as fstdout: + proc = run(args, cwd=cwd, stdout=fstdout, stderr=PIPE) + proc.check_returncode() +diff --git a/src/ocrmypdf/_exec/pngquant.py b/src/ocrmypdf/_exec/pngquant.py +index ca8a4542..d0544174 100644 +--- a/src/ocrmypdf/_exec/pngquant.py ++++ b/src/ocrmypdf/_exec/pngquant.py +@@ -19,7 +19,7 @@ from ocrmypdf.subprocess import get_version, run + + + def version(): +- return get_version('pngquant', regex=r'(\d+(\.\d+)*).*') ++ return get_version('@pngquant@', regex=r'(\d+(\.\d+)*).*') + + + def available(): +@@ -46,7 +46,7 @@ def input_as_png(input_file: Path): + def quantize(input_file: Path, output_file: Path, quality_min: int, quality_max: int): + with input_as_png(input_file) as input_stream: + args = [ +- 
'pngquant', ++ '@pngquant@', + '--force', + '--skip-if-larger', + '--quality', +diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py +index 33ead41e..5840f7c1 100644 +--- a/src/ocrmypdf/_exec/tesseract.py ++++ b/src/ocrmypdf/_exec/tesseract.py +@@ -78,7 +78,7 @@ class TesseractVersion(StrictVersion): + + + def version(): +- return get_version('tesseract', regex=r'tesseract\s(.+)') ++ return get_version('@tesseract@', regex=r'tesseract\s(.+)') + + + def has_user_words(): +@@ -100,7 +100,7 @@ def get_languages(): + msg += output + return msg + +- args_tess = ['tesseract', '--list-langs'] ++ args_tess = ['@tesseract@', '--list-langs'] + try: + proc = run( + args_tess, +@@ -122,7 +122,7 @@ def get_languages(): + + + def tess_base_args(langs: List[str], engine_mode: Optional[int]) -> List[str]: +- args = ['tesseract'] ++ args = ['@tesseract@'] + if langs: + args.extend(['-l', '+'.join(langs)]) + if engine_mode is not None: +diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py +index 3c3ae72c..d269966a 100644 +--- a/src/ocrmypdf/_exec/unpaper.py ++++ b/src/ocrmypdf/_exec/unpaper.py +@@ -31,7 +31,7 @@ log = logging.getLogger(__name__) + + + def version() -> str: +- return get_version('unpaper') ++ return get_version('@unpaper@') + + + def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]: +@@ -71,7 +71,7 @@ def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]: + def run( + input_file: Path, output_file: Path, *, dpi: DecFloat, mode_args: List[str] + ) -> None: +- args_unpaper = ['unpaper', '-v', '--dpi', str(round(dpi, 6))] + mode_args ++ args_unpaper = ['@unpaper@', '-v', '--dpi', str(round(dpi, 6))] + mode_args + + with TemporaryDirectory() as tmpdir: + input_pnm, output_pnm = _setup_unpaper_io(Path(tmpdir), input_file) +diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py +index e4814f1a..fdaf7ea4 100644 +--- a/src/ocrmypdf/leptonica.py ++++ 
b/src/ocrmypdf/leptonica.py +@@ -33,14 +33,7 @@ from ocrmypdf.lib._leptonica import ffi + + logger = logging.getLogger(__name__) + +-if os.name == 'nt': +- from ocrmypdf.subprocess._windows import shim_env_path +- +- libname = 'liblept-5' +- os.environ['PATH'] = shim_env_path() +-else: +- libname = 'lept' +-_libpath = find_library(libname) ++_libpath = '@liblept@' + if not _libpath: + raise MissingDependencyError( + """ diff --git a/pkgs/tools/text/ocrmypdf/liblept.patch b/pkgs/tools/text/ocrmypdf/liblept.patch deleted file mode 100644 index ed413a8b37b..00000000000 --- a/pkgs/tools/text/ocrmypdf/liblept.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py -index 328b063..b993cc9 100644 ---- a/src/ocrmypdf/leptonica.py -+++ b/src/ocrmypdf/leptonica.py -@@ -46,7 +46,7 @@ if os.name == 'nt': - os.environ['PATH'] = shim_paths_with_program_files() - else: - libname = 'lept' --_libpath = find_library(libname) -+_libpath = '@liblept@' - if not _libpath: - raise MissingDependencyError( - """ diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 1b6fe883918..acfb8d72529 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -3228,7 +3228,7 @@ with pkgs; oci-cli = callPackage ../tools/admin/oci-cli { }; - ocrmypdf = callPackage ../tools/text/ocrmypdf { }; + ocrmypdf = with python3.pkgs; toPythonApplication ocrmypdf; ocrfeeder = callPackage ../applications/graphics/ocrfeeder { }; diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 19ee538be2d..4be0eb03c5e 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -5059,6 +5059,8 @@ in { oci = callPackage ../development/python-modules/oci { }; + ocrmypdf = callPackage ../development/python-modules/ocrmypdf { }; + od = callPackage ../development/python-modules/od { }; odfpy = callPackage ../development/python-modules/odfpy { }; |