blob: a5643da8c3a8260fa7e22103a166b20e809ba754 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
# Tesseract OCR engine, built from the upstream GitHub sources, together with
# the trained language data taken from the separate `tessdata' repository.
#
# Arguments:
#   enableLanguages - list of language codes (e.g. [ "eng" "deu" ]) whose
#                     `<lang>.traineddata' files are installed, or `null' to
#                     install every `.traineddata' file found in `tessdata'.
#                     An empty list installs no language data at all.
{ stdenv, fetchFromGitHub, autoreconfHook, pkgconfig
, leptonica, libpng, libtiff, icu, pango, opencl-headers

# Supported list of languages or `null' for all available languages
, enableLanguages ? null
}:

stdenv.mkDerivation rec {
  name = "tesseract-${version}";
  version = "3.05.00";

  src = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tesseract";
    rev = version;
    sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30";
  };

  # Trained language data. Being a derivation attribute, its store path is
  # available to the `postInstall' script below as `$tessdata'.
  tessdata = fetchFromGitHub {
    owner = "tesseract-ocr";
    repo = "tessdata";
    rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
    sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
  };

  nativeBuildInputs = [ pkgconfig autoreconfHook ];
  buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];

  # Location of the leptonica headers, exported into the build environment
  # (presumably consumed by tesseract's configure script -- confirm upstream).
  LIBLEPT_HEADERSDIR = "${leptonica}/include";

  # Copy the .traineddata files of the languages specified in enableLanguages
  # into `$out/share/tessdata' and check afterwards if copying was successful.
  postInstall = let
    # One case-insensitive `find' name test per requested language.
    mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}";
    mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg;
    findLangArgs =
      if enableLanguages == null then
        "-iname '*.traineddata'"
      else if enableLanguages == [ ] then
        # An empty list would otherwise generate `\( \)', which find rejects
        # as an invalid expression. `-false' matches nothing, so nothing is
        # copied and the count check below compares 0 against 0.
        "-false"
      else
        "\\( ${mkFindArgs enableLanguages} \\)";
  in ''
    numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \
      ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)"

    ${if enableLanguages != null then ''
      expected=${toString (builtins.length enableLanguages)}
    '' else ''
      expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)"
    ''}

    if [ "$numLangs" -ne "$expected" ]; then
      echo "Expected $expected languages, but $numLangs" \
           "were copied to \`$out/share/tessdata'" >&2
      exit 1
    fi
  '';

  meta = {
    description = "OCR engine";
    # Quoted string instead of a deprecated bare URL literal; points at the
    # GitHub project that `src' is fetched from.
    homepage = "https://github.com/tesseract-ocr/tesseract";
    license = stdenv.lib.licenses.asl20;
    maintainers = with stdenv.lib.maintainers; [viric];
    platforms = with stdenv.lib.platforms; linux ++ darwin;
  };
}
|