summary refs log tree commit diff
path: root/pkgs/applications/graphics/tesseract
diff options
context:
space:
mode:
author Lluís Batlle i Rossell <viric@vicerveza.homeunix.net> 2011-04-24 20:01:19 +0000
committer Lluís Batlle i Rossell <viric@vicerveza.homeunix.net> 2011-04-24 20:01:19 +0000
commit 9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187 (patch)
tree d40f257e84c4701e66b15d90e580a5196382daa9 /pkgs/applications/graphics/tesseract
parent 47c44f9315ade8ccf7a3d1369617bc6aabacc9f9 (diff)
download nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.tar
nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.tar.gz
nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.tar.bz2
nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.tar.lz
nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.tar.xz
nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.tar.zst
nixpkgs-9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187.zip
Adding training results files for some languages to tesseract to be able to do OCR directly.
svn path=/nixpkgs/trunk/; revision=26956
Diffstat (limited to 'pkgs/applications/graphics/tesseract')
-rw-r--r-- pkgs/applications/graphics/tesseract/default.nix 19
1 files changed, 19 insertions, 0 deletions
diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix
index 80e0514d38f..07a160a2e48 100644
--- a/pkgs/applications/graphics/tesseract/default.nix
+++ b/pkgs/applications/graphics/tesseract/default.nix
@@ -1,5 +1,22 @@
 { stdenv, fetchurl, libtiff }:
 
+let
+  f = lang : sha256 : let
+      src = fetchurl {
+        url = "http://tesseract-ocr.googlecode.com/files/${lang}.traineddata.gz";
+        inherit sha256;
+      };
+    in 
+      "gunzip -c ${src} > $out/share/tessdata/${lang}.traineddata";
+
+  extraLanguages = ''
+    ${f "cat" "1qndk8qygw9bq7nzn7kzgxkm3jhlq7jgvdqpj5id4rrcaavjvifw"}
+    ${f "rus" "0yjzks189bgcmi2vr4v0l0fla11qdrw3cb1nvpxl9mdis8qr9vcc"}
+    ${f "spa" "1q1hw3qi95q5ww3l02fbhjqacxm34cp65fkbx10wjdcg0s5p9q2x"}
+    ${f "nld" "0cbqfhl2rwb1mg4y1140nw2vhhcilc0nk7bfbnxw6bzj1y5n49i8"}
+  '';
+in
+
 stdenv.mkDerivation {
   name = "tesseract-3.0.0";
 
@@ -10,6 +27,8 @@ stdenv.mkDerivation {
 
   buildInputs = [ libtiff ];
 
+  postInstall = extraLanguages;
+
   meta = {
     description = "OCR engine";
     homepage = http://code.google.com/p/tesseract-ocr/;