diff options
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch | 49 | ||||
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/default.nix | 142 | ||||
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch | 151 | ||||
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/k2pdfopt.patch | 99 | ||||
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/leptonica.patch | 254 | ||||
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/mupdf.patch | 1060 | ||||
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/tesseract.patch | 675 |
7 files changed, 164 insertions, 2266 deletions
diff --git a/pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch b/pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch new file mode 100644 index 00000000000..8f9271ac996 --- /dev/null +++ b/pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch @@ -0,0 +1,49 @@ +From 2629af4ed00d7ca65359178203d80fb146901cdb Mon Sep 17 00:00:00 2001 +From: Daniel Fullmer <danielrf12@gmail.com> +Date: Fri, 3 Jul 2020 21:00:45 -0700 +Subject: [PATCH 1/2] Fix CMakeLists + +--- + CMakeLists.txt | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index e218279..4341de9 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -57,6 +57,7 @@ endif(JPEG_FOUND) + include(FindJasper) + if(JASPER_FOUND) + set(HAVE_JASPER_LIB 1) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY}) + endif(JASPER_FOUND) + + # paths from willuslib/wgs.c +@@ -71,9 +72,12 @@ else() + message(STATUS "Could NOT find ghostscript executable") + endif(GHOSTSCRIPT_EXECUTABLE) + +-# willus.h +-# HAVE_GSL_LIB +- ++pkg_check_modules(GSL gsl) ++if(GSL_FOUND) ++ set(HAVE_GSL_LIB 1) ++ include_directories(SYSTEM ${GSL_INCLUDEDIR}) ++ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS}) ++endif(GSL_FOUND) + + # libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0) + # MUPDF_STATIC_LDFLAGS misses mupdf-js-none, and doubles libs ... 
+@@ -85,7 +89,7 @@ if(MUPDF_FOUND) + include_directories(SYSTEM ${MUPDF_INCLUDEDIR}) + message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}") + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS} +- -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype ++ + ) + endif(MUPDF_FOUND) + +-- +2.27.0 + diff --git a/pkgs/applications/misc/k2pdfopt/default.nix b/pkgs/applications/misc/k2pdfopt/default.nix index 8899654cc4c..31accf811bc 100644 --- a/pkgs/applications/misc/k2pdfopt/default.nix +++ b/pkgs/applications/misc/k2pdfopt/default.nix @@ -1,5 +1,5 @@ -{ stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig -, zlib, libpng +{ stdenv, runCommand, fetchzip, fetchurl, fetchpatch, fetchFromGitHub +, cmake, pkgconfig, zlib, libpng , enableGSL ? true, gsl , enableGhostScript ? true, ghostscript , enableMuPDF ? true, mupdf @@ -11,44 +11,132 @@ with stdenv.lib; -stdenv.mkDerivation rec { - pname = "k2pdfopt"; - version = "2.51a"; +# k2pdfopt is a pain to package. It requires modified versions of mupdf, +# leptonica, and tesseract. Instead of shipping patches for these upstream +# packages, k2pdfopt includes just the modified source files for these +# packages. The individual files from the {mupdf,leptonica,tesseract}_mod/ +# directories are intended to replace the corresponding source files in the +# upstream packages, for a particular version of that upstream package. +# +# There are a few ways we could approach packaging these modified versions of +# mupdf, leptonica, and tesseract: +# 1) Override the upstream source with a new derivation that involves copying +# the modified source files from k2pdfopt and replacing the corresponding +# source files in the upstream packages. Since the files are intended for a +# particular version of the upstream package, this would not allow us to easily +# use updates to those packages in nixpkgs. +# 2) Manually produce patches which can be applied against the upstream +# project, and have the same effect as replacing those files. 
This is what I +# believe k2pdfopt should do for us anyway. The benefit of creating and +# applying patches in this way is that minor updates (esp. security fixes) to +# upstream packages might still allow these patches to apply successfully. +# 3) Automatically produce these patches inside a nix derivation. This is the +# approach taken here, using the "mkPatch" provided below. This has the +# benefit of easier review and should hopefully be simpler to update in the +# future. + +let + # Create a patch against src based on changes applied in patchCommands + mkPatch = { name, src, patchCommands }: runCommand "${name}-k2pdfopt.patch" { inherit src; } '' + source $stdenv/setup + unpackPhase - src = (fetchzip { - url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.51_src.zip"; - sha256 = "133l7xkvi67s6sfk8cfh7rmavbsf7ib5fyksk1ci6b6sch3z2sw9"; - }); + orig=$sourceRoot + new=$sourceRoot-modded + cp -r $orig/. $new/ - # Note: the v2.51a zip contains only files to be replaced in the v2.50 zip. 
- v251a_src = (fetchzip { - url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.51a_src.zip"; - sha256 = "0vvwblii7kgdwfxw8dzk6jbmz4dv94d7rkv18i60y8wkayj6yhl6"; - }); + pushd $new >/dev/null + ${patchCommands} + popd >/dev/null - postUnpack = '' - cp -r ${v251a_src}/* $sourceRoot + diff -Naur $orig $new > $out || true ''; - patches = [ ./k2pdfopt.patch ./k2pdfopt-mupdf-1.16.1.patch ]; + pname = "k2pdfopt"; + version = "2.53"; + k2pdfopt_src = fetchzip { + url = "http://www.willus.com/${pname}/src/${pname}_v${version}_src.zip"; + sha256 = "1fna8bg3pascjfc3hmc6xn0xi2yh7f1qp0d344mw9hqanbnykyy8"; + }; +in stdenv.mkDerivation rec { + inherit pname version; + src = k2pdfopt_src; + + patches = [ + ./0001-Fix-CMakeLists.patch + ]; + + postPatch = '' + substituteInPlace willuslib/bmpdjvu.c \ + --replace "<djvu.h>" "<libdjvu/ddjvuapi.h>" + ''; nativeBuildInputs = [ cmake pkgconfig ]; buildInputs = let - # The patches below were constructed by taking the files from k2pdfopt in - # the {mupdf,leptonica,tesseract}_mod/ directories, replacing the - # corresponding files in the respective source trees, resolving any errors - # with more recent versions of these depencencies, and running diff. - mupdf_modded = mupdf.overrideAttrs (attrs: { - patches = attrs.patches ++ [ ./mupdf.patch ]; # Last verified with mupdf 1.16.1 + # We use specific versions of these sources below to match the versions + # used in the k2pdfopt source. Note that this does _not_ need to match the + # version used elsewhere in nixpkgs, since it is only used to create the + # patch that can then be applied to the version in nixpkgs. 
+ mupdf_patch = mkPatch { + name = "mupdf"; + src = fetchurl { + url = "https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz"; + sha256 = "13nl9nrcx2awz9l83mlv2psi1lmn3hdnfwxvwgwiwbxlkjl3zqq0"; + }; + patchCommands = '' + cp ${k2pdfopt_src}/mupdf_mod/{filter-basic,font,stext-device,string}.c ./source/fitz/ + cp ${k2pdfopt_src}/mupdf_mod/pdf-* ./source/pdf/ + ''; + }; + mupdf_modded = mupdf.overrideAttrs ({ patches ? [], ... }: { + patches = patches ++ [ mupdf_patch ]; + # This function is missing in font.c, see font-win32.c + postPatch = '' + echo "void pdf_install_load_system_font_funcs(fz_context *ctx) {}" >> source/fitz/font.c + ''; }); - leptonica_modded = leptonica.overrideAttrs (attrs: { - patches = [ ./leptonica.patch ]; # Last verified with leptonica 1.78.0 + + leptonica_patch = mkPatch { + name = "leptonica"; + src = fetchurl { + url = "http://www.leptonica.org/source/leptonica-1.79.0.tar.gz"; + sha256 = "1n004gv1dj3pq1fcnfdclvvx5nang80336aa67nvs3nnqp4ncn84"; + }; + patchCommands = "cp -r ${k2pdfopt_src}/leptonica_mod/. ./src/"; + }; + leptonica_modded = leptonica.overrideAttrs ({ patches ? [], ... 
}: { + patches = patches ++ [ leptonica_patch ]; }); + + tesseract_patch = mkPatch { + name = "tesseract"; + src = fetchFromGitHub { + owner = "tesseract-ocr"; + repo = "tesseract"; + rev = "4.1.1"; + sha256 = "1ca27zbjpx35nxh9fha410z3jskwyj06i5hqiqdc08s2d7kdivwn"; + }; + patchCommands = '' + cp ${k2pdfopt_src}/tesseract_mod/{baseapi,tesscapi,tesseract}.* src/api/ + cp ${k2pdfopt_src}/tesseract_mod/{tesscapi,tessedit,tesseract}.* src/ccmain/ + cp ${k2pdfopt_src}/tesseract_mod/dotproduct{avx,fma,sse}.* src/arch/ + cp ${k2pdfopt_src}/tesseract_mod/{intsimdmatrixsse,simddetect}.* src/arch/ + cp ${k2pdfopt_src}/tesseract_mod/{errcode,genericvector,mainblk,params,serialis,tessdatamanager,tess_version,tprintf,unicharset}.* src/ccutil/ + cp ${k2pdfopt_src}/tesseract_mod/{input,lstmrecognizer}.* src/lstm/ + cp ${k2pdfopt_src}/tesseract_mod/openclwrapper.* src/opencl/ + ''; + }; tesseract_modded = tesseract4.override { - tesseractBase = tesseract4.tesseractBase.overrideAttrs (_: { - patches = [ ./tesseract.patch ]; # Last verified with tesseract 1.4 + tesseractBase = tesseract4.tesseractBase.overrideAttrs ({ patches ? [], ... 
}: { + patches = patches ++ [ tesseract_patch ]; + # Additional compilation fixes + postPatch = '' + echo libtesseract_api_la_SOURCES += tesscapi.cpp >> src/api/Makefile.am + substituteInPlace src/api/tesseract.h \ + --replace "#include <leptonica.h>" "//#include <leptonica.h>" + ''; }); }; in diff --git a/pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch b/pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch deleted file mode 100644 index 3a9eca30e75..00000000000 --- a/pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch +++ /dev/null @@ -1,151 +0,0 @@ -diff --git a/willuslib/wmupdf.c b/willuslib/wmupdf.c -index 81627ef..f14a96c 100644 ---- a/willuslib/wmupdf.c -+++ b/willuslib/wmupdf.c -@@ -189,8 +189,6 @@ int wmupdf_remake_pdf(char *infile,char *outfile,WPDFPAGEINFO *pageinfo,int use_ - pdf_write_opts.do_compress=1; - pdf_write_opts.do_linear=0; - pdf_write_opts.do_garbage=1; /* 2 and 3 don't work for this. */ -- pdf_write_opts.continue_on_error=0; -- pdf_write_opts.errors=NULL; - write_failed=0; - wpdfpageinfo_sort(pageinfo); - xref=NULL; -@@ -1687,8 +1685,8 @@ WPDFOUTLINE *wpdfoutline_read_from_pdf_file(char *filename) - /* Sumatra version of MuPDF v1.4 -- use locally installed fonts */ - pdf_install_load_system_font_funcs(ctx); - fz_try(ctx) { doc=fz_open_document(ctx,filename); } -- fz_catch(ctx) -- { -+ fz_catch(ctx) -+ { - fz_drop_context(ctx); - return(NULL); - } -@@ -1890,5 +1888,5 @@ static pdf_obj *pdf_new_string_utf8(fz_context *ctx,char *string) - willus_mem_free((double **)&utfbuf,funcname); - return(pdfobj); - } -- -+ - #endif /* HAVE_MUPDF_LIB */ -diff --git a/willuslib/wmupdfinfo.c b/willuslib/wmupdfinfo.c -index 5c7f38c..9b9e6fd 100644 ---- a/willuslib/wmupdfinfo.c -+++ b/willuslib/wmupdfinfo.c -@@ -237,23 +237,22 @@ static void showglobalinfo(fz_context *ctx, globals *glo,char *filename) - pdf_obj *robj; - - robj=pdf_resolve_indirect(ctx,obj); -- n=pdf_sprint_obj(ctx,NULL,0,robj,1); -- buf=malloc(n+2); -+ 
buf=pdf_sprint_obj(ctx,NULL,0,&n,robj,1,0); - if (buf==NULL) - { - fz_write_printf(ctx,out,"Info object (%d %d R):\n",pdf_to_num(ctx,obj),pdf_to_gen(ctx,obj)); -- pdf_print_obj(ctx,out,robj,1); -+ pdf_print_obj(ctx,out,robj,1,0); - } - else - { -- pdf_sprint_obj(ctx,buf,n+2,robj,1); -+ pdf_sprint_obj(ctx,buf,n+2,&n,robj,1,0); - display_pdf_field(ctx,out,buf,"Title","TITLE"); - display_pdf_field(ctx,out,buf,"CreationDate","CREATED"); - display_pdf_field(ctx,out,buf,"ModDate","LAST MODIFIED"); - display_pdf_field(ctx,out,buf,"Producer","PDF PRODUCER"); - display_pdf_field(ctx,out,buf,"Creator","CREATOR"); - display_file_size(ctx,out,filename); -- free(buf); -+ fz_free(ctx,buf); - } - } - if (glo->dims==1) -@@ -275,7 +274,7 @@ static void showglobalinfo(fz_context *ctx, globals *glo,char *filename) - if (obj) - { - fz_write_printf(ctx,out, "\nEncryption object (%d %d R):\n", pdf_to_num(ctx,obj), pdf_to_gen(ctx,obj)); -- pdf_print_obj(ctx,out, pdf_resolve_indirect(ctx,obj), 1); -+ pdf_print_obj(ctx,out, pdf_resolve_indirect(ctx,obj), 1, 0); - } - } - -@@ -396,7 +395,7 @@ gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_ - if (j < glo->dims) - return; - -- glo->dim = fz_resize_array(ctx, glo->dim, glo->dims+1, sizeof(struct info)); -+ glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info); - glo->dims++; - - glo->dim[glo->dims - 1].page = page; -@@ -441,7 +440,7 @@ gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj * - if (k < glo->fonts) - continue; - -- glo->font = fz_resize_array(ctx, glo->font, glo->fonts+1, sizeof(struct info)); -+ glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info); - glo->fonts++; - - glo->font[glo->fonts - 1].page = page; -@@ -510,7 +509,7 @@ gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj - if (k < glo->images) - continue; - -- glo->image = fz_resize_array(ctx, glo->image, glo->images+1, sizeof(struct info)); -+ 
glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info); - glo->images++; - - glo->image[glo->images - 1].page = page; -@@ -568,7 +567,7 @@ gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj * - if (k < glo->forms) - continue; - -- glo->form = fz_resize_array(ctx, glo->form, glo->forms+1, sizeof(struct info)); -+ glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info); - glo->forms++; - - glo->form[glo->forms - 1].page = page; -@@ -613,7 +612,7 @@ gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj - if (k < glo->psobjs) - continue; - -- glo->psobj = fz_resize_array(ctx, glo->psobj, glo->psobjs+1, sizeof(struct info)); -+ glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info); - glo->psobjs++; - - glo->psobj[glo->psobjs - 1].page = page; -@@ -656,7 +655,7 @@ gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_ob - if (k < glo->shadings) - continue; - -- glo->shading = fz_resize_array(ctx, glo->shading, glo->shadings+1, sizeof(struct info)); -+ glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info); - glo->shadings++; - - glo->shading[glo->shadings - 1].page = page; -@@ -724,7 +723,7 @@ gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_ob - if (k < glo->patterns) - continue; - -- glo->pattern = fz_resize_array(ctx, glo->pattern, glo->patterns+1, sizeof(struct info)); -+ glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info); - glo->patterns++; - - glo->pattern[glo->patterns - 1].page = page; -@@ -1216,7 +1215,7 @@ void wmupdfinfo_get(char *filename,int *pagelist,char **buf) - if (fout==NULL) - return; - */ -- -+ - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { -@@ -1307,5 +1306,5 @@ static void date_convert(char *dst,char *src) - else if (src[i]!='\0') - sprintf(&dst[strlen(dst)]," %s",&src[i]); - } -- -+ - #endif /* 
HAVE_MUPDF_LIB */ diff --git a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch b/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch deleted file mode 100644 index cf7e4896b80..00000000000 --- a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch +++ /dev/null @@ -1,99 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 4a2378b..502c477 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -52,6 +52,7 @@ endif(JPEG_FOUND) - include(FindJasper) - if(JASPER_FOUND) - set(HAVE_JASPER_LIB 1) -+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY}) - endif(JASPER_FOUND) - - # paths from willuslib/wgs.c -@@ -66,8 +67,12 @@ else() - message(STATUS "Could NOT find ghostscript executable") - endif(GHOSTSCRIPT_EXECUTABLE) - --# willus.h --# HAVE_GSL_LIB -+pkg_check_modules(GSL gsl) -+if(GSL_FOUND) -+ set(HAVE_GSL_LIB 1) -+ include_directories(SYSTEM ${GSL_INCLUDEDIR}) -+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS}) -+endif(GSL_FOUND) - - - # libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0) -@@ -80,7 +85,7 @@ if(MUPDF_FOUND) - include_directories(SYSTEM ${MUPDF_INCLUDEDIR}) - message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}") - set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS} -- -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype -+ - ) - endif(MUPDF_FOUND) - -@@ -91,9 +96,25 @@ if(DJVU_FOUND) - set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${DJVU_LDFLAGS}) - endif(DJVU_FOUND) - --# HAVE_GOCR_LIB --# HAVE_LEPTONICA_LIB --# HAVE_TESSERACT_LIB -+find_library(GOCR_LIB NAMES Pgm2asc) -+if(GOCR_LIB) -+ set(HAVE_GOCR_LIB 1) -+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GOCR_LIB}) -+endif(GOCR_LIB) -+ -+pkg_check_modules(LEPTONICA lept) -+if(LEPTONICA_FOUND) -+ set(HAVE_LEPTONICA_LIB 1) -+ include_directories(SYSTEM ${LEPTONICA_INCLUDEDIR}) -+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${LEPTONICA_LDFLAGS}) -+endif(LEPTONICA_FOUND) -+ -+pkg_check_modules(TESSERACT tesseract) -+if(TESSERACT_FOUND) -+ set(HAVE_TESSERACT_LIB 1) -+ include_directories(SYSTEM 
${TESSERACT_INCLUDEDIR}) -+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${TESSERACT_LDFLAGS}) -+endif(TESSERACT_FOUND) - - # ---- Describe project - -diff --git a/willuslib/CMakeLists.txt b/willuslib/CMakeLists.txt -index 463bbc9..8043db5 100644 ---- a/willuslib/CMakeLists.txt -+++ b/willuslib/CMakeLists.txt -@@ -6,7 +6,7 @@ include_directories(..) - set(WILLUSLIB_SRC - ansi.c array.c bmp.c bmpdjvu.c bmpmupdf.c dtcompress.c filelist.c - fontdata.c fontrender.c gslpolyfit.c linux.c math.c mem.c ocr.c -- ocrjocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c -+ ocrgocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c - token.c wfile.c wgs.c wgui.c willusversion.c win.c winbmp.c - wincomdlg.c winmbox.c winshell.c wmupdf.c wmupdfinfo.c wpdf.c wsys.c - wzfile.c wleptonica.c -diff --git a/willuslib/ocrgocr.c b/willuslib/ocrgocr.c -index 6027e9a..fbe10f0 100644 ---- a/willuslib/ocrgocr.c -+++ b/willuslib/ocrgocr.c -@@ -29,6 +29,8 @@ - #ifdef HAVE_GOCR_LIB - #include <gocr.h> - -+job_t *OCR_JOB; -+ - /* - ** bmp8 must be grayscale - ** (x1,y1) and (x2,y2) from top left of bitmap -@@ -63,6 +65,7 @@ void gocr_single_word_from_bmp8(char *text,int maxlen,WILLUSBITMAP *bmp8, - h=y2-y1+1; - dh=h+bw*2; - job=&_job; -+ OCR_JOB=job; - job_init(job); - job_init_image(job); - // willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10); diff --git a/pkgs/applications/misc/k2pdfopt/leptonica.patch b/pkgs/applications/misc/k2pdfopt/leptonica.patch deleted file mode 100644 index dfab99fd013..00000000000 --- a/pkgs/applications/misc/k2pdfopt/leptonica.patch +++ /dev/null @@ -1,254 +0,0 @@ -From 8c11a20925686855023df90ed477957c7d7fe91e Mon Sep 17 00:00:00 2001 -From: Daniel Fullmer <danielrf12@gmail.com> -Date: Fri, 13 Sep 2019 15:54:21 -0400 -Subject: [PATCH] Willus mod for k2pdfopt - ---- - src/allheaders.h | 4 ++ - src/dewarp2.c | 106 ++++++++++++++++++++++++++++++++++++++++++----- - src/leptwin.c | 6 ++- - 3 files changed, 104 insertions(+), 12 deletions(-) - -diff 
--git a/src/allheaders.h b/src/allheaders.h -index e68eff1..b3cc729 100644 ---- a/src/allheaders.h -+++ b/src/allheaders.h -@@ -669,6 +669,10 @@ LEPT_DLL extern L_DEWARPA * dewarpaReadMem ( const l_uint8 *data, size_t size ); - LEPT_DLL extern l_ok dewarpaWrite ( const char *filename, L_DEWARPA *dewa ); - LEPT_DLL extern l_ok dewarpaWriteStream ( FILE *fp, L_DEWARPA *dewa ); - LEPT_DLL extern l_ok dewarpaWriteMem ( l_uint8 **pdata, size_t *psize, L_DEWARPA *dewa ); -+/* WILLUS MOD */ -+ LEPT_DLL extern l_int32 dewarpBuildPageModel_ex ( L_DEWARP *dew, const char *debugfile,l_int32 fit_order ); -+ LEPT_DLL extern l_int32 dewarpFindVertDisparity_ex ( L_DEWARP *dew, PTAA *ptaa, l_int32 rotflag,l_int32 fit_order ); -+ LEPT_DLL extern l_int32 dewarpBuildLineModel_ex ( L_DEWARP *dew, l_int32 opensize, const char *debugfile,l_int32 fit_order ); - LEPT_DLL extern l_ok dewarpBuildPageModel ( L_DEWARP *dew, const char *debugfile ); - LEPT_DLL extern l_ok dewarpFindVertDisparity ( L_DEWARP *dew, PTAA *ptaa, l_int32 rotflag ); - LEPT_DLL extern l_ok dewarpFindHorizDisparity ( L_DEWARP *dew, PTAA *ptaa ); -diff --git a/src/dewarp2.c b/src/dewarp2.c -index 220eec1..2e29500 100644 ---- a/src/dewarp2.c -+++ b/src/dewarp2.c -@@ -144,9 +144,17 @@ static const l_float32 L_ALLOWED_W_FRACT = 0.05; /* no bigger */ - * longest textlines. - * </pre> - */ -+/* WILLUS MOD */ - l_ok --dewarpBuildPageModel(L_DEWARP *dew, -- const char *debugfile) -+dewarpBuildPageModel(L_DEWARP *dew,const char *debugfile) -+{ -+return(dewarpBuildPageModel_ex(dew,debugfile,2)); -+} -+ -+l_ok -+dewarpBuildPageModel_ex(L_DEWARP *dew, -+ const char *debugfile, -+ l_int32 fit_order) - { - l_int32 linecount, topline, botline, ret; - PIX *pixs, *pix1, *pix2, *pix3; -@@ -225,7 +233,7 @@ PTAA *ptaa1, *ptaa2; - /* Get the sampled vertical disparity from the textline centers. - * The disparity array will push pixels vertically so that each - * textline is flat and centered at the y-position of the mid-point. 
*/ -- if (dewarpFindVertDisparity(dew, ptaa2, 0) != 0) { -+ if (dewarpFindVertDisparity_ex(dew, ptaa2, 0, fit_order) != 0) { - L_WARNING("vertical disparity not built\n", procName); - ptaaDestroy(&ptaa2); - return 1; -@@ -290,13 +298,24 @@ PTAA *ptaa1, *ptaa2; - * a pdf. Non-pix debug output goes to /tmp. - * </pre> - */ -+/* WILLUS MOD */ - l_ok - dewarpFindVertDisparity(L_DEWARP *dew, - PTAA *ptaa, - l_int32 rotflag) - { -+return(dewarpFindVertDisparity_ex(dew,ptaa,rotflag,2)); -+} -+/* WILLUS MOD -- add cubic and quartic fits and ..._ex functions */ -+l_int32 -+dewarpFindVertDisparity_ex(L_DEWARP *dew, -+ PTAA *ptaa, -+ l_int32 rotflag, -+ l_int32 fit_order) -+{ - l_int32 i, j, nlines, npts, nx, ny, sampling; --l_float32 c0, c1, c2, x, y, midy, val, medval, meddev, minval, maxval; -+/* WILLUS MOD */ -+l_float32 c0, c1, c2, c3, c4, x, y, midy, val, medval, meddev, minval, maxval; - l_float32 *famidys; - NUMA *nax, *nafit, *nacurve0, *nacurve1, *nacurves; - NUMA *namidy, *namidys, *namidysi; -@@ -304,11 +323,22 @@ PIX *pix1, *pix2, *pixcirc, *pixdb; - PTA *pta, *ptad, *ptacirc; - PTAA *ptaa0, *ptaa1, *ptaa2, *ptaa3, *ptaa4, *ptaa5, *ptaat; - FPIX *fpix; -+/* WILLUS MOD */ -+l_int32 fit_order1,fit_order2; - - PROCNAME("dewarpFindVertDisparity"); - - if (!dew) - return ERROR_INT("dew not defined", procName, 1); -+/* WILLUS MOD */ -+ if (fit_order < 10) -+ fit_order1 = fit_order2 = fit_order; -+ else -+ { -+ fit_order1=fit_order % 10; -+ fit_order2=fit_order / 10; -+ fit_order2=fit_order2 % 10; -+ } - dew->vsuccess = 0; - if (!ptaa) - return ERROR_INT("ptaa not defined", procName, 1); -@@ -331,12 +361,32 @@ FPIX *fpix; - pixdb = (rotflag) ? 
pixRotateOrth(dew->pixs, 1) : pixClone(dew->pixs); - for (i = 0; i < nlines; i++) { /* for each line */ - pta = ptaaGetPta(ptaa, i, L_CLONE); -- ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL); -- numaAddNumber(nacurve0, c2); -+/* WILLUS MOD */ -+if (fit_order1>3) -+ { -+ ptaGetQuarticLSF(pta, &c4, &c3, &c2, &c1, &c0, NULL); -+ numaAddNumber(nacurve0, c4); -+ } -+else if (fit_order1==3) -+ { -+ ptaGetCubicLSF(pta, &c3, &c2, &c1, &c0, NULL); -+ numaAddNumber(nacurve0, c3); -+ } -+else -+ { -+ ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL); -+ numaAddNumber(nacurve0, c2); -+ } - ptad = ptaCreate(nx); - for (j = 0; j < nx; j++) { /* uniformly sampled in x */ - x = j * sampling; -- applyQuadraticFit(c2, c1, c0, x, &y); -+/* WILLUS MOD */ -+if (fit_order1>3) -+ applyQuarticFit(c4, c3, c2, c1, c0, x, &y); -+else if (fit_order1==3) -+ applyCubicFit(c3, c2, c1, c0, x, &y); -+else -+ applyQuadraticFit(c2, c1, c0, x, &y); - ptaAddPt(ptad, x, y); - } - ptaaAddPta(ptaa0, ptad, L_INSERT); -@@ -350,7 +400,13 @@ FPIX *fpix; - for (i = 0; i < nlines; i++) { - pta = ptaaGetPta(ptaa, i, L_CLONE); - ptaGetArrays(pta, &nax, NULL); -- ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit); -+/* WILLUS MOD */ -+if (fit_order1>3) -+ptaGetQuarticLSF(pta, NULL, NULL, NULL, NULL, NULL, &nafit); -+else if (fit_order1==3) -+ptaGetCubicLSF(pta, NULL, NULL, NULL, NULL, &nafit); -+else -+ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit); - ptad = ptaCreateFromNuma(nax, nafit); - ptaaAddPta(ptaat, ptad, L_INSERT); - ptaDestroy(&pta); -@@ -494,11 +550,24 @@ FPIX *fpix; - ptaa5 = ptaaCreate(nx); /* uniformly sampled across full height of image */ - for (j = 0; j < nx; j++) { /* for each column */ - pta = ptaaGetPta(ptaa4, j, L_CLONE); -- ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL); -+/* WILLUS MOD */ -+/* Order higher than 2 can cause a little craziness here. 
*/ -+if (fit_order2>3) -+ ptaGetQuarticLSF(pta, &c4, &c3, &c2, &c1, &c0, NULL); -+else if (fit_order2==3) -+ ptaGetCubicLSF(pta, &c3, &c2, &c1, &c0, NULL); -+else -+ ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL); - ptad = ptaCreate(ny); - for (i = 0; i < ny; i++) { /* uniformly sampled in y */ - y = i * sampling; -- applyQuadraticFit(c2, c1, c0, y, &val); -+/* WILLUS MOD */ -+if (fit_order2>3) -+ applyQuarticFit(c4, c3, c2, c1, c0, y, &val); -+else if (fit_order2==3) -+ applyCubicFit(c3, c2, c1, c0, y, &val); -+else -+ applyQuadraticFit(c2, c1, c0, y, &val); - ptaAddPt(ptad, y, val); - } - ptaaAddPta(ptaa5, ptad, L_INSERT); -@@ -1602,11 +1671,21 @@ FPIX *fpix; - * See notes there. - * </pre> - */ -+/* WILLUS MOD */ - l_ok - dewarpBuildLineModel(L_DEWARP *dew, - l_int32 opensize, - const char *debugfile) - { -+return(dewarpBuildLineModel_ex(dew,opensize,debugfile,2)); -+} -+ -+l_int32 -+dewarpBuildLineModel_ex(L_DEWARP *dew, -+ l_int32 opensize, -+ const char *debugfile, -+ l_int32 fit_order) -+{ - char buf[64]; - l_int32 i, j, bx, by, ret, nlines; - BOXA *boxa; -@@ -1695,6 +1774,8 @@ PTAA *ptaa1, *ptaa2; - - /* Remove all lines that are not at least 0.75 times the length - * of the longest line. */ -+/* WILLUS MOD */ -+/* - ptaa2 = dewarpRemoveShortLines(pix, ptaa1, 0.75, DEBUG_SHORT_LINES); - if (debugfile) { - pix1 = pixConvertTo32(pix); -@@ -1704,6 +1785,8 @@ PTAA *ptaa1, *ptaa2; - pixDestroy(&pix1); - pixDestroy(&pix2); - } -+*/ -+ptaa2=ptaa1; - ptaaDestroy(&ptaa1); - nlines = ptaaGetCount(ptaa2); - if (nlines < dew->minlines) { -@@ -1717,7 +1800,8 @@ PTAA *ptaa1, *ptaa2; - * centers. The disparity array will push pixels vertically - * so that each line is flat and centered at the y-position - * of the mid-point. */ -- ret = dewarpFindVertDisparity(dew, ptaa2, 1 - i); -+/* WILLUS MOD */ -+ ret = dewarpFindVertDisparity_ex(dew, ptaa2, 1 - i, fit_order); - - /* If i == 0, move the result to the horizontal disparity, - * rotating it back by -90 degrees. 
*/ -diff --git a/src/leptwin.c b/src/leptwin.c -index 72643a0..573d33e 100644 ---- a/src/leptwin.c -+++ b/src/leptwin.c -@@ -364,5 +364,9 @@ PIXCMAP *cmap; - - return hBitmap; - } -- -+#else -+/* willus mod: Avoid weird issue with OS/X library archiver when there are no symbols */ -+int leptwin_my_empty_func(void); -+int leptwin_my_empty_func(void) -+{return(0);} - #endif /* _WIN32 */ --- -2.22.0 - diff --git a/pkgs/applications/misc/k2pdfopt/mupdf.patch b/pkgs/applications/misc/k2pdfopt/mupdf.patch deleted file mode 100644 index 0c59a1d2016..00000000000 --- a/pkgs/applications/misc/k2pdfopt/mupdf.patch +++ /dev/null @@ -1,1060 +0,0 @@ -From d8927c969e3387ca2669a616c0ba53bce918a031 Mon Sep 17 00:00:00 2001 -From: Daniel Fullmer <danielrf12@gmail.com> -Date: Fri, 13 Sep 2019 15:11:45 -0400 -Subject: [PATCH] Willus mod for k2pdfopt - ---- - source/fitz/filter-basic.c | 3 + - source/fitz/font-win32.c | 866 +++++++++++++++++++++++++++++++++++++ - source/fitz/font.c | 3 + - source/fitz/stext-device.c | 5 + - source/fitz/string.c | 5 + - source/pdf/pdf-annot.c | 14 +- - source/pdf/pdf-link.c | 3 + - source/pdf/pdf-parse.c | 5 + - source/pdf/pdf-xref.c | 9 + - 9 files changed, 912 insertions(+), 1 deletion(-) - create mode 100644 source/fitz/font-win32.c - -diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c -index 0713a62e7..b8ef4d292 100644 ---- a/source/fitz/filter-basic.c -+++ b/source/fitz/filter-basic.c -@@ -259,7 +259,10 @@ look_for_endstream: - if (!state->warned) - { - state->warned = 1; -+/* willus mod -- no warning */ -+/* - fz_warn(ctx, "PDF stream Length incorrect"); -+*/ - } - return *stm->rp++; - } -diff --git a/source/fitz/font-win32.c b/source/fitz/font-win32.c -new file mode 100644 -index 000000000..45de8cfd3 ---- /dev/null -+++ b/source/fitz/font-win32.c -@@ -0,0 +1,866 @@ -+/* -+** Routines to access MS Windows system fonts. -+** From sumatra PDF distro. 
-+** Modified for MuPDF v1.9a by willus.com -+*/ -+#include "mupdf/pdf.h" -+ -+/* -+ Which fonts are embedded is based on a few preprocessor definitions. -+ -+ The base 14 fonts are always embedded. -+ For CJK font substitution we embed DroidSansFallback. -+ -+ Set NOCJK to skip all CJK support (this also omits embedding the CJK CMaps) -+ Set NOCJKFONT to skip the embedded CJK font. -+ Set NOCJKFULL to embed a smaller CJK font without CJK Extension A support. -+*/ -+ -+#ifdef NOCJK -+#define NOCJKFONT -+#endif -+ -+/* SumatraPDF: also load fonts included with Windows */ -+#ifdef _WIN32 -+ -+#ifndef UNICODE -+#define UNICODE -+#endif -+#ifndef _UNICODE -+#define _UNICODE -+#endif -+ -+#include <windows.h> -+ -+// TODO: Use more of FreeType for TTF parsing (for performance reasons, -+// the fonts can't be parsed completely, though) -+#include <ft2build.h> -+#include FT_TRUETYPE_IDS_H -+#include FT_TRUETYPE_TAGS_H -+ -+#define TTC_VERSION1 0x00010000 -+#define TTC_VERSION2 0x00020000 -+ -+#define MAX_FACENAME 128 -+ -+// Note: the font face must be the first field so that the structure -+// can be treated like a simple string for searching -+typedef struct pdf_fontmapMS_s -+{ -+ char fontface[MAX_FACENAME]; -+ char fontpath[MAX_PATH]; -+ int index; -+} pdf_fontmapMS; -+ -+typedef struct pdf_fontlistMS_s -+{ -+ pdf_fontmapMS *fontmap; -+ int len; -+ int cap; -+} pdf_fontlistMS; -+ -+typedef struct _tagTT_OFFSET_TABLE -+{ -+ ULONG uVersion; -+ USHORT uNumOfTables; -+ USHORT uSearchRange; -+ USHORT uEntrySelector; -+ USHORT uRangeShift; -+} TT_OFFSET_TABLE; -+ -+typedef struct _tagTT_TABLE_DIRECTORY -+{ -+ ULONG uTag; //table name -+ ULONG uCheckSum; //Check sum -+ ULONG uOffset; //Offset from beginning of file -+ ULONG uLength; //length of the table in bytes -+} TT_TABLE_DIRECTORY; -+ -+typedef struct _tagTT_NAME_TABLE_HEADER -+{ -+ USHORT uFSelector; //format selector. 
Always 0 -+ USHORT uNRCount; //Name Records count -+ USHORT uStorageOffset; //Offset for strings storage, from start of the table -+} TT_NAME_TABLE_HEADER; -+ -+typedef struct _tagTT_NAME_RECORD -+{ -+ USHORT uPlatformID; -+ USHORT uEncodingID; -+ USHORT uLanguageID; -+ USHORT uNameID; -+ USHORT uStringLength; -+ USHORT uStringOffset; //from start of storage area -+} TT_NAME_RECORD; -+ -+typedef struct _tagFONT_COLLECTION -+{ -+ ULONG Tag; -+ ULONG Version; -+ ULONG NumFonts; -+} FONT_COLLECTION; -+ -+static struct { -+ char *name; -+ char *pattern; -+} baseSubstitutes[] = { -+ { "Courier", "CourierNewPSMT" }, -+ { "Courier-Bold", "CourierNewPS-BoldMT" }, -+ { "Courier-Oblique", "CourierNewPS-ItalicMT" }, -+ { "Courier-BoldOblique", "CourierNewPS-BoldItalicMT" }, -+ { "Helvetica", "ArialMT" }, -+ { "Helvetica-Bold", "Arial-BoldMT" }, -+ { "Helvetica-Oblique", "Arial-ItalicMT" }, -+ { "Helvetica-BoldOblique", "Arial-BoldItalicMT" }, -+ { "Times-Roman", "TimesNewRomanPSMT" }, -+ { "Times-Bold", "TimesNewRomanPS-BoldMT" }, -+ { "Times-Italic", "TimesNewRomanPS-ItalicMT" }, -+ { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT" }, -+ { "Symbol", "SymbolMT" }, -+}; -+static const char *base_font_names[][10] = -+{ -+ { "Courier", "CourierNew", "CourierNewPSMT", NULL }, -+ { "Courier-Bold", "CourierNew,Bold", "Courier,Bold", -+ "CourierNewPS-BoldMT", "CourierNew-Bold", NULL }, -+ { "Courier-Oblique", "CourierNew,Italic", "Courier,Italic", -+ "CourierNewPS-ItalicMT", "CourierNew-Italic", NULL }, -+ { "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic", -+ "CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL }, -+ { "Helvetica", "ArialMT", "Arial", NULL }, -+ { "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold", -+ "Helvetica,Bold", NULL }, -+ { "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic", -+ "Helvetica,Italic", "Helvetica-Italic", NULL }, -+ { "Helvetica-BoldOblique", "Arial-BoldItalicMT", -+ "Arial,BoldItalic", 
"Arial-BoldItalic", -+ "Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL }, -+ { "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman", -+ "TimesNewRomanPS", NULL }, -+ { "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold", -+ "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL }, -+ { "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic", -+ "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL }, -+ { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT", -+ "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic", -+ "TimesNewRoman-BoldItalic", NULL }, -+ { "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic", -+ "SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL }, -+ { "ZapfDingbats", NULL } -+}; -+ -+static pdf_fontlistMS fontlistMS = -+{ -+ NULL, -+ 0, -+ 0, -+}; -+static int strcmp_ignore_space(const char *a, const char *b); -+static const char *clean_font_name(const char *fontname); -+static const char *pdf_clean_base14_name(const char *fontname); -+ -+static inline USHORT BEtoHs(USHORT x) -+{ -+ BYTE *data = (BYTE *)&x; -+ return (data[0] << 8) | data[1]; -+} -+ -+static inline ULONG BEtoHl(ULONG x) -+{ -+ BYTE *data = (BYTE *)&x; -+ return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; -+} -+ -+static int strcmp_ignore_space(const char *a, const char *b) -+{ -+ while (1) -+ { -+ while (*a == ' ') -+ a++; -+ while (*b == ' ') -+ b++; -+ if (*a != *b) -+ return 1; -+ if (*a == 0) -+ return *a != *b; -+ if (*b == 0) -+ return *a != *b; -+ a++; -+ b++; -+ } -+} -+ -+/* A little bit more sophisticated name matching so that e.g. "EurostileExtended" -+ matches "EurostileExtended-Roman" or "Tahoma-Bold,Bold" matches "Tahoma-Bold" */ -+static int -+lookup_compare(const void *elem1, const void *elem2) -+{ -+ const char *val1 = elem1; -+ const char *val2 = elem2; -+ int len1 = strlen(val1); -+ int len2 = strlen(val2); -+ -+ if (len1 != len2) -+ { -+ const char *rest = len1 > len2 ? 
val1 + len2 : val2 + len1; -+ if (',' == *rest || !_stricmp(rest, "-roman")) -+ return _strnicmp(val1, val2, fz_mini(len1, len2)); -+ } -+ -+ return _stricmp(val1, val2); -+} -+ -+static void -+remove_spaces(char *srcDest) -+{ -+ char *dest; -+ -+ for (dest = srcDest; *srcDest; srcDest++) -+ if (*srcDest != ' ') -+ *dest++ = *srcDest; -+ *dest = '\0'; -+} -+ -+static int -+str_ends_with(const char *str, const char *end) -+{ -+ size_t len1 = strlen(str); -+ size_t len2 = strlen(end); -+ -+ return len1 >= len2 && !strcmp(str + len1 - len2, end); -+} -+ -+static pdf_fontmapMS * -+pdf_find_windows_font_path(const char *fontname) -+{ -+ return bsearch(fontname, fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), lookup_compare); -+} -+ -+/* source and dest can be same */ -+static void -+decode_unicode_BE(fz_context *ctx, char *source, int sourcelen, char *dest, int destlen) -+{ -+ WCHAR *tmp; -+ int converted, i; -+ -+ if (sourcelen % 2 != 0) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string"); -+ -+ tmp = fz_malloc_array(ctx, sourcelen / 2 + 1, sizeof(WCHAR)); -+ for (i = 0; i < sourcelen / 2; i++) -+ tmp[i] = BEtoHs(((WCHAR *)source)[i]); -+ tmp[sourcelen / 2] = '\0'; -+ -+ converted = WideCharToMultiByte(CP_UTF8, 0, tmp, -1, dest, destlen, NULL, NULL); -+ fz_free(ctx, tmp); -+ if (!converted) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string"); -+} -+ -+static void -+decode_platform_string(fz_context *ctx, int platform, int enctype, char *source, int sourcelen, char *dest, int destlen) -+{ -+ switch (platform) -+ { -+ case TT_PLATFORM_APPLE_UNICODE: -+ switch (enctype) -+ { -+ case TT_APPLE_ID_DEFAULT: -+ case TT_APPLE_ID_UNICODE_2_0: -+ decode_unicode_BE(ctx, source, sourcelen, dest, destlen); -+ return; -+ } -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); -+ case TT_PLATFORM_MACINTOSH: -+ switch (enctype) -+ { -+ case TT_MAC_ID_ROMAN: -+ if (sourcelen + 1 > destlen) 
-+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : overlong fontname: %s", source); -+ // TODO: Convert to UTF-8 from what encoding? -+ memcpy(dest, source, sourcelen); -+ dest[sourcelen] = 0; -+ return; -+ } -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); -+ case TT_PLATFORM_MICROSOFT: -+ switch (enctype) -+ { -+ case TT_MS_ID_SYMBOL_CS: -+ case TT_MS_ID_UNICODE_CS: -+ case TT_MS_ID_UCS_4: -+ decode_unicode_BE(ctx, source, sourcelen, dest, destlen); -+ return; -+ } -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); -+ default: -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); -+ } -+} -+ -+static void -+grow_system_font_list(fz_context *ctx, pdf_fontlistMS *fl) -+{ -+ int newcap; -+ pdf_fontmapMS *newitems; -+ -+ if (fl->cap == 0) -+ newcap = 1024; -+ else -+ newcap = fl->cap * 2; -+ -+ // use realloc/free for the fontmap, since the list can -+ // remain in memory even with all fz_contexts destroyed -+ newitems = realloc(fl->fontmap, newcap * sizeof(pdf_fontmapMS)); -+ if (!newitems) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "OOM in grow_system_font_list"); -+ memset(newitems + fl->cap, 0, sizeof(pdf_fontmapMS) * (newcap - fl->cap)); -+ -+ fl->fontmap = newitems; -+ fl->cap = newcap; -+} -+ -+static void -+append_mapping(fz_context *ctx, pdf_fontlistMS *fl, const char *facename, const char *path, int index) -+{ -+ if (fl->len == fl->cap) -+ grow_system_font_list(ctx, fl); -+ -+ if (fl->len >= fl->cap) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : fontlist overflow"); -+ -+ fz_strlcpy(fl->fontmap[fl->len].fontface, facename, sizeof(fl->fontmap[0].fontface)); -+ fz_strlcpy(fl->fontmap[fl->len].fontpath, path, sizeof(fl->fontmap[0].fontpath)); -+ fl->fontmap[fl->len].index = index; -+ -+ ++fl->len; -+} -+ -+static void -+safe_read(fz_context *ctx, fz_stream *file, int offset, char *buf, int size) -+{ -+ int n; -+ fz_seek(ctx, 
file, offset, 0); -+ n = fz_read(ctx, file, (unsigned char *)buf, size); -+ if (n != size) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "safe_read: read %d, expected %d", n, size); -+} -+ -+static void -+read_ttf_string(fz_context *ctx, fz_stream *file, int offset, TT_NAME_RECORD *ttRecordBE, char *buf, int size) -+{ -+ char szTemp[MAX_FACENAME * 2]; -+ // ignore empty and overlong strings -+ int stringLength = BEtoHs(ttRecordBE->uStringLength); -+ if (stringLength == 0 || stringLength >= sizeof(szTemp)) -+ return; -+ -+ safe_read(ctx, file, offset + BEtoHs(ttRecordBE->uStringOffset), szTemp, stringLength); -+ decode_platform_string(ctx, BEtoHs(ttRecordBE->uPlatformID), -+ BEtoHs(ttRecordBE->uEncodingID), szTemp, stringLength, buf, size); -+} -+ -+static void -+makeFakePSName(char szName[MAX_FACENAME], const char *szStyle) -+{ -+ // append the font's subfamily, unless it's a Regular font -+ if (*szStyle && _stricmp(szStyle, "Regular") != 0) -+ { -+ fz_strlcat(szName, "-", MAX_FACENAME); -+ fz_strlcat(szName, szStyle, MAX_FACENAME); -+ } -+ remove_spaces(szName); -+} -+ -+static void -+parseTTF(fz_context *ctx, fz_stream *file, int offset, int index, const char *path) -+{ -+ TT_OFFSET_TABLE ttOffsetTableBE; -+ TT_TABLE_DIRECTORY tblDirBE; -+ TT_NAME_TABLE_HEADER ttNTHeaderBE; -+ TT_NAME_RECORD ttRecordBE; -+ -+ char szPSName[MAX_FACENAME] = { 0 }; -+ char szTTName[MAX_FACENAME] = { 0 }; -+ char szStyle[MAX_FACENAME] = { 0 }; -+ char szCJKName[MAX_FACENAME] = { 0 }; -+ int i, count, tblOffset; -+ -+ safe_read(ctx, file, offset, (char *)&ttOffsetTableBE, sizeof(TT_OFFSET_TABLE)); -+ -+ // check if this is a TrueType font of version 1.0 or an OpenType font -+ if (BEtoHl(ttOffsetTableBE.uVersion) != TTC_VERSION1 && -+ BEtoHl(ttOffsetTableBE.uVersion) != TTAG_OTTO) -+ { -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid font version %x", (unsigned int)BEtoHl(ttOffsetTableBE.uVersion)); -+ } -+ -+ // determine the name table's offset by iterating through the offset table -+ 
count = BEtoHs(ttOffsetTableBE.uNumOfTables); -+ for (i = 0; i < count; i++) -+ { -+ int entryOffset = offset + sizeof(TT_OFFSET_TABLE) + i * sizeof(TT_TABLE_DIRECTORY); -+ safe_read(ctx, file, entryOffset, (char *)&tblDirBE, sizeof(TT_TABLE_DIRECTORY)); -+ if (!BEtoHl(tblDirBE.uTag) || BEtoHl(tblDirBE.uTag) == TTAG_name) -+ break; -+ } -+ if (count == i || !BEtoHl(tblDirBE.uTag)) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : nameless font"); -+ tblOffset = BEtoHl(tblDirBE.uOffset); -+ -+ // read the 'name' table for record count and offsets -+ safe_read(ctx, file, tblOffset, (char *)&ttNTHeaderBE, sizeof(TT_NAME_TABLE_HEADER)); -+ offset = tblOffset + sizeof(TT_NAME_TABLE_HEADER); -+ tblOffset += BEtoHs(ttNTHeaderBE.uStorageOffset); -+ -+ // read through the strings for PostScript name and font family -+ count = BEtoHs(ttNTHeaderBE.uNRCount); -+ for (i = 0; i < count; i++) -+ { -+ short langId, nameId; -+ BOOL isCJKName; -+ -+ safe_read(ctx, file, offset + i * sizeof(TT_NAME_RECORD), (char *)&ttRecordBE, sizeof(TT_NAME_RECORD)); -+ -+ langId = BEtoHs(ttRecordBE.uLanguageID); -+ nameId = BEtoHs(ttRecordBE.uNameID); -+ isCJKName = TT_NAME_ID_FONT_FAMILY == nameId && LANG_CHINESE == PRIMARYLANGID(langId); -+ -+ // ignore non-English strings (except for Chinese font names) -+ if (langId && langId != TT_MS_LANGID_ENGLISH_UNITED_STATES && !isCJKName) -+ continue; -+ // ignore names other than font (sub)family and PostScript name -+ fz_try(ctx) -+ { -+ if (isCJKName) -+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szCJKName, sizeof(szCJKName)); -+ else if (TT_NAME_ID_FONT_FAMILY == nameId) -+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szTTName, sizeof(szTTName)); -+ else if (TT_NAME_ID_FONT_SUBFAMILY == nameId) -+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szStyle, sizeof(szStyle)); -+ else if (TT_NAME_ID_PS_NAME == nameId) -+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szPSName, sizeof(szPSName)); -+ } -+ fz_catch(ctx) -+ { -+ fz_warn(ctx, 
"ignoring face name decoding fonterror"); -+ } -+ } -+ -+ // try to prevent non-Arial fonts from accidentally substituting Arial -+ if (!strcmp(szPSName, "ArialMT")) -+ { -+ // cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2471 -+ if (strcmp(szTTName, "Arial") != 0) -+ szPSName[0] = '\0'; -+ // TODO: is there a better way to distinguish Arial Caps from Arial proper? -+ // cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1290 -+ else if (strstr(path, "caps") || strstr(path, "Caps")) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "ignore %s, as it can't be distinguished from Arial,Regular", path); -+ } -+ -+ if (szPSName[0]) -+ append_mapping(ctx, &fontlistMS, szPSName, path, index); -+ if (szTTName[0]) -+ { -+ // derive a PostScript-like name and add it, if it's different from the font's -+ // included PostScript name; cf. http://code.google.com/p/sumatrapdf/issues/detail?id=376 -+ makeFakePSName(szTTName, szStyle); -+ // compare the two names before adding this one -+ if (lookup_compare(szTTName, szPSName)) -+ append_mapping(ctx, &fontlistMS, szTTName, path, index); -+ } -+ if (szCJKName[0]) -+ { -+ makeFakePSName(szCJKName, szStyle); -+ if (lookup_compare(szCJKName, szPSName) && lookup_compare(szCJKName, szTTName)) -+ append_mapping(ctx, &fontlistMS, szCJKName, path, index); -+ } -+} -+ -+static void -+parseTTFs(fz_context *ctx, const char *path) -+{ -+ fz_stream *file = fz_open_file(ctx, path); -+ /* "fonterror : %s not found", path */ -+ fz_try(ctx) -+ { -+ parseTTF(ctx, file, 0, 0, path); -+ } -+ fz_always(ctx) -+ { -+ fz_drop_stream(ctx,file); -+ } -+ fz_catch(ctx) -+ { -+ fz_rethrow(ctx); -+ } -+} -+ -+static void -+parseTTCs(fz_context *ctx, const char *path) -+{ -+ FONT_COLLECTION fontcollectionBE; -+ ULONG i, numFonts, *offsettableBE = NULL; -+ -+ fz_stream *file = fz_open_file(ctx, path); -+ /* "fonterror : %s not found", path */ -+ -+ fz_var(offsettableBE); -+ -+ fz_try(ctx) -+ { -+ safe_read(ctx, file, 0, (char *)&fontcollectionBE, 
sizeof(FONT_COLLECTION)); -+ if (BEtoHl(fontcollectionBE.Tag) != TTAG_ttcf) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : wrong format %x", (unsigned int)BEtoHl(fontcollectionBE.Tag)); -+ if (BEtoHl(fontcollectionBE.Version) != TTC_VERSION1 && -+ BEtoHl(fontcollectionBE.Version) != TTC_VERSION2) -+ { -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid version %x", (unsigned int)BEtoHl(fontcollectionBE.Version)); -+ } -+ -+ numFonts = BEtoHl(fontcollectionBE.NumFonts); -+ offsettableBE = fz_malloc_array(ctx, numFonts, sizeof(ULONG)); -+ -+ safe_read(ctx, file, sizeof(FONT_COLLECTION), (char *)offsettableBE, numFonts * sizeof(ULONG)); -+ for (i = 0; i < numFonts; i++) -+ parseTTF(ctx, file, BEtoHl(offsettableBE[i]), i, path); -+ } -+ fz_always(ctx) -+ { -+ fz_free(ctx, offsettableBE); -+ fz_drop_stream(ctx,file); -+ } -+ fz_catch(ctx) -+ { -+ fz_rethrow(ctx); -+ } -+} -+ -+static void -+extend_system_font_list(fz_context *ctx, const WCHAR *path) -+{ -+ WCHAR szPath[MAX_PATH], *lpFileName; -+ WIN32_FIND_DATA FileData; -+ HANDLE hList; -+ -+ GetFullPathName(path, nelem(szPath), szPath, &lpFileName); -+ -+ hList = FindFirstFile(szPath, &FileData); -+ if (hList == INVALID_HANDLE_VALUE) -+ { -+ // Don't complain about missing directories -+ if (GetLastError() == ERROR_FILE_NOT_FOUND) -+ return; -+ fz_throw(ctx, FZ_ERROR_GENERIC, "extend_system_font_list: unknown error %d", (int)GetLastError()); -+ } -+ do -+ { -+ if (!(FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) -+ { -+ char szPathUtf8[MAX_PATH], *fileExt; -+ int res; -+ lstrcpyn(lpFileName, FileData.cFileName, szPath + MAX_PATH - lpFileName); -+ res = WideCharToMultiByte(CP_UTF8, 0, szPath, -1, szPathUtf8, sizeof(szPathUtf8), NULL, NULL); -+ if (!res) -+ { -+ fz_warn(ctx, "WideCharToMultiByte failed for %S", szPath); -+ continue; -+ } -+ fileExt = szPathUtf8 + strlen(szPathUtf8) - 4; -+ fz_try(ctx) -+ { -+ if (!_stricmp(fileExt, ".ttc")) -+ parseTTCs(ctx, szPathUtf8); -+ else if (!_stricmp(fileExt, 
".ttf") || !_stricmp(fileExt, ".otf")) -+ parseTTFs(ctx, szPathUtf8); -+ } -+ fz_catch(ctx) -+ { -+ // ignore errors occurring while parsing a given font file -+ } -+ } -+ } while (FindNextFile(hList, &FileData)); -+ FindClose(hList); -+} -+ -+static void -+destroy_system_font_list(void) -+{ -+ free(fontlistMS.fontmap); -+ memset(&fontlistMS, 0, sizeof(fontlistMS)); -+} -+ -+static void -+create_system_font_list(fz_context *ctx) -+{ -+ WCHAR szFontDir[MAX_PATH]; -+ UINT cch; -+ -+ cch = GetWindowsDirectory(szFontDir, nelem(szFontDir) - 12); -+ if (0 < cch && cch < nelem(szFontDir) - 12) -+ { -+ /* willus.com edit--Win XP default MSVCRT.DLL doesn't have wcscat_s */ -+#ifdef _WIN64 -+ wcscat_s(szFontDir, MAX_PATH, L"\\Fonts\\*.?t?"); -+#else -+ wcscat(szFontDir,L"\\Fonts\\*.?t?"); -+#endif -+ extend_system_font_list(ctx, szFontDir); -+ } -+ -+ if (fontlistMS.len == 0) -+ fz_warn(ctx, "couldn't find any usable system fonts"); -+ -+#ifdef NOCJKFONT -+ { -+ // If no CJK fallback font is builtin but one has been shipped separately (in the same -+ // directory as the main executable), add it to the list of loadable system fonts -+ WCHAR szFile[MAX_PATH], *lpFileName; -+ GetModuleFileName(0, szFontDir, MAX_PATH); -+ GetFullPathName(szFontDir, MAX_PATH, szFile, &lpFileName); -+ lstrcpyn(lpFileName, L"DroidSansFallback.ttf", szFile + MAX_PATH - lpFileName); -+ extend_system_font_list(ctx, szFile); -+ } -+#endif -+ -+ // sort the font list, so that it can be searched binarily -+ qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp); -+ -+#ifdef DEBUG -+ // allow to overwrite system fonts for debugging purposes -+ // (either pass a full path or a search pattern such as "fonts\*.ttf") -+ cch = GetEnvironmentVariable(L"MUPDF_FONTS_PATTERN", szFontDir, nelem(szFontDir)); -+ if (0 < cch && cch < nelem(szFontDir)) -+ { -+ int i, prev_len = fontlistMS.len; -+ extend_system_font_list(ctx, szFontDir); -+ for (i = prev_len; i < fontlistMS.len; i++) -+ { -+ 
pdf_fontmapMS *entry = bsearch(fontlistMS.fontmap[i].fontface, fontlistMS.fontmap, prev_len, sizeof(pdf_fontmapMS), lookup_compare); -+ if (entry) -+ *entry = fontlistMS.fontmap[i]; -+ } -+ qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp); -+ } -+#endif -+ -+ // make sure to clean up after ourselves -+ atexit(destroy_system_font_list); -+} -+ -+static fz_font * -+pdf_load_windows_font_by_name(fz_context *ctx, const char *orig_name) -+{ -+ pdf_fontmapMS *found = NULL; -+ char *comma, *fontname; -+ fz_font *font; -+ -+ /* WILLUS MOD--not multi-threaded for k2pdfopt */ -+ /* fz_synchronize_begin(); */ -+ if (fontlistMS.len == 0) -+ { -+ fz_try(ctx) -+ { -+ create_system_font_list(ctx); -+ } -+ fz_catch(ctx) { } -+ } -+ /* WILLUS MOD--not multi-threaded for k2pdfopt */ -+ /* fz_synchronize_end(); */ -+ if (fontlistMS.len == 0) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: couldn't find any fonts"); -+ -+ // work on a normalized copy of the font name -+ fontname = fz_strdup(ctx, orig_name); -+ remove_spaces(fontname); -+ -+ // first, try to find the exact font name (including appended style information) -+ comma = strchr(fontname, ','); -+ if (comma) -+ { -+ *comma = '-'; -+ found = pdf_find_windows_font_path(fontname); -+ *comma = ','; -+ } -+ // second, substitute the font name with a known PostScript name -+ else -+ { -+ int i; -+ for (i = 0; i < nelem(baseSubstitutes) && !found; i++) -+ if (!strcmp(fontname, baseSubstitutes[i].name)) -+ found = pdf_find_windows_font_path(baseSubstitutes[i].pattern); -+ } -+ // third, search for the font name without additional style information -+ if (!found) -+ found = pdf_find_windows_font_path(fontname); -+ // fourth, try to separate style from basename for prestyled fonts (e.g. "ArialBold") -+ if (!found && !comma && (str_ends_with(fontname, "Bold") || str_ends_with(fontname, "Italic"))) -+ { -+ int styleLen = str_ends_with(fontname, "Bold") ? 4 : str_ends_with(fontname, "BoldItalic") ? 
10 : 6; -+ fontname = fz_resize_array(ctx, fontname, strlen(fontname) + 2, sizeof(char)); -+ comma = fontname + strlen(fontname) - styleLen; -+ memmove(comma + 1, comma, styleLen + 1); -+ *comma = '-'; -+ found = pdf_find_windows_font_path(fontname); -+ *comma = ','; -+ if (!found) -+ found = pdf_find_windows_font_path(fontname); -+ } -+ // fifth, try to convert the font name from the common Chinese codepage 936 -+ if (!found && fontname[0] < 0) -+ { -+ WCHAR cjkNameW[MAX_FACENAME]; -+ char cjkName[MAX_FACENAME]; -+ if (MultiByteToWideChar(936, MB_ERR_INVALID_CHARS, fontname, -1, cjkNameW, nelem(cjkNameW)) && -+ WideCharToMultiByte(CP_UTF8, 0, cjkNameW, -1, cjkName, nelem(cjkName), NULL, NULL)) -+ { -+ comma = strchr(cjkName, ','); -+ if (comma) -+ { -+ *comma = '-'; -+ found = pdf_find_windows_font_path(cjkName); -+ *comma = ','; -+ } -+ if (!found) -+ found = pdf_find_windows_font_path(cjkName); -+ } -+ } -+ -+ fz_free(ctx, fontname); -+ if (!found) -+ fz_throw(ctx, FZ_ERROR_GENERIC, "couldn't find system font '%s'", orig_name); -+ -+ /* -+ fz_warn(ctx, "loading non-embedded font '%s' from '%s'", orig_name, found->fontpath); -+ */ -+ -+ font = fz_new_font_from_file(ctx, orig_name, found->fontpath, found->index, -+ strcmp(found->fontface, "DroidSansFallback") != 0); -+ /* willus mod for MuPDF v1.10, 10-21-2016 */ -+ { -+ fz_font_flags_t *flags; -+ flags=fz_font_flags(font); -+ if (flags!=NULL) -+ flags->ft_substitute = 1; -+ } -+ return font; -+} -+ -+static fz_font * -+pdf_load_windows_font(fz_context *ctx, const char *fontname, int bold, int italic, int needs_exact_metrics) -+{ -+ if (needs_exact_metrics) -+ { -+ const char *clean_name; -+ /* WILLUS: Declare pdf_clean_base14_name() */ -+ extern const char *pdf_clean_base14_name(const char *fontname); -+ -+ /* TODO: the metrics for Times-Roman and Courier don't match -+ those of Windows' Times New Roman and Courier New; for -+ some reason, Poppler doesn't seem to have this problem */ -+ int len; -+ if 
(fz_lookup_builtin_font(ctx,fontname, bold, italic, &len)) -+ return NULL; -+ -+ /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2173 */ -+ clean_name = pdf_clean_base14_name(fontname); -+ if (clean_name != fontname && !strncmp(clean_name, "Times-", 6)) -+ return NULL; -+ } -+ -+ // TODO: unset font->ft_substitute for base14/needs_exact_metrics? -+ return pdf_load_windows_font_by_name(ctx, fontname); -+} -+ -+static const char *clean_font_name(const char *fontname) -+{ -+ int i, k; -+ for (i = 0; i < nelem(base_font_names); i++) -+ for (k = 0; base_font_names[i][k]; k++) -+ if (!strcmp_ignore_space(base_font_names[i][k], fontname)) -+ return base_font_names[i][0]; -+ return fontname; -+} -+ -+ -+/* SumatraPDF: expose clean_font_name */ -+static const char * pdf_clean_base14_name(const char *fontname) -+{ -+ return clean_font_name(fontname); -+} -+ -+static fz_font * -+pdf_load_windows_cjk_font(fz_context *ctx, const char *fontname, int ros, int serif) -+{ -+ fz_font *font; -+ -+ font=NULL; /* WILLUS: Avoid compiler warning */ -+ /* try to find a matching system font before falling back to an approximate one */ -+ fz_try(ctx) -+ { -+ font = pdf_load_windows_font_by_name(ctx, fontname); -+ } -+ fz_catch(ctx) -+ { -+ font = NULL; -+ } -+ if (font) -+ return font; -+ -+ /* try to fall back to a reasonable system font */ -+ fz_try(ctx) -+ { -+ if (serif) -+ { -+ switch (ros) -+ { -+ case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "MingLiU"); break; -+ case FZ_ADOBE_GB: font = pdf_load_windows_font_by_name(ctx, "SimSun"); break; -+ case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Mincho"); break; -+ case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Batang"); break; -+ default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid serif ros"); -+ } -+ } -+ else -+ { -+ switch (ros) -+ { -+ case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "DFKaiShu-SB-Estd-BF"); break; -+ case FZ_ADOBE_GB: -+ fz_try(ctx) -+ { -+ font 
= pdf_load_windows_font_by_name(ctx, "KaiTi"); -+ } -+ fz_catch(ctx) -+ { -+ font = pdf_load_windows_font_by_name(ctx, "KaiTi_GB2312"); -+ } -+ break; -+ case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Gothic"); break; -+ case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Gulim"); break; -+ default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid sans-serif ros"); -+ } -+ } -+ } -+ fz_catch(ctx) -+ { -+#ifdef NOCJKFONT -+ /* If no CJK fallback font is builtin, maybe one has been shipped separately */ -+ font = pdf_load_windows_font_by_name(ctx, "DroidSansFallback"); -+#else -+ fz_rethrow(ctx); -+#endif -+ } -+ -+ return font; -+} -+ -+#endif -+ -+void pdf_install_load_system_font_funcs(fz_context *ctx) -+{ -+#ifdef _WIN32 -+ fz_install_load_system_font_funcs(ctx, pdf_load_windows_font, pdf_load_windows_cjk_font, NULL); -+#endif -+} -diff --git a/source/fitz/font.c b/source/fitz/font.c -index 00c6e8f99..1448b4a56 100644 ---- a/source/fitz/font.c -+++ b/source/fitz/font.c -@@ -4,8 +4,11 @@ - #include "draw-imp.h" - - #include <ft2build.h> -+/* willus mod -- remove hb includes */ -+/* - #include "hb.h" - #include "hb-ft.h" -+*/ - - #include <assert.h> - -diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c -index 2df90305e..b1f99e056 100644 ---- a/source/fitz/stext-device.c -+++ b/source/fitz/stext-device.c -@@ -825,6 +825,11 @@ fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options - dev->lastchar = ' '; - dev->curdir = 1; - dev->lasttext = NULL; -+ /* willus mod -- seems like this should be here, but not sure. 
*/ -+ if (opts) -+ dev->flags = opts->flags; -+ else -+ dev->flags = 0; - - return (fz_device*)dev; - } -diff --git a/source/fitz/string.c b/source/fitz/string.c -index f8eedb682..7a767983d 100644 ---- a/source/fitz/string.c -+++ b/source/fitz/string.c -@@ -560,6 +560,10 @@ fz_utflen(const char *s) - */ - float fz_atof(const char *s) - { -+/* willus mod: atof(s), #if-#else-#endif */ -+#if (!defined(__SSE__)) -+ return(atof(s)); -+#else - float result; - - if (s == NULL) -@@ -572,6 +576,7 @@ float fz_atof(const char *s) - return 1; - result = fz_clamp(result, -FLT_MAX, FLT_MAX); - return result; -+#endif - } - - /* -diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c -index 4dfdf36fe..acff7d12a 100644 ---- a/source/pdf/pdf-annot.c -+++ b/source/pdf/pdf-annot.c -@@ -5,8 +5,20 @@ - #include <string.h> - #include <time.h> - -+/* willus mod--don't use _mkgmtime--not available in Win XP */ - #ifdef _WIN32 --#define timegm _mkgmtime -+static time_t timegm(struct tm *date); -+static time_t timegm(struct tm *date) -+ -+ { -+ time_t t,z; -+ struct tm gmz; -+ -+ z=(time_t)0; -+ gmz=(*gmtime(&z)); -+ t=mktime(date)-mktime(&gmz); -+ return(t); -+ } - #endif - - #define isdigit(c) (c >= '0' && c <= '9') -diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c -index 37444b471..613cc05b9 100644 ---- a/source/pdf/pdf-link.c -+++ b/source/pdf/pdf-link.c -@@ -345,6 +345,9 @@ pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp, - } - return page; - } -+/* willus mod -- be quiet */ -+/* - fz_warn(ctx, "unknown link uri '%s'", uri); -+*/ - return -1; - } -diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c -index 04a772204..9dd0cd898 100644 ---- a/source/pdf/pdf-parse.c -+++ b/source/pdf/pdf-parse.c -@@ -663,9 +663,14 @@ pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, - if (c == '\r') - { - c = fz_peek_byte(ctx, file); -+/* willus mod -- no warning */ -+/* - if (c != '\n') - fz_warn(ctx, "line feed missing after stream begin marker 
(%d %d R)", num, gen); - else -+*/ -+if (c=='\n') -+/* willus mod -- end */ - fz_read_byte(ctx, file); - } - stm_ofs = fz_tell(ctx, file); -diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c -index 8f888059b..08de7bfba 100644 ---- a/source/pdf/pdf-xref.c -+++ b/source/pdf/pdf-xref.c -@@ -710,8 +710,11 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b - if (!s) - fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing"); - len = fz_atoi(fz_strsep(&s, " ")); -+/* willus mod -- no warning */ -+/* - if (len < 0) - fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive"); -+*/ - - /* broken pdfs where the section is not on a separate line */ - if (s && *s != '\0') -@@ -1378,7 +1381,10 @@ pdf_init_document(fz_context *ctx, pdf_document *doc) - { - pdf_drop_xref_sections(ctx, doc); - fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); -+/* willus mod -- be quiet */ -+/* - fz_warn(ctx, "trying to repair broken xref"); -+*/ - repaired = 1; - } - -@@ -1506,7 +1512,10 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc) - /* Swallow error, but continue dropping */ - } - -+/* willu smod -- no pdf_drop_js */ -+/* - pdf_drop_js(ctx, doc->js); -+*/ - - pdf_drop_xref_sections(ctx, doc); - fz_free(ctx, doc->xref_index); --- -2.22.0 - diff --git a/pkgs/applications/misc/k2pdfopt/tesseract.patch b/pkgs/applications/misc/k2pdfopt/tesseract.patch deleted file mode 100644 index adfee9ae282..00000000000 --- a/pkgs/applications/misc/k2pdfopt/tesseract.patch +++ /dev/null @@ -1,675 +0,0 @@ -From 39aa8502eee7bb669a29d1a9b3bfe5c9595ad960 Mon Sep 17 00:00:00 2001 -From: Daniel Fullmer <danielrf12@gmail.com> -Date: Fri, 13 Sep 2019 13:45:05 -0400 -Subject: [PATCH] Willus mod changes from k2pdfopt - ---- - src/api/Makefile.am | 1 + - src/api/baseapi.cpp | 87 +++++++++++ - src/api/baseapi.h | 3 + - src/api/tesscapi.cpp | 311 +++++++++++++++++++++++++++++++++++++ - src/api/tesseract.h | 29 ++++ - src/ccmain/tessedit.cpp | 5 +- - 
src/ccutil/ccutil.h | 7 + - src/ccutil/genericvector.h | 21 ++- - src/ccutil/mainblk.cpp | 17 +- - src/ccutil/params.cpp | 3 +- - src/ccutil/serialis.cpp | 3 + - src/ccutil/serialis.h | 2 + - src/lstm/input.cpp | 3 + - 13 files changed, 488 insertions(+), 4 deletions(-) - create mode 100644 src/api/tesscapi.cpp - create mode 100644 src/api/tesseract.h - -diff --git a/src/api/Makefile.am b/src/api/Makefile.am -index d9b76eb6..cd2dc30f 100644 ---- a/src/api/Makefile.am -+++ b/src/api/Makefile.am -@@ -39,6 +39,7 @@ libtesseract_api_la_SOURCES += lstmboxrenderer.cpp - libtesseract_api_la_SOURCES += pdfrenderer.cpp - libtesseract_api_la_SOURCES += wordstrboxrenderer.cpp - libtesseract_api_la_SOURCES += renderer.cpp -+libtesseract_api_la_SOURCES += tesscapi.cpp - - lib_LTLIBRARIES += libtesseract.la - libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS) $(libarchive_LIBS) -diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp -index 9245d07c..ea964ee6 100644 ---- a/src/api/baseapi.cpp -+++ b/src/api/baseapi.cpp -@@ -215,6 +215,14 @@ TessBaseAPI::TessBaseAPI() - // Use the current locale if building debug code. 
- std::locale::global(std::locale("")); - #endif -+ const char *locale; -+ locale = std::setlocale(LC_ALL, nullptr); -+/* willus mod Remove assertions--taken care of in tesscapi.cpp */ -+// ASSERT_HOST(!strcmp(locale, "C")); -+ locale = std::setlocale(LC_CTYPE, nullptr); -+// ASSERT_HOST(!strcmp(locale, "C")); -+ locale = std::setlocale(LC_NUMERIC, nullptr); -+// ASSERT_HOST(!strcmp(locale, "C")); - } - - TessBaseAPI::~TessBaseAPI() { -@@ -1333,6 +1341,85 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, - text->add_str_int("\t", bottom - top); - } - -+/* willus mod */ -+int TessBaseAPI::GetOCRWords(int **x00,int **y00,int **x11,int **y11,int **ybaseline0, -+ char **utf8words) -+ -+ { -+ int iword,nwords,totlen,it8; -+ int *x0,*y0,*x1,*y1,*ybaseline; -+ char *tutf8; -+ -+ ResultIterator *res_it = GetIterator(); -+ /* Count words */ -+ iword=0; -+ totlen=0; -+ while (!res_it->Empty(RIL_BLOCK)) -+ { -+ if (res_it->Empty(RIL_WORD)) -+ { -+ res_it->Next(RIL_WORD); -+ continue; -+ } -+ iword++; -+ STRING textstr=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get(); -+ totlen+=strlen(textstr.string())+1; -+ res_it->Next(RIL_WORD); -+ } -+ nwords=iword; -+/* -+printf("\nnwords=%d, totlen=%d\n",nwords,totlen); -+*/ -+ x0=(*x00)=(int *)malloc(sizeof(int)*5*nwords); -+ y0=(*y00)=&x0[nwords]; -+ x1=(*x11)=&y0[nwords]; -+ y1=(*y11)=&x1[nwords]; -+ ybaseline=(*ybaseline0)=&y1[nwords]; -+ tutf8=(*utf8words)=(char *)malloc(totlen); -+ iword=0; -+ it8=0; -+ res_it->Begin(); -+ while (!res_it->Empty(RIL_BLOCK)) -+ { -+ if (res_it->Empty(RIL_WORD)) -+ { -+ res_it->Next(RIL_WORD); -+ continue; -+ } -+ STRING textstr=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get(); -+ strcpy(&tutf8[it8],textstr.string()); -+ it8 += strlen(&tutf8[it8])+1; -+ /* -+ STRING textstr(""); -+ textstr += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get(); -+ */ -+/* -+printf("Word %d: '%s'\n",iword,textstr.string()); -+*/ -+ int left, 
top, right, bottom; -+ int u1,v1,u2,v2; -+ res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); -+ res_it->Baseline(RIL_WORD, &u1, &v1, &u2, &v2); -+ x0[iword]=left; -+ x1[iword]=right; -+ y0[iword]=top; -+ y1[iword]=bottom; -+ ybaseline[iword]=(v1+v2)/2; -+ iword++; -+/* -+printf("BB: (%d,%d)-(%d,%d) BL: (%d,%d)-(%d,%d)\n",left,bottom,right,top,x1,y1,x2,y2); -+*/ -+ res_it->Next(RIL_WORD); -+ } -+/* -+printf("iword=%d\n",iword); -+*/ -+ return(iword); -+ } -+ -+/* willus mod */ -+int GetOCRWords(int **x0,int **y0,int **x1,int **y1,int **ybaseline,char **utf8words); -+ - /** - * Make a TSV-formatted string from the internal data structures. - * page_number is 0-based but will appear in the output as 1-based. -diff --git a/src/api/baseapi.h b/src/api/baseapi.h -index 3724dd92..23be5920 100644 ---- a/src/api/baseapi.h -+++ b/src/api/baseapi.h -@@ -575,6 +575,9 @@ class TESS_API TessBaseAPI { - */ - char* GetHOCRText(ETEXT_DESC* monitor, int page_number); - -+/* willus mod */ -+int GetOCRWords(int **x0,int **y0,int **x1,int **y1,int **ybaseline,char **utf8words); -+ - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. -diff --git a/src/api/tesscapi.cpp b/src/api/tesscapi.cpp -new file mode 100644 -index 00000000..1752fafe ---- /dev/null -+++ b/src/api/tesscapi.cpp -@@ -0,0 +1,311 @@ -+/* -+** tesscapi.cpp willus.com attempt at C wrapper for tesseract. -+** (Butchered from tesseractmain.cpp) -+** Last udpated 9-1-12 -+** -+** Copyright (C) 2012 http://willus.com -+** -+** This program is free software: you can redistribute it and/or modify -+** it under the terms of the GNU Affero General Public License as -+** published by the Free Software Foundation, either version 3 of the -+** License, or (at your option) any later version. 
-+** -+** This program is distributed in the hope that it will be useful, -+** but WITHOUT ANY WARRANTY; without even the implied warranty of -+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+** GNU Affero General Public License for more details. -+** -+** You should have received a copy of the GNU Affero General Public License -+** along with this program. If not, see <http://www.gnu.org/licenses/>. -+** -+*/ -+ -+/* -+#include "mfcpch.h" -+*/ -+// #define USE_VLD //Uncomment for Visual Leak Detector. -+#if (defined _MSC_VER && defined USE_VLD) -+#include <vld.h> -+#endif -+ -+// Include automatically generated configuration file if running autoconf -+#ifdef HAVE_CONFIG_H -+#include "config_auto.h" -+#endif -+#include <locale.h> -+#ifdef USING_GETTEXT -+#include <libintl.h> -+#define _(x) gettext(x) -+#else -+#define _(x) (x) -+#endif -+ -+#include "allheaders.h" -+#include "baseapi.h" -+#include "strngs.h" -+#include "params.h" -+#include "blobs.h" -+#include "simddetect.h" -+#include "tesseractclass.h" -+/* -+#include "notdll.h" -+*/ -+ -+/* C Wrappers */ -+#include "tesseract.h" -+ -+// static tesseract::TessBaseAPI api[4]; -+ -+/* -+** ocr_type=0: OEM_DEFAULT -+** ocr_type=1: OEM_TESSERACT_ONLY -+** ocr_type=2: OEM_LSTM_ONLY -+** ocr_type=3: OEM_TESSERACT_LSTM_COMBINED -+*/ -+void *tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out, -+ char *initstr,int maxlen,int *status) -+ -+ { -+ char original_locale[256]; -+ tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI; -+/* -+printf("@tess_capi_init\n"); -+printf(" datapath='%s'\n",datapath); -+printf(" language='%s'\n",language); -+printf(" ocr_type=%d\n",ocr_type); -+*/ -+#ifdef USE_NLS -+ setlocale (LC_ALL, ""); -+ bindtextdomain (PACKAGE, LOCALEDIR); -+ textdomain (PACKAGE); -+#endif -+ /* willus mod, 11-24-16 */ -+ /* Tesseract needs "C" locale to correctly parse all data .traineddata files. 
*/ -+/* -+printf("locale='%s'\n",setlocale(LC_ALL,NULL)); -+printf("ctype='%s'\n",setlocale(LC_CTYPE,NULL)); -+printf("numeric='%s'\n",setlocale(LC_NUMERIC,NULL)); -+*/ -+ strncpy(original_locale,setlocale(LC_ALL,NULL),255); -+ original_locale[255]='\0'; -+/* -+printf("original_locale='%s'\n",original_locale); -+*/ -+ setlocale(LC_ALL,"C"); -+/* -+printf("new locale='%s'\n",setlocale(LC_ALL,NULL)); -+printf("new ctype='%s'\n",setlocale(LC_CTYPE,NULL)); -+printf("new numeric='%s'\n",setlocale(LC_NUMERIC,NULL)); -+*/ -+ // fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); -+ // Make the order of args a bit more forgiving than it used to be. -+ const char* lang = "eng"; -+ tesseract::PageSegMode pagesegmode = tesseract::PSM_SINGLE_BLOCK; -+ if (language!=NULL && language[0]!='\0') -+ lang = language; -+ /* -+ if (output == NULL) -+ { -+ fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] " -+ "[-psm pagesegmode] [configfile...]\n"), argv[0]); -+ fprintf(stderr, -+ _("pagesegmode values are:\n" -+ "0 = Orientation and script detection (OSD) only.\n" -+ "1 = Automatic page segmentation with OSD.\n" -+ "2 = Automatic page segmentation, but no OSD, or OCR\n" -+ "3 = Fully automatic page segmentation, but no OSD. (Default)\n" -+ "4 = Assume a single column of text of variable sizes.\n" -+ "5 = Assume a single uniform block of vertically aligned text.\n" -+ "6 = Assume a single uniform block of text.\n" -+ "7 = Treat the image as a single text line.\n" -+ "8 = Treat the image as a single word.\n" -+ "9 = Treat the image as a single word in a circle.\n" -+ "10 = Treat the image as a single character.\n")); -+ fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any" -+ "configfile.\n")); -+ exit(1); -+ } -+ */ -+/* -+printf("SSE = %s\n",SIMDDetect::IsSSEAvailable() ? "AVAILABLE" : "NOT AVAILABLE"); -+printf("AVX = %s\n",SIMDDetect::IsAVXAvailable() ? 
"AVAILABLE" : "NOT AVAILABLE"); -+*/ -+/* -+v4.00 loads either TESSERACT enginer, LSTM engine, or both. No CUBE. -+*/ -+ ocr_type=0; /* Ignore specified and use default */ -+ api->SetOutputName(NULL); -+ (*status)=api->Init(datapath,lang, -+ ocr_type==0 ? tesseract::OEM_DEFAULT : -+ (ocr_type==1 ? tesseract::OEM_TESSERACT_ONLY : -+ (ocr_type==2 ? tesseract::OEM_LSTM_ONLY : -+ (tesseract::OEM_TESSERACT_LSTM_COMBINED)))); -+ if ((*status)!=0) -+ { -+ /* willus mod, 11-24-16 */ -+ setlocale(LC_ALL,original_locale); -+ api->End(); -+ delete api; -+ return(NULL); -+ } -+ /* -+ api.Init("tesscapi",lang,tesseract::OEM_DEFAULT, -+ &(argv[arg]), argc - arg, NULL, NULL, false); -+ */ -+ // We have 2 possible sources of pagesegmode: a config file and -+ // the command line. For backwards compatability reasons, the -+ // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the -+ // default for this program is tesseract::PSM_AUTO. We will let -+ // the config file take priority, so the command-line default -+ // can take priority over the tesseract default, so we use the -+ // value from the command line only if the retrieved mode -+ // is still tesseract::PSM_SINGLE_BLOCK, indicating no change -+ // in any config file. Therefore the only way to force -+ // tesseract::PSM_SINGLE_BLOCK is from the command line. -+ // It would be simpler if we could set the value before Init, -+ // but that doesn't work. 
-+ if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) -+ api->SetPageSegMode(pagesegmode); -+ -+ /* -+ ** Initialization message -+ */ -+ { -+ char istr[1024]; -+ int sse,avx; -+ -+// printf("tessedit_ocr_engine_mode = %d\n",tessedit_ocr_engine_mode); -+ sprintf(istr,"%s",api->Version()); -+ sse=tesseract::SIMDDetect::IsSSEAvailable(); -+ avx=tesseract::SIMDDetect::IsAVXAvailable(); -+ if (sse || avx) -+ sprintf(&istr[strlen(istr)]," [%s]",sse&&avx?"SSE+AVX":(sse?"SSE":"AVX")); -+ sprintf(&istr[strlen(istr)],"\n Tesseract data folder = '%s'",datapath==NULL?getenv("TESSDATA_PREFIX"):datapath); -+ strcat(istr,"\n Tesseract languages: "); -+ GenericVector<STRING> languages; -+ api->GetLoadedLanguagesAsVector(&languages); -+/* -+printf("OEM=%d\n",api->oem()); -+printf("Langs='%s'\n",api->GetInitLanguagesAsString()); -+printf("AnyTessLang()=%d\n",(int)api->tesseract()->AnyTessLang()); -+printf("AnyLSTMLang()=%d\n",(int)api->tesseract()->AnyLSTMLang()); -+printf("num_sub_langs()=%d\n",api->tesseract()->num_sub_langs()); -+printf("languages.size()=%d\n",(int)languages.size()); -+*/ -+ -+ for (int i=0;i<=api->tesseract()->num_sub_langs();i++) -+ { -+ tesseract::Tesseract *lang1; -+ int eng; -+ lang1 = i==0 ? api->tesseract() : api->tesseract()->get_sub_lang(i-1); -+ eng=(int)lang1->tessedit_ocr_engine_mode; -+ sprintf(&istr[strlen(istr)],"%s%s [%s]",i==0?"":", ",lang1->lang.string(), -+ eng==2?"LSTM+Tess":(eng==1?"LSTM":"Tess")); -+ } -+/* -+printf("%d. 
'%s'\n",i+1,languages[i].string()); -+printf(" sublang[%d].oem_engine = %d\n",i+1,(int)api->tesseract()->get_sub_lang(i)->tessedit_ocr_engine_mode); -+*/ -+ -+ /* -+ if (ocr_type==0 || ocr_type==3) -+ sprintf(&istr[strlen(istr)],"[LSTM+] (lang="); -+ else if (ocr_type==2) -+ sprintf(&istr[strlen(istr)],"[LSTM] (lang="); -+ strncpy(&istr[strlen(istr)],language,253-strlen(istr)); -+ istr[253]='\0'; -+ strcat(istr,")"); -+ */ -+ if (out!=NULL) -+ fprintf(out,"%s\n",istr); -+ if (initstr!=NULL) -+ { -+ strncpy(initstr,istr,maxlen-1); -+ initstr[maxlen-1]='\0'; -+ } -+ } -+ -+ -+ /* Turn off LSTM debugging output */ -+ api->SetVariable("lstm_debug_level","0"); -+#if (WILLUSDEBUG & 1) -+ api->SetVariable("lstm_debug_level","9"); -+ api->SetVariable("paragraph_debug_level","9"); -+ api->SetVariable("tessdata_manager_debug_level","9"); -+ api->SetVariable("tosp_debug_level","9"); -+ api->SetVariable("wordrec_debug_level","9"); -+ api->SetVariable("segsearch_debug_level","9"); -+#endif -+ /* willus mod, 11-24-16 */ -+ setlocale(LC_ALL,original_locale); -+ return((void *)api); -+ } -+ -+ -+int tess_capi_get_ocr(void *vapi,PIX *pix,char *outstr,int maxlen,int segmode,FILE *out) -+ -+ { -+ tesseract::TessBaseAPI *api; -+ static int old_segmode=-1; -+ -+ api=(tesseract::TessBaseAPI *)vapi; -+ if (old_segmode != segmode) -+ { -+ old_segmode=segmode; -+ api->SetPageSegMode((tesseract::PageSegMode)segmode); -+ } -+ if (!api->ProcessPage(pix,0,NULL,NULL,0,NULL)) -+ { -+ /* pixDestroy(&pix); */ -+ if (out!=NULL) -+ fprintf(out,"tesscapi: Error during bitmap processing.\n"); -+ api->Clear(); -+ return(-1); -+ } -+ strncpy(outstr,api->GetUTF8Text(),maxlen-1); -+ outstr[maxlen-1]='\0'; -+ api->Clear(); -+ return(0); -+ } -+ -+ -+int tess_capi_get_ocr_multiword(void *vapi,PIX *pix,int segmode, -+ int **left,int **top,int **right,int **bottom, -+ int **ybase,char **text,int *nw, -+ FILE *out) -+ -+ { -+ tesseract::TessBaseAPI *api; -+ static int old_segmode=-1; -+ -+ 
api=(tesseract::TessBaseAPI *)vapi; -+ if (old_segmode != segmode) -+ { -+ old_segmode=segmode; -+ api->SetPageSegMode((tesseract::PageSegMode)segmode); -+ } -+ if (!api->ProcessPage(pix,0,NULL,NULL,0,NULL)) -+ { -+ if (out!=NULL) -+ fprintf(out,"tesscapi: Error during bitmap processing.\n"); -+ api->Clear(); -+ (*nw)=0; -+ return(-1); -+ } -+ (*nw)=api->GetOCRWords(left,top,right,bottom,ybase,text); -+ api->Clear(); -+ return(0); -+ } -+ -+ -+void tess_capi_end(void *vapi) -+ -+ { -+ tesseract::TessBaseAPI *api; -+ -+ if (vapi==NULL) -+ return; -+ api=(tesseract::TessBaseAPI *)vapi; -+ api->End(); -+ delete api; -+ } -diff --git a/src/api/tesseract.h b/src/api/tesseract.h -new file mode 100644 -index 00000000..575948cc ---- /dev/null -+++ b/src/api/tesseract.h -@@ -0,0 +1,29 @@ -+/* -+** Willus.com's Tesseract C Wrappers -+** -+** 6-8-12 -+** -+*/ -+ -+#ifndef _TESSERACT_H_ -+#define _TESSERACT_H_ -+ -+//#include <leptonica.h> -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+void *tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out, -+ char *initstr,int maxlen,int *status); -+int tess_capi_get_ocr(void *api,PIX *pix,char *outstr,int maxlen,int segmode,FILE *out); -+int tess_capi_get_ocr_multiword(void *vapi,PIX *pix,int segmode, -+ int **left,int **top,int **right,int **bottom, -+ int **ybase,char **text,int *nw, -+ FILE *out); -+void tess_capi_end(void *api); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp -index 17f0951b..7af94ee2 100644 ---- a/src/ccmain/tessedit.cpp -+++ b/src/ccmain/tessedit.cpp -@@ -101,6 +101,10 @@ bool Tesseract::init_tesseract_lang_data( - " to your \"tessdata\" directory.\n"); - return false; - } -+ /* willus mod */ -+ TFile fp; -+ strncpy(fp.tfile_filename,tessdata_path.string(),511); -+ fp.tfile_filename[511]='\0'; - #ifndef DISABLED_LEGACY_ENGINE - if (oem == OEM_DEFAULT) { - // Set the engine mode from availability, which can then be overridden by -@@ 
-116,7 +120,6 @@ bool Tesseract::init_tesseract_lang_data( - #endif // ndef DISABLED_LEGACY_ENGINE - - // If a language specific config file (lang.config) exists, load it in. -- TFile fp; - if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) { - ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp, - this->params()); -diff --git a/src/ccutil/ccutil.h b/src/ccutil/ccutil.h -index 71e89c60..bdeccc14 100644 ---- a/src/ccutil/ccutil.h -+++ b/src/ccutil/ccutil.h -@@ -80,6 +80,13 @@ class CCUtil { - // Member parameters. - // These have to be declared and initialized after params_ member, since - // params_ should be initialized before parameters are added to it. -+/* willus mod */ -+/* -+ #ifdef _WIN32 -+ STRING_VAR_H(tessedit_module_name, WINDLLNAME, -+ "Module colocated with tessdata dir"); -+ #endif -+*/ - INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities"); - BOOL_VAR_H(use_definite_ambigs_for_classifier, false, - "Use definite ambiguities when running character classifier"); -diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h -index 3556d153..3a5e8662 100644 ---- a/src/ccutil/genericvector.h -+++ b/src/ccutil/genericvector.h -@@ -382,7 +382,26 @@ inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) { - // reserve an extra byte in case caller wants to append a '\0' character - data->reserve(size + 1); - data->resize_no_init(size); -- result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size; -+ /* willus mod Dec 2018--weird issue with Win XP and MinGW gcc 7.3.0 */ -+ /* Can't read entire file at once -- need to break up into smaller blocksize reads */ -+ { -+ int frs,n; -+ int blocksize; -+ blocksize=1024*1024; -+ for (n=0;1;) -+ { -+ int bs; -+ bs= size-n > blocksize ? 
blocksize : size-n; -+ frs=(int)fread(&(*data)[n],1,bs,fp); -+ n+=frs; -+ if (frs<bs || bs<blocksize || n>=size) -+ break; -+ } -+ result = static_cast<long>((long)n==size); -+ } -+ /* -+ result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size; -+ */ - } - fclose(fp); - } -diff --git a/src/ccutil/mainblk.cpp b/src/ccutil/mainblk.cpp -index 52b04b04..80b26044 100644 ---- a/src/ccutil/mainblk.cpp -+++ b/src/ccutil/mainblk.cpp -@@ -55,8 +55,22 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { - #if defined(_WIN32) - } else if (datadir == nullptr || _access(datadir.string(), 0) != 0) { - /* Look for tessdata in directory of executable. */ -+ /* -+ char drive[_MAX_DRIVE]; -+ char dir[_MAX_DIR]; -+ */ - char path[_MAX_PATH]; -- DWORD length = GetModuleFileName(nullptr, path, sizeof(path)); -+ int i; -+ /* DWORD length = */ GetModuleFileName(nullptr, path, sizeof(path)); -+ /* willus mod--avoid _splitpath_s -- not in XP */ -+ for (i=strlen(path)-1;i>=0 && path[i]!='/' && path[i]!='\\';i--); -+ if (i>=0) -+ { -+ path[i]='\0'; -+ datadir=path; -+ datadir += "/tessdata"; -+ } -+ /* - if (length > 0 && length < sizeof(path)) { - char* separator = std::strrchr(path, '\\'); - if (separator != nullptr) { -@@ -65,6 +79,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { - datadir += "/tessdata"; - } - } -+ */ - #endif /* _WIN32 */ - #if defined(TESSDATA_PREFIX) - } else { -diff --git a/src/ccutil/params.cpp b/src/ccutil/params.cpp -index 00bf2563..486c5ce0 100644 ---- a/src/ccutil/params.cpp -+++ b/src/ccutil/params.cpp -@@ -82,7 +82,8 @@ bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, - - if (!foundit) { - anyerr = true; // had an error -- tprintf("Warning: Parameter not found: %s\n", line); -+ /* willus mod */ -+ tprintf("Tesseract warning: Parameter %s not found in file %s.\n",line,fp->tfile_filename); - } - } - } -diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp -index 
7def011f..6107a494 100644 ---- a/src/ccutil/serialis.cpp -+++ b/src/ccutil/serialis.cpp -@@ -201,6 +201,9 @@ bool TFile::Open(const STRING& filename, FileReader reader) { - offset_ = 0; - is_writing_ = false; - swap_ = false; -+ /* willus mod */ -+ strncpy(tfile_filename,filename.string(),511); -+ tfile_filename[511]='\0'; - if (reader == nullptr) - return LoadDataFromFile(filename, data_); - else -diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h -index 095b9227..4cc8251e 100644 ---- a/src/ccutil/serialis.h -+++ b/src/ccutil/serialis.h -@@ -77,6 +77,8 @@ class TFile { - public: - TFile(); - ~TFile(); -+ /* willus mod */ -+ char tfile_filename[512]; - - // All the Open methods load the whole file into memory for reading. - // Opens a file with a supplied reader, or nullptr to use the default. -diff --git a/src/lstm/input.cpp b/src/lstm/input.cpp -index 73b584b3..0b0b54c3 100644 ---- a/src/lstm/input.cpp -+++ b/src/lstm/input.cpp -@@ -93,8 +93,11 @@ Pix* Input::PrepareLSTMInputs(const ImageData& image_data, - return nullptr; - } - if (width < min_width || height < min_width) { -+ /* willus mod -- no warning */ -+ /* - tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, - height, min_width); -+ */ - pixDestroy(&pix); - return nullptr; - } --- -2.22.0 - |