summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch49
-rw-r--r--pkgs/applications/misc/k2pdfopt/default.nix142
-rw-r--r--pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch151
-rw-r--r--pkgs/applications/misc/k2pdfopt/k2pdfopt.patch99
-rw-r--r--pkgs/applications/misc/k2pdfopt/leptonica.patch254
-rw-r--r--pkgs/applications/misc/k2pdfopt/mupdf.patch1060
-rw-r--r--pkgs/applications/misc/k2pdfopt/tesseract.patch675
7 files changed, 164 insertions, 2266 deletions
diff --git a/pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch b/pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch
new file mode 100644
index 00000000000..8f9271ac996
--- /dev/null
+++ b/pkgs/applications/misc/k2pdfopt/0001-Fix-CMakeLists.patch
@@ -0,0 +1,49 @@
+From 2629af4ed00d7ca65359178203d80fb146901cdb Mon Sep 17 00:00:00 2001
+From: Daniel Fullmer <danielrf12@gmail.com>
+Date: Fri, 3 Jul 2020 21:00:45 -0700
+Subject: [PATCH 1/2] Fix CMakeLists
+
+---
+ CMakeLists.txt | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index e218279..4341de9 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -57,6 +57,7 @@ endif(JPEG_FOUND)
+ include(FindJasper)
+ if(JASPER_FOUND)
+   set(HAVE_JASPER_LIB 1)
++  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY})
+ endif(JASPER_FOUND)
+ 
+ # paths from willuslib/wgs.c
+@@ -71,9 +72,12 @@ else()
+   message(STATUS "Could NOT find ghostscript executable")
+ endif(GHOSTSCRIPT_EXECUTABLE)
+ 
+-# willus.h
+-# HAVE_GSL_LIB
+-
++pkg_check_modules(GSL gsl)
++if(GSL_FOUND)
++  set(HAVE_GSL_LIB 1)
++  include_directories(SYSTEM ${GSL_INCLUDEDIR})
++  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS})
++endif(GSL_FOUND)
+ 
+ # libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0)
+ # MUPDF_STATIC_LDFLAGS misses mupdf-js-none, and doubles libs ...
+@@ -85,7 +89,7 @@ if(MUPDF_FOUND)
+   include_directories(SYSTEM ${MUPDF_INCLUDEDIR})
+   message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}")
+   set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS} 
+-    -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype
++
+   )
+ endif(MUPDF_FOUND)
+ 
+-- 
+2.27.0
+
diff --git a/pkgs/applications/misc/k2pdfopt/default.nix b/pkgs/applications/misc/k2pdfopt/default.nix
index 8899654cc4c..31accf811bc 100644
--- a/pkgs/applications/misc/k2pdfopt/default.nix
+++ b/pkgs/applications/misc/k2pdfopt/default.nix
@@ -1,5 +1,5 @@
-{ stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig
-, zlib, libpng
+{ stdenv, runCommand, fetchzip, fetchurl, fetchpatch, fetchFromGitHub
+, cmake, pkgconfig, zlib, libpng
 , enableGSL ? true, gsl
 , enableGhostScript ? true, ghostscript
 , enableMuPDF ? true, mupdf
@@ -11,44 +11,132 @@
 
 with stdenv.lib;
 
-stdenv.mkDerivation rec {
-  pname = "k2pdfopt";
-  version = "2.51a";
+# k2pdfopt is a pain to package. It requires modified versions of mupdf,
+# leptonica, and tesseract.  Instead of shipping patches for these upstream
+# packages, k2pdfopt includes just the modified source files for these
+# packages.  The individual files from the {mupdf,leptonica,tesseract}_mod/
+# directories are intended to replace the corresponding source files in the
+# upstream packages, for a particular version of that upstream package.
+#
+# There are a few ways we could approach packaging these modified versions of
+# mupdf, leptonica, and tesseract:
+# 1) Override the upstream source with a new derivation that involves copying
+# the modified source files from k2pdfopt and replacing the corresponding
+# source files in the upstream packages. Since the files are intended for a
+# particular version of the upstream package, this would not allow us to easily
+# use updates to those packages in nixpkgs.
+# 2) Manually produce patches which can be applied against the upstream
+# project, and have the same effect as replacing those files.  I believe this
+# is what k2pdfopt should do for us anyway.  The benefit of creating and
+# applying patches in this way is that minor updates (esp. security fixes) to
+# upstream packages might still allow these patches to apply successfully.
+# 3) Automatically produce these patches inside a nix derivation. This is the
+# approach taken here, using the "mkPatch" provided below.  This has the
+# benefit of easier review and should hopefully be simpler to update in the
+# future.
+
+let
+  # Create a patch against src based on changes applied in patchCommands
+  mkPatch = { name, src, patchCommands }: runCommand "${name}-k2pdfopt.patch" { inherit src; } ''
+    source $stdenv/setup
+    unpackPhase
 
-  src = (fetchzip {
-    url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.51_src.zip";
-    sha256 = "133l7xkvi67s6sfk8cfh7rmavbsf7ib5fyksk1ci6b6sch3z2sw9";
-  });
+    orig=$sourceRoot
+    new=$sourceRoot-modded
+    cp -r $orig/. $new/
 
-  # Note: the v2.51a zip contains only files to be replaced in the v2.50 zip.
-  v251a_src = (fetchzip {
-    url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.51a_src.zip";
-    sha256 = "0vvwblii7kgdwfxw8dzk6jbmz4dv94d7rkv18i60y8wkayj6yhl6";
-  });
+    pushd $new >/dev/null
+    ${patchCommands}
+    popd >/dev/null
 
-  postUnpack = ''
-    cp -r ${v251a_src}/* $sourceRoot
+    diff -Naur $orig $new > $out || true
   '';
 
-  patches = [ ./k2pdfopt.patch ./k2pdfopt-mupdf-1.16.1.patch ];
+  pname = "k2pdfopt";
+  version = "2.53";
+  k2pdfopt_src = fetchzip {
+    url = "http://www.willus.com/${pname}/src/${pname}_v${version}_src.zip";
+    sha256 = "1fna8bg3pascjfc3hmc6xn0xi2yh7f1qp0d344mw9hqanbnykyy8";
+  };
+in stdenv.mkDerivation rec {
+  inherit pname version;
+  src = k2pdfopt_src;
+
+  patches = [
+    ./0001-Fix-CMakeLists.patch
+  ];
+
+  postPatch = ''
+    substituteInPlace willuslib/bmpdjvu.c \
+      --replace "<djvu.h>" "<libdjvu/ddjvuapi.h>"
+  '';
 
   nativeBuildInputs = [ cmake pkgconfig ];
 
   buildInputs =
   let
-    #  The patches below were constructed by taking the files from k2pdfopt in
-    #  the {mupdf,leptonica,tesseract}_mod/ directories, replacing the
-    #  corresponding files in the respective source trees, resolving any errors
-    #  with more recent versions of these depencencies, and running diff.
-    mupdf_modded = mupdf.overrideAttrs (attrs: {
-      patches = attrs.patches ++ [ ./mupdf.patch ]; # Last verified with mupdf 1.16.1
+    # We use specific versions of these sources below to match the versions
+    # used in the k2pdfopt source. Note that this does _not_ need to match the
+    # version used elsewhere in nixpkgs, since it is only used to create the
+    # patch that can then be applied to the version in nixpkgs.
+    mupdf_patch = mkPatch {
+      name = "mupdf";
+      src = fetchurl {
+        url = "https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz";
+        sha256 = "13nl9nrcx2awz9l83mlv2psi1lmn3hdnfwxvwgwiwbxlkjl3zqq0";
+      };
+      patchCommands = ''
+        cp ${k2pdfopt_src}/mupdf_mod/{filter-basic,font,stext-device,string}.c ./source/fitz/
+        cp ${k2pdfopt_src}/mupdf_mod/pdf-* ./source/pdf/
+      '';
+    };
+    mupdf_modded = mupdf.overrideAttrs ({ patches ? [], ... }: {
+      patches = patches ++ [ mupdf_patch ];
+      # This function is missing in font.c, see font-win32.c
+      postPatch = ''
+        echo "void pdf_install_load_system_font_funcs(fz_context *ctx) {}" >> source/fitz/font.c
+      '';
     });
-    leptonica_modded = leptonica.overrideAttrs (attrs: {
-      patches = [ ./leptonica.patch ]; # Last verified with leptonica 1.78.0
+
+    leptonica_patch = mkPatch {
+      name = "leptonica";
+      src = fetchurl {
+        url = "http://www.leptonica.org/source/leptonica-1.79.0.tar.gz";
+        sha256 = "1n004gv1dj3pq1fcnfdclvvx5nang80336aa67nvs3nnqp4ncn84";
+      };
+      patchCommands = "cp -r ${k2pdfopt_src}/leptonica_mod/. ./src/";
+    };
+    leptonica_modded = leptonica.overrideAttrs ({ patches ? [], ... }: {
+      patches = patches ++ [ leptonica_patch ];
     });
+
+    tesseract_patch = mkPatch {
+      name = "tesseract";
+      src = fetchFromGitHub {
+        owner = "tesseract-ocr";
+        repo = "tesseract";
+        rev = "4.1.1";
+        sha256 = "1ca27zbjpx35nxh9fha410z3jskwyj06i5hqiqdc08s2d7kdivwn";
+      };
+      patchCommands = ''
+        cp ${k2pdfopt_src}/tesseract_mod/{baseapi,tesscapi,tesseract}.* src/api/
+        cp ${k2pdfopt_src}/tesseract_mod/{tesscapi,tessedit,tesseract}.* src/ccmain/
+        cp ${k2pdfopt_src}/tesseract_mod/dotproduct{avx,fma,sse}.* src/arch/
+        cp ${k2pdfopt_src}/tesseract_mod/{intsimdmatrixsse,simddetect}.* src/arch/
+        cp ${k2pdfopt_src}/tesseract_mod/{errcode,genericvector,mainblk,params,serialis,tessdatamanager,tess_version,tprintf,unicharset}.* src/ccutil/
+        cp ${k2pdfopt_src}/tesseract_mod/{input,lstmrecognizer}.* src/lstm/
+        cp ${k2pdfopt_src}/tesseract_mod/openclwrapper.* src/opencl/
+      '';
+    };
     tesseract_modded = tesseract4.override {
-      tesseractBase = tesseract4.tesseractBase.overrideAttrs (_: {
-        patches = [ ./tesseract.patch ]; # Last verified with tesseract 1.4
+      tesseractBase = tesseract4.tesseractBase.overrideAttrs ({ patches ? [], ... }: {
+        patches = patches ++ [ tesseract_patch ];
+        # Additional compilation fixes
+        postPatch = ''
+          echo libtesseract_api_la_SOURCES += tesscapi.cpp >> src/api/Makefile.am
+          substituteInPlace src/api/tesseract.h \
+            --replace "#include <leptonica.h>" "//#include <leptonica.h>"
+        '';
       });
     };
   in
diff --git a/pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch b/pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch
deleted file mode 100644
index 3a9eca30e75..00000000000
--- a/pkgs/applications/misc/k2pdfopt/k2pdfopt-mupdf-1.16.1.patch
+++ /dev/null
@@ -1,151 +0,0 @@
-diff --git a/willuslib/wmupdf.c b/willuslib/wmupdf.c
-index 81627ef..f14a96c 100644
---- a/willuslib/wmupdf.c
-+++ b/willuslib/wmupdf.c
-@@ -189,8 +189,6 @@ int wmupdf_remake_pdf(char *infile,char *outfile,WPDFPAGEINFO *pageinfo,int use_
-     pdf_write_opts.do_compress=1;
-     pdf_write_opts.do_linear=0;
-     pdf_write_opts.do_garbage=1; /* 2 and 3 don't work for this. */
--    pdf_write_opts.continue_on_error=0;
--    pdf_write_opts.errors=NULL;
-     write_failed=0;
-     wpdfpageinfo_sort(pageinfo);
-     xref=NULL;
-@@ -1687,8 +1685,8 @@ WPDFOUTLINE *wpdfoutline_read_from_pdf_file(char *filename)
-         /* Sumatra version of MuPDF v1.4 -- use locally installed fonts */
-         pdf_install_load_system_font_funcs(ctx);
-         fz_try(ctx) { doc=fz_open_document(ctx,filename); }
--        fz_catch(ctx) 
--            { 
-+        fz_catch(ctx)
-+            {
-             fz_drop_context(ctx);
-             return(NULL);
-             }
-@@ -1890,5 +1888,5 @@ static pdf_obj *pdf_new_string_utf8(fz_context *ctx,char *string)
-     willus_mem_free((double **)&utfbuf,funcname);
-     return(pdfobj);
-     }
--    
-+
- #endif /* HAVE_MUPDF_LIB */
-diff --git a/willuslib/wmupdfinfo.c b/willuslib/wmupdfinfo.c
-index 5c7f38c..9b9e6fd 100644
---- a/willuslib/wmupdfinfo.c
-+++ b/willuslib/wmupdfinfo.c
-@@ -237,23 +237,22 @@ static void showglobalinfo(fz_context *ctx, globals *glo,char *filename)
-         pdf_obj *robj;
- 
-         robj=pdf_resolve_indirect(ctx,obj);
--        n=pdf_sprint_obj(ctx,NULL,0,robj,1);
--        buf=malloc(n+2);
-+        buf=pdf_sprint_obj(ctx,NULL,0,&n,robj,1,0);
-         if (buf==NULL)
-             {
-             fz_write_printf(ctx,out,"Info object (%d %d R):\n",pdf_to_num(ctx,obj),pdf_to_gen(ctx,obj));
--		    pdf_print_obj(ctx,out,robj,1);
-+		    pdf_print_obj(ctx,out,robj,1,0);
-             }
-         else
-             {
--            pdf_sprint_obj(ctx,buf,n+2,robj,1);
-+            pdf_sprint_obj(ctx,buf,n+2,&n,robj,1,0);
-             display_pdf_field(ctx,out,buf,"Title","TITLE");
-             display_pdf_field(ctx,out,buf,"CreationDate","CREATED");
-             display_pdf_field(ctx,out,buf,"ModDate","LAST MODIFIED");
-             display_pdf_field(ctx,out,buf,"Producer","PDF PRODUCER");
-             display_pdf_field(ctx,out,buf,"Creator","CREATOR");
-             display_file_size(ctx,out,filename);
--            free(buf);
-+            fz_free(ctx,buf);
-             }
- 	    }
-     if (glo->dims==1)
-@@ -275,7 +274,7 @@ static void showglobalinfo(fz_context *ctx, globals *glo,char *filename)
- 	if (obj)
-         {
- 		fz_write_printf(ctx,out, "\nEncryption object (%d %d R):\n", pdf_to_num(ctx,obj), pdf_to_gen(ctx,obj));
--		pdf_print_obj(ctx,out, pdf_resolve_indirect(ctx,obj), 1);
-+		pdf_print_obj(ctx,out, pdf_resolve_indirect(ctx,obj), 1, 0);
-         }
-     }
- 
-@@ -396,7 +395,7 @@ gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_
- 	if (j < glo->dims)
- 		return;
- 
--	glo->dim = fz_resize_array(ctx, glo->dim, glo->dims+1, sizeof(struct info));
-+	glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info);
- 	glo->dims++;
- 
- 	glo->dim[glo->dims - 1].page = page;
-@@ -441,7 +440,7 @@ gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *
- 		if (k < glo->fonts)
- 			continue;
- 
--		glo->font = fz_resize_array(ctx, glo->font, glo->fonts+1, sizeof(struct info));
-+		glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info);
- 		glo->fonts++;
- 
- 		glo->font[glo->fonts - 1].page = page;
-@@ -510,7 +509,7 @@ gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj
- 		if (k < glo->images)
- 			continue;
- 
--		glo->image = fz_resize_array(ctx, glo->image, glo->images+1, sizeof(struct info));
-+		glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info);
- 		glo->images++;
- 
- 		glo->image[glo->images - 1].page = page;
-@@ -568,7 +567,7 @@ gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *
- 		if (k < glo->forms)
- 			continue;
- 
--		glo->form = fz_resize_array(ctx, glo->form, glo->forms+1, sizeof(struct info));
-+		glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info);
- 		glo->forms++;
- 
- 		glo->form[glo->forms - 1].page = page;
-@@ -613,7 +612,7 @@ gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj
- 		if (k < glo->psobjs)
- 			continue;
- 
--		glo->psobj = fz_resize_array(ctx, glo->psobj, glo->psobjs+1, sizeof(struct info));
-+		glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info);
- 		glo->psobjs++;
- 
- 		glo->psobj[glo->psobjs - 1].page = page;
-@@ -656,7 +655,7 @@ gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_ob
- 		if (k < glo->shadings)
- 			continue;
- 
--		glo->shading = fz_resize_array(ctx, glo->shading, glo->shadings+1, sizeof(struct info));
-+		glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info);
- 		glo->shadings++;
- 
- 		glo->shading[glo->shadings - 1].page = page;
-@@ -724,7 +723,7 @@ gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_ob
- 		if (k < glo->patterns)
- 			continue;
- 
--		glo->pattern = fz_resize_array(ctx, glo->pattern, glo->patterns+1, sizeof(struct info));
-+		glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info);
- 		glo->patterns++;
- 
- 		glo->pattern[glo->patterns - 1].page = page;
-@@ -1216,7 +1215,7 @@ void wmupdfinfo_get(char *filename,int *pagelist,char **buf)
-     if (fout==NULL)
-         return;
-     */
--    
-+
-     ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
-     if (!ctx)
-         {
-@@ -1307,5 +1306,5 @@ static void date_convert(char *dst,char *src)
-     else if (src[i]!='\0')
-         sprintf(&dst[strlen(dst)]," %s",&src[i]);
-     }
--    
-+
- #endif /* HAVE_MUPDF_LIB */
diff --git a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch b/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch
deleted file mode 100644
index cf7e4896b80..00000000000
--- a/pkgs/applications/misc/k2pdfopt/k2pdfopt.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 4a2378b..502c477 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -52,6 +52,7 @@ endif(JPEG_FOUND)
- include(FindJasper)
- if(JASPER_FOUND)
-   set(HAVE_JASPER_LIB 1)
-+  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY})
- endif(JASPER_FOUND)
- 
- # paths from willuslib/wgs.c
-@@ -66,8 +67,12 @@ else()
-   message(STATUS "Could NOT find ghostscript executable")
- endif(GHOSTSCRIPT_EXECUTABLE)
- 
--# willus.h
--# HAVE_GSL_LIB
-+pkg_check_modules(GSL gsl)
-+if(GSL_FOUND)
-+  set(HAVE_GSL_LIB 1)
-+  include_directories(SYSTEM ${GSL_INCLUDEDIR})
-+  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS})
-+endif(GSL_FOUND)
- 
- 
- # libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0)
-@@ -80,7 +85,7 @@ if(MUPDF_FOUND)
-   include_directories(SYSTEM ${MUPDF_INCLUDEDIR})
-   message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}")
-   set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS} 
--    -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype
-+ 
-   )
- endif(MUPDF_FOUND)
- 
-@@ -91,9 +96,25 @@ if(DJVU_FOUND)
-   set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${DJVU_LDFLAGS})
- endif(DJVU_FOUND)
- 
--# HAVE_GOCR_LIB
--# HAVE_LEPTONICA_LIB
--# HAVE_TESSERACT_LIB
-+find_library(GOCR_LIB NAMES Pgm2asc)
-+if(GOCR_LIB)
-+  set(HAVE_GOCR_LIB 1)
-+  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GOCR_LIB})
-+endif(GOCR_LIB)
-+
-+pkg_check_modules(LEPTONICA lept)
-+if(LEPTONICA_FOUND)
-+  set(HAVE_LEPTONICA_LIB 1)
-+  include_directories(SYSTEM ${LEPTONICA_INCLUDEDIR})
-+  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${LEPTONICA_LDFLAGS})
-+endif(LEPTONICA_FOUND)
-+
-+pkg_check_modules(TESSERACT tesseract)
-+if(TESSERACT_FOUND)
-+  set(HAVE_TESSERACT_LIB 1)
-+  include_directories(SYSTEM ${TESSERACT_INCLUDEDIR})
-+  set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${TESSERACT_LDFLAGS})
-+endif(TESSERACT_FOUND)
- 
- # ---- Describe project
- 
-diff --git a/willuslib/CMakeLists.txt b/willuslib/CMakeLists.txt
-index 463bbc9..8043db5 100644
---- a/willuslib/CMakeLists.txt
-+++ b/willuslib/CMakeLists.txt
-@@ -6,7 +6,7 @@ include_directories(..)
- set(WILLUSLIB_SRC
-     ansi.c array.c bmp.c bmpdjvu.c bmpmupdf.c dtcompress.c filelist.c
-     fontdata.c fontrender.c gslpolyfit.c linux.c math.c mem.c ocr.c
--    ocrjocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c
-+    ocrgocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c
-     token.c wfile.c wgs.c wgui.c willusversion.c win.c winbmp.c
-     wincomdlg.c winmbox.c winshell.c wmupdf.c wmupdfinfo.c wpdf.c wsys.c
-     wzfile.c wleptonica.c
-diff --git a/willuslib/ocrgocr.c b/willuslib/ocrgocr.c
-index 6027e9a..fbe10f0 100644
---- a/willuslib/ocrgocr.c
-+++ b/willuslib/ocrgocr.c
-@@ -29,6 +29,8 @@
- #ifdef HAVE_GOCR_LIB
- #include <gocr.h>
- 
-+job_t *OCR_JOB;
-+
- /*
- ** bmp8 must be grayscale
- ** (x1,y1) and (x2,y2) from top left of bitmap
-@@ -63,6 +65,7 @@ void gocr_single_word_from_bmp8(char *text,int maxlen,WILLUSBITMAP *bmp8,
-     h=y2-y1+1;
-     dh=h+bw*2;
-     job=&_job;
-+    OCR_JOB=job;
-     job_init(job);
-     job_init_image(job);
-     // willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10);
diff --git a/pkgs/applications/misc/k2pdfopt/leptonica.patch b/pkgs/applications/misc/k2pdfopt/leptonica.patch
deleted file mode 100644
index dfab99fd013..00000000000
--- a/pkgs/applications/misc/k2pdfopt/leptonica.patch
+++ /dev/null
@@ -1,254 +0,0 @@
-From 8c11a20925686855023df90ed477957c7d7fe91e Mon Sep 17 00:00:00 2001
-From: Daniel Fullmer <danielrf12@gmail.com>
-Date: Fri, 13 Sep 2019 15:54:21 -0400
-Subject: [PATCH] Willus mod for k2pdfopt
-
----
- src/allheaders.h |   4 ++
- src/dewarp2.c    | 106 ++++++++++++++++++++++++++++++++++++++++++-----
- src/leptwin.c    |   6 ++-
- 3 files changed, 104 insertions(+), 12 deletions(-)
-
-diff --git a/src/allheaders.h b/src/allheaders.h
-index e68eff1..b3cc729 100644
---- a/src/allheaders.h
-+++ b/src/allheaders.h
-@@ -669,6 +669,10 @@ LEPT_DLL extern L_DEWARPA * dewarpaReadMem ( const l_uint8 *data, size_t size );
- LEPT_DLL extern l_ok dewarpaWrite ( const char *filename, L_DEWARPA *dewa );
- LEPT_DLL extern l_ok dewarpaWriteStream ( FILE *fp, L_DEWARPA *dewa );
- LEPT_DLL extern l_ok dewarpaWriteMem ( l_uint8 **pdata, size_t *psize, L_DEWARPA *dewa );
-+/* WILLUS MOD */
-+    LEPT_DLL extern l_int32 dewarpBuildPageModel_ex ( L_DEWARP *dew, const char *debugfile,l_int32 fit_order );
-+    LEPT_DLL extern l_int32 dewarpFindVertDisparity_ex ( L_DEWARP *dew, PTAA *ptaa, l_int32 rotflag,l_int32 fit_order );
-+    LEPT_DLL extern l_int32 dewarpBuildLineModel_ex ( L_DEWARP *dew, l_int32 opensize, const char *debugfile,l_int32 fit_order );
- LEPT_DLL extern l_ok dewarpBuildPageModel ( L_DEWARP *dew, const char *debugfile );
- LEPT_DLL extern l_ok dewarpFindVertDisparity ( L_DEWARP *dew, PTAA *ptaa, l_int32 rotflag );
- LEPT_DLL extern l_ok dewarpFindHorizDisparity ( L_DEWARP *dew, PTAA *ptaa );
-diff --git a/src/dewarp2.c b/src/dewarp2.c
-index 220eec1..2e29500 100644
---- a/src/dewarp2.c
-+++ b/src/dewarp2.c
-@@ -144,9 +144,17 @@ static const l_float32   L_ALLOWED_W_FRACT = 0.05;  /* no bigger */
-  *          longest textlines.
-  * </pre>
-  */
-+/* WILLUS MOD */
- l_ok
--dewarpBuildPageModel(L_DEWARP    *dew,
--                     const char  *debugfile)
-+dewarpBuildPageModel(L_DEWARP *dew,const char *debugfile)
-+{
-+return(dewarpBuildPageModel_ex(dew,debugfile,2));
-+}
-+
-+l_ok
-+dewarpBuildPageModel_ex(L_DEWARP    *dew,
-+                     const char  *debugfile,
-+                     l_int32 fit_order)
- {
- l_int32  linecount, topline, botline, ret;
- PIX     *pixs, *pix1, *pix2, *pix3;
-@@ -225,7 +233,7 @@ PTAA    *ptaa1, *ptaa2;
-         /* Get the sampled vertical disparity from the textline centers.
-          * The disparity array will push pixels vertically so that each
-          * textline is flat and centered at the y-position of the mid-point. */
--    if (dewarpFindVertDisparity(dew, ptaa2, 0) != 0) {
-+    if (dewarpFindVertDisparity_ex(dew, ptaa2, 0, fit_order) != 0) {
-         L_WARNING("vertical disparity not built\n", procName);
-         ptaaDestroy(&ptaa2);
-         return 1;
-@@ -290,13 +298,24 @@ PTAA    *ptaa1, *ptaa2;
-  *          a pdf.  Non-pix debug output goes to /tmp.
-  * </pre>
-  */
-+/* WILLUS MOD */
- l_ok
- dewarpFindVertDisparity(L_DEWARP  *dew,
-                         PTAA      *ptaa,
-                         l_int32    rotflag)
- {
-+return(dewarpFindVertDisparity_ex(dew,ptaa,rotflag,2));
-+}
-+/* WILLUS MOD -- add cubic and quartic fits and ..._ex functions */
-+l_int32
-+dewarpFindVertDisparity_ex(L_DEWARP  *dew,
-+                        PTAA      *ptaa,
-+                        l_int32    rotflag,
-+                        l_int32    fit_order)
-+{
- l_int32     i, j, nlines, npts, nx, ny, sampling;
--l_float32   c0, c1, c2, x, y, midy, val, medval, meddev, minval, maxval;
-+/* WILLUS MOD */
-+l_float32   c0, c1, c2, c3, c4, x, y, midy, val, medval, meddev, minval, maxval;
- l_float32  *famidys;
- NUMA       *nax, *nafit, *nacurve0, *nacurve1, *nacurves;
- NUMA       *namidy, *namidys, *namidysi;
-@@ -304,11 +323,22 @@ PIX        *pix1, *pix2, *pixcirc, *pixdb;
- PTA        *pta, *ptad, *ptacirc;
- PTAA       *ptaa0, *ptaa1, *ptaa2, *ptaa3, *ptaa4, *ptaa5, *ptaat;
- FPIX       *fpix;
-+/* WILLUS MOD */
-+l_int32 fit_order1,fit_order2;
- 
-     PROCNAME("dewarpFindVertDisparity");
- 
-     if (!dew)
-         return ERROR_INT("dew not defined", procName, 1);
-+/* WILLUS MOD */
-+    if (fit_order < 10)
-+        fit_order1 = fit_order2 = fit_order;
-+    else
-+        {
-+        fit_order1=fit_order % 10;
-+        fit_order2=fit_order / 10;
-+        fit_order2=fit_order2 % 10;
-+        }
-     dew->vsuccess = 0;
-     if (!ptaa)
-         return ERROR_INT("ptaa not defined", procName, 1);
-@@ -331,12 +361,32 @@ FPIX       *fpix;
-     pixdb = (rotflag) ? pixRotateOrth(dew->pixs, 1) : pixClone(dew->pixs);
-     for (i = 0; i < nlines; i++) {  /* for each line */
-         pta = ptaaGetPta(ptaa, i, L_CLONE);
--        ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL);
--        numaAddNumber(nacurve0, c2);
-+/* WILLUS MOD */
-+if (fit_order1>3)
-+    {
-+    ptaGetQuarticLSF(pta, &c4, &c3, &c2, &c1, &c0, NULL);
-+    numaAddNumber(nacurve0, c4);
-+    }
-+else if (fit_order1==3)
-+    {
-+    ptaGetCubicLSF(pta, &c3, &c2, &c1, &c0, NULL);
-+    numaAddNumber(nacurve0, c3);
-+    }
-+else
-+    {
-+    ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL);
-+    numaAddNumber(nacurve0, c2);
-+    }
-         ptad = ptaCreate(nx);
-         for (j = 0; j < nx; j++) {  /* uniformly sampled in x */
-              x = j * sampling;
--             applyQuadraticFit(c2, c1, c0, x, &y);
-+/* WILLUS MOD */
-+if (fit_order1>3)
-+    applyQuarticFit(c4, c3, c2, c1, c0, x, &y);
-+else if (fit_order1==3)
-+    applyCubicFit(c3, c2, c1, c0, x, &y);
-+else
-+    applyQuadraticFit(c2, c1, c0, x, &y);
-              ptaAddPt(ptad, x, y);
-         }
-         ptaaAddPta(ptaa0, ptad, L_INSERT);
-@@ -350,7 +400,13 @@ FPIX       *fpix;
-         for (i = 0; i < nlines; i++) {
-             pta = ptaaGetPta(ptaa, i, L_CLONE);
-             ptaGetArrays(pta, &nax, NULL);
--            ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit);
-+/* WILLUS MOD */
-+if (fit_order1>3)
-+ptaGetQuarticLSF(pta, NULL, NULL, NULL, NULL, NULL, &nafit);
-+else if (fit_order1==3)
-+ptaGetCubicLSF(pta, NULL, NULL, NULL, NULL, &nafit);
-+else
-+ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit);
-             ptad = ptaCreateFromNuma(nax, nafit);
-             ptaaAddPta(ptaat, ptad, L_INSERT);
-             ptaDestroy(&pta);
-@@ -494,11 +550,24 @@ FPIX       *fpix;
-     ptaa5 = ptaaCreate(nx);  /* uniformly sampled across full height of image */
-     for (j = 0; j < nx; j++) {  /* for each column */
-         pta = ptaaGetPta(ptaa4, j, L_CLONE);
--        ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL);
-+/* WILLUS MOD */
-+/* Order higher than 2 can cause a little craziness here. */
-+if (fit_order2>3)
-+    ptaGetQuarticLSF(pta, &c4, &c3, &c2, &c1, &c0, NULL);
-+else if (fit_order2==3)
-+    ptaGetCubicLSF(pta, &c3, &c2, &c1, &c0, NULL);
-+else
-+    ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL);
-         ptad = ptaCreate(ny);
-         for (i = 0; i < ny; i++) {  /* uniformly sampled in y */
-              y = i * sampling;
--             applyQuadraticFit(c2, c1, c0, y, &val);
-+/* WILLUS MOD */
-+if (fit_order2>3)
-+    applyQuarticFit(c4, c3, c2, c1, c0, y, &val);
-+else if (fit_order2==3)
-+    applyCubicFit(c3, c2, c1, c0, y, &val);
-+else
-+    applyQuadraticFit(c2, c1, c0, y, &val);
-              ptaAddPt(ptad, y, val);
-         }
-         ptaaAddPta(ptaa5, ptad, L_INSERT);
-@@ -1602,11 +1671,21 @@ FPIX      *fpix;
-  *          See notes there.
-  * </pre>
-  */
-+/* WILLUS MOD */
- l_ok
- dewarpBuildLineModel(L_DEWARP    *dew,
-                      l_int32      opensize,
-                      const char  *debugfile)
- {
-+return(dewarpBuildLineModel_ex(dew,opensize,debugfile,2));
-+}
-+
-+l_int32
-+dewarpBuildLineModel_ex(L_DEWARP    *dew,
-+                     l_int32      opensize,
-+                     const char  *debugfile,
-+                     l_int32 fit_order)
-+{
- char     buf[64];
- l_int32  i, j, bx, by, ret, nlines;
- BOXA    *boxa;
-@@ -1695,6 +1774,8 @@ PTAA    *ptaa1, *ptaa2;
- 
-             /* Remove all lines that are not at least 0.75 times the length
-              * of the longest line. */
-+/* WILLUS MOD */
-+/*
-         ptaa2 = dewarpRemoveShortLines(pix, ptaa1, 0.75, DEBUG_SHORT_LINES);
-         if (debugfile) {
-             pix1 = pixConvertTo32(pix);
-@@ -1704,6 +1785,8 @@ PTAA    *ptaa1, *ptaa2;
-             pixDestroy(&pix1);
-             pixDestroy(&pix2);
-         }
-+*/
-+ptaa2=ptaa1;
-         ptaaDestroy(&ptaa1);
-         nlines = ptaaGetCount(ptaa2);
-         if (nlines < dew->minlines) {
-@@ -1717,7 +1800,8 @@ PTAA    *ptaa1, *ptaa2;
-              * centers.  The disparity array will push pixels vertically
-              * so that each line is flat and centered at the y-position
-              * of the mid-point. */
--        ret = dewarpFindVertDisparity(dew, ptaa2, 1 - i);
-+/* WILLUS MOD */
-+        ret = dewarpFindVertDisparity_ex(dew, ptaa2, 1 - i, fit_order);
- 
-             /* If i == 0, move the result to the horizontal disparity,
-              * rotating it back by -90 degrees. */
-diff --git a/src/leptwin.c b/src/leptwin.c
-index 72643a0..573d33e 100644
---- a/src/leptwin.c
-+++ b/src/leptwin.c
-@@ -364,5 +364,9 @@ PIXCMAP   *cmap;
- 
-     return hBitmap;
- }
--
-+#else
-+/* willus mod: Avoid weird issue with OS/X library archiver when there are no symbols */
-+int leptwin_my_empty_func(void);
-+int leptwin_my_empty_func(void)
-+{return(0);}
- #endif   /* _WIN32 */
--- 
-2.22.0
-
diff --git a/pkgs/applications/misc/k2pdfopt/mupdf.patch b/pkgs/applications/misc/k2pdfopt/mupdf.patch
deleted file mode 100644
index 0c59a1d2016..00000000000
--- a/pkgs/applications/misc/k2pdfopt/mupdf.patch
+++ /dev/null
@@ -1,1060 +0,0 @@
-From d8927c969e3387ca2669a616c0ba53bce918a031 Mon Sep 17 00:00:00 2001
-From: Daniel Fullmer <danielrf12@gmail.com>
-Date: Fri, 13 Sep 2019 15:11:45 -0400
-Subject: [PATCH] Willus mod for k2pdfopt
-
----
- source/fitz/filter-basic.c |   3 +
- source/fitz/font-win32.c   | 866 +++++++++++++++++++++++++++++++++++++
- source/fitz/font.c         |   3 +
- source/fitz/stext-device.c |   5 +
- source/fitz/string.c       |   5 +
- source/pdf/pdf-annot.c     |  14 +-
- source/pdf/pdf-link.c      |   3 +
- source/pdf/pdf-parse.c     |   5 +
- source/pdf/pdf-xref.c      |   9 +
- 9 files changed, 912 insertions(+), 1 deletion(-)
- create mode 100644 source/fitz/font-win32.c
-
-diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c
-index 0713a62e7..b8ef4d292 100644
---- a/source/fitz/filter-basic.c
-+++ b/source/fitz/filter-basic.c
-@@ -259,7 +259,10 @@ look_for_endstream:
- 	if (!state->warned)
- 	{
- 		state->warned = 1;
-+/* willus mod -- no warning */
-+/*
- 		fz_warn(ctx, "PDF stream Length incorrect");
-+*/
- 	}
- 	return *stm->rp++;
- }
-diff --git a/source/fitz/font-win32.c b/source/fitz/font-win32.c
-new file mode 100644
-index 000000000..45de8cfd3
---- /dev/null
-+++ b/source/fitz/font-win32.c
-@@ -0,0 +1,866 @@
-+/*
-+** Routines to access MS Windows system fonts.
-+** From sumatra PDF distro.
-+** Modified for MuPDF v1.9a by willus.com
-+*/
-+#include "mupdf/pdf.h"
-+
-+/*
-+	Which fonts are embedded is based on a few preprocessor definitions.
-+
-+	The base 14 fonts are always embedded.
-+	For CJK font substitution we embed DroidSansFallback.
-+
-+	Set NOCJK to skip all CJK support (this also omits embedding the CJK CMaps)
-+	Set NOCJKFONT to skip the embedded CJK font.
-+	Set NOCJKFULL to embed a smaller CJK font without CJK Extension A support.
-+*/
-+
-+#ifdef NOCJK
-+#define NOCJKFONT
-+#endif
-+
-+/* SumatraPDF: also load fonts included with Windows */
-+#ifdef _WIN32
-+
-+#ifndef UNICODE
-+#define UNICODE
-+#endif
-+#ifndef _UNICODE
-+#define _UNICODE
-+#endif
-+
-+#include <windows.h>
-+
-+// TODO: Use more of FreeType for TTF parsing (for performance reasons,
-+//       the fonts can't be parsed completely, though)
-+#include <ft2build.h>
-+#include FT_TRUETYPE_IDS_H
-+#include FT_TRUETYPE_TAGS_H
-+
-+#define TTC_VERSION1	0x00010000
-+#define TTC_VERSION2	0x00020000
-+
-+#define MAX_FACENAME	128
-+
-+// Note: the font face must be the first field so that the structure
-+//       can be treated like a simple string for searching
-+typedef struct pdf_fontmapMS_s
-+{
-+	char fontface[MAX_FACENAME];
-+	char fontpath[MAX_PATH];
-+	int index;
-+} pdf_fontmapMS;
-+
-+typedef struct pdf_fontlistMS_s
-+{
-+	pdf_fontmapMS *fontmap;
-+	int len;
-+	int cap;
-+} pdf_fontlistMS;
-+
-+typedef struct _tagTT_OFFSET_TABLE
-+{
-+	ULONG	uVersion;
-+	USHORT	uNumOfTables;
-+	USHORT	uSearchRange;
-+	USHORT	uEntrySelector;
-+	USHORT	uRangeShift;
-+} TT_OFFSET_TABLE;
-+
-+typedef struct _tagTT_TABLE_DIRECTORY
-+{
-+	ULONG	uTag;				//table name
-+	ULONG	uCheckSum;			//Check sum
-+	ULONG	uOffset;			//Offset from beginning of file
-+	ULONG	uLength;			//length of the table in bytes
-+} TT_TABLE_DIRECTORY;
-+
-+typedef struct _tagTT_NAME_TABLE_HEADER
-+{
-+	USHORT	uFSelector;			//format selector. Always 0
-+	USHORT	uNRCount;			//Name Records count
-+	USHORT	uStorageOffset;		//Offset for strings storage, from start of the table
-+} TT_NAME_TABLE_HEADER;
-+
-+typedef struct _tagTT_NAME_RECORD
-+{
-+	USHORT	uPlatformID;
-+	USHORT	uEncodingID;
-+	USHORT	uLanguageID;
-+	USHORT	uNameID;
-+	USHORT	uStringLength;
-+	USHORT	uStringOffset;	//from start of storage area
-+} TT_NAME_RECORD;
-+
-+typedef struct _tagFONT_COLLECTION
-+{
-+	ULONG	Tag;
-+	ULONG	Version;
-+	ULONG	NumFonts;
-+} FONT_COLLECTION;
-+
-+static struct {
-+	char *name;
-+	char *pattern;
-+} baseSubstitutes[] = {
-+	{ "Courier", "CourierNewPSMT" },
-+	{ "Courier-Bold", "CourierNewPS-BoldMT" },
-+	{ "Courier-Oblique", "CourierNewPS-ItalicMT" },
-+	{ "Courier-BoldOblique", "CourierNewPS-BoldItalicMT" },
-+	{ "Helvetica", "ArialMT" },
-+	{ "Helvetica-Bold", "Arial-BoldMT" },
-+	{ "Helvetica-Oblique", "Arial-ItalicMT" },
-+	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT" },
-+	{ "Times-Roman", "TimesNewRomanPSMT" },
-+	{ "Times-Bold", "TimesNewRomanPS-BoldMT" },
-+	{ "Times-Italic", "TimesNewRomanPS-ItalicMT" },
-+	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT" },
-+	{ "Symbol", "SymbolMT" },
-+};
-+static const char *base_font_names[][10] =
-+{
-+	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
-+	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
-+		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
-+	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
-+		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
-+	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
-+		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
-+	{ "Helvetica", "ArialMT", "Arial", NULL },
-+	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
-+		"Helvetica,Bold", NULL },
-+	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
-+		"Helvetica,Italic", "Helvetica-Italic", NULL },
-+	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
-+		"Arial,BoldItalic", "Arial-BoldItalic",
-+		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
-+	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
-+		"TimesNewRomanPS", NULL },
-+	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
-+		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
-+	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
-+		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
-+	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
-+		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
-+		"TimesNewRoman-BoldItalic", NULL },
-+	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
-+		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
-+	{ "ZapfDingbats", NULL }
-+};
-+
-+static pdf_fontlistMS fontlistMS =
-+{
-+	NULL,
-+	0,
-+	0,
-+};
-+static int strcmp_ignore_space(const char *a, const char *b);
-+static const char *clean_font_name(const char *fontname);
-+static const char *pdf_clean_base14_name(const char *fontname);
-+
-+static inline USHORT BEtoHs(USHORT x)
-+{
-+	BYTE *data = (BYTE *)&x;
-+	return (data[0] << 8) | data[1];
-+}
-+
-+static inline ULONG BEtoHl(ULONG x)
-+{
-+	BYTE *data = (BYTE *)&x;
-+	return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];
-+}
-+
-+static int strcmp_ignore_space(const char *a, const char *b)
-+{
-+	while (1)
-+	{
-+		while (*a == ' ')
-+			a++;
-+		while (*b == ' ')
-+			b++;
-+		if (*a != *b)
-+			return 1;
-+		if (*a == 0)
-+			return *a != *b;
-+		if (*b == 0)
-+			return *a != *b;
-+		a++;
-+		b++;
-+	}
-+}
-+
-+/* A little bit more sophisticated name matching so that e.g. "EurostileExtended"
-+   matches "EurostileExtended-Roman" or "Tahoma-Bold,Bold" matches "Tahoma-Bold" */
-+static int
-+lookup_compare(const void *elem1, const void *elem2)
-+{
-+	const char *val1 = elem1;
-+	const char *val2 = elem2;
-+	int len1 = strlen(val1);
-+	int len2 = strlen(val2);
-+
-+	if (len1 != len2)
-+	{
-+		const char *rest = len1 > len2 ? val1 + len2 : val2 + len1;
-+		if (',' == *rest || !_stricmp(rest, "-roman"))
-+			return _strnicmp(val1, val2, fz_mini(len1, len2));
-+	}
-+
-+	return _stricmp(val1, val2);
-+}
-+
-+static void
-+remove_spaces(char *srcDest)
-+{
-+	char *dest;
-+
-+	for (dest = srcDest; *srcDest; srcDest++)
-+		if (*srcDest != ' ')
-+			*dest++ = *srcDest;
-+	*dest = '\0';
-+}
-+
-+static int
-+str_ends_with(const char *str, const char *end)
-+{
-+	size_t len1 = strlen(str);
-+	size_t len2 = strlen(end);
-+
-+	return len1 >= len2 && !strcmp(str + len1 - len2, end);
-+}
-+
-+static pdf_fontmapMS *
-+pdf_find_windows_font_path(const char *fontname)
-+{
-+	return bsearch(fontname, fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), lookup_compare);
-+}
-+
-+/* source and dest can be same */
-+static void
-+decode_unicode_BE(fz_context *ctx, char *source, int sourcelen, char *dest, int destlen)
-+{
-+	WCHAR *tmp;
-+	int converted, i;
-+
-+	if (sourcelen % 2 != 0)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string");
-+
-+	tmp = fz_malloc_array(ctx, sourcelen / 2 + 1, sizeof(WCHAR));
-+	for (i = 0; i < sourcelen / 2; i++)
-+		tmp[i] = BEtoHs(((WCHAR *)source)[i]);
-+	tmp[sourcelen / 2] = '\0';
-+
-+	converted = WideCharToMultiByte(CP_UTF8, 0, tmp, -1, dest, destlen, NULL, NULL);
-+	fz_free(ctx, tmp);
-+	if (!converted)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string");
-+}
-+
-+static void
-+decode_platform_string(fz_context *ctx, int platform, int enctype, char *source, int sourcelen, char *dest, int destlen)
-+{
-+	switch (platform)
-+	{
-+	case TT_PLATFORM_APPLE_UNICODE:
-+		switch (enctype)
-+		{
-+		case TT_APPLE_ID_DEFAULT:
-+		case TT_APPLE_ID_UNICODE_2_0:
-+			decode_unicode_BE(ctx, source, sourcelen, dest, destlen);
-+			return;
-+		}
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
-+	case TT_PLATFORM_MACINTOSH:
-+		switch (enctype)
-+		{
-+		case TT_MAC_ID_ROMAN:
-+			if (sourcelen + 1 > destlen)
-+				fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : overlong fontname: %s", source);
-+			// TODO: Convert to UTF-8 from what encoding?
-+			memcpy(dest, source, sourcelen);
-+			dest[sourcelen] = 0;
-+			return;
-+		}
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
-+	case TT_PLATFORM_MICROSOFT:
-+		switch (enctype)
-+		{
-+		case TT_MS_ID_SYMBOL_CS:
-+		case TT_MS_ID_UNICODE_CS:
-+		case TT_MS_ID_UCS_4:
-+			decode_unicode_BE(ctx, source, sourcelen, dest, destlen);
-+			return;
-+		}
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
-+	default:
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
-+	}
-+}
-+
-+static void
-+grow_system_font_list(fz_context *ctx, pdf_fontlistMS *fl)
-+{
-+	int newcap;
-+	pdf_fontmapMS *newitems;
-+
-+	if (fl->cap == 0)
-+		newcap = 1024;
-+	else
-+		newcap = fl->cap * 2;
-+
-+	// use realloc/free for the fontmap, since the list can
-+	// remain in memory even with all fz_contexts destroyed
-+	newitems = realloc(fl->fontmap, newcap * sizeof(pdf_fontmapMS));
-+	if (!newitems)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "OOM in grow_system_font_list");
-+	memset(newitems + fl->cap, 0, sizeof(pdf_fontmapMS) * (newcap - fl->cap));
-+
-+	fl->fontmap = newitems;
-+	fl->cap = newcap;
-+}
-+
-+static void
-+append_mapping(fz_context *ctx, pdf_fontlistMS *fl, const char *facename, const char *path, int index)
-+{
-+	if (fl->len == fl->cap)
-+		grow_system_font_list(ctx, fl);
-+
-+	if (fl->len >= fl->cap)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : fontlist overflow");
-+
-+	fz_strlcpy(fl->fontmap[fl->len].fontface, facename, sizeof(fl->fontmap[0].fontface));
-+	fz_strlcpy(fl->fontmap[fl->len].fontpath, path, sizeof(fl->fontmap[0].fontpath));
-+	fl->fontmap[fl->len].index = index;
-+
-+	++fl->len;
-+}
-+
-+static void
-+safe_read(fz_context *ctx, fz_stream *file, int offset, char *buf, int size)
-+{
-+	int n;
-+	fz_seek(ctx, file, offset, 0);
-+	n = fz_read(ctx, file, (unsigned char *)buf, size);
-+	if (n != size)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "safe_read: read %d, expected %d", n, size);
-+}
-+
-+static void
-+read_ttf_string(fz_context *ctx, fz_stream *file, int offset, TT_NAME_RECORD *ttRecordBE, char *buf, int size)
-+{
-+	char szTemp[MAX_FACENAME * 2];
-+	// ignore empty and overlong strings
-+	int stringLength = BEtoHs(ttRecordBE->uStringLength);
-+	if (stringLength == 0 || stringLength >= sizeof(szTemp))
-+		return;
-+
-+	safe_read(ctx, file, offset + BEtoHs(ttRecordBE->uStringOffset), szTemp, stringLength);
-+	decode_platform_string(ctx, BEtoHs(ttRecordBE->uPlatformID),
-+		BEtoHs(ttRecordBE->uEncodingID), szTemp, stringLength, buf, size);
-+}
-+
-+static void
-+makeFakePSName(char szName[MAX_FACENAME], const char *szStyle)
-+{
-+	// append the font's subfamily, unless it's a Regular font
-+	if (*szStyle && _stricmp(szStyle, "Regular") != 0)
-+	{
-+		fz_strlcat(szName, "-", MAX_FACENAME);
-+		fz_strlcat(szName, szStyle, MAX_FACENAME);
-+	}
-+	remove_spaces(szName);
-+}
-+
-+static void
-+parseTTF(fz_context *ctx, fz_stream *file, int offset, int index, const char *path)
-+{
-+	TT_OFFSET_TABLE ttOffsetTableBE;
-+	TT_TABLE_DIRECTORY tblDirBE;
-+	TT_NAME_TABLE_HEADER ttNTHeaderBE;
-+	TT_NAME_RECORD ttRecordBE;
-+
-+	char szPSName[MAX_FACENAME] = { 0 };
-+	char szTTName[MAX_FACENAME] = { 0 };
-+	char szStyle[MAX_FACENAME] = { 0 };
-+	char szCJKName[MAX_FACENAME] = { 0 };
-+	int i, count, tblOffset;
-+
-+	safe_read(ctx, file, offset, (char *)&ttOffsetTableBE, sizeof(TT_OFFSET_TABLE));
-+
-+	// check if this is a TrueType font of version 1.0 or an OpenType font
-+	if (BEtoHl(ttOffsetTableBE.uVersion) != TTC_VERSION1 &&
-+		BEtoHl(ttOffsetTableBE.uVersion) != TTAG_OTTO)
-+	{
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid font version %x", (unsigned int)BEtoHl(ttOffsetTableBE.uVersion));
-+	}
-+
-+	// determine the name table's offset by iterating through the offset table
-+	count = BEtoHs(ttOffsetTableBE.uNumOfTables);
-+	for (i = 0; i < count; i++)
-+	{
-+		int entryOffset = offset + sizeof(TT_OFFSET_TABLE) + i * sizeof(TT_TABLE_DIRECTORY);
-+		safe_read(ctx, file, entryOffset, (char *)&tblDirBE, sizeof(TT_TABLE_DIRECTORY));
-+		if (!BEtoHl(tblDirBE.uTag) || BEtoHl(tblDirBE.uTag) == TTAG_name)
-+			break;
-+	}
-+	if (count == i || !BEtoHl(tblDirBE.uTag))
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : nameless font");
-+	tblOffset = BEtoHl(tblDirBE.uOffset);
-+
-+	// read the 'name' table for record count and offsets
-+	safe_read(ctx, file, tblOffset, (char *)&ttNTHeaderBE, sizeof(TT_NAME_TABLE_HEADER));
-+	offset = tblOffset + sizeof(TT_NAME_TABLE_HEADER);
-+	tblOffset += BEtoHs(ttNTHeaderBE.uStorageOffset);
-+
-+	// read through the strings for PostScript name and font family
-+	count = BEtoHs(ttNTHeaderBE.uNRCount);
-+	for (i = 0; i < count; i++)
-+	{
-+		short langId, nameId;
-+		BOOL isCJKName;
-+
-+		safe_read(ctx, file, offset + i * sizeof(TT_NAME_RECORD), (char *)&ttRecordBE, sizeof(TT_NAME_RECORD));
-+
-+		langId = BEtoHs(ttRecordBE.uLanguageID);
-+		nameId = BEtoHs(ttRecordBE.uNameID);
-+		isCJKName = TT_NAME_ID_FONT_FAMILY == nameId && LANG_CHINESE == PRIMARYLANGID(langId);
-+
-+		// ignore non-English strings (except for Chinese font names)
-+		if (langId && langId != TT_MS_LANGID_ENGLISH_UNITED_STATES && !isCJKName)
-+			continue;
-+		// ignore names other than font (sub)family and PostScript name
-+		fz_try(ctx)
-+		{
-+			if (isCJKName)
-+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szCJKName, sizeof(szCJKName));
-+			else if (TT_NAME_ID_FONT_FAMILY == nameId)
-+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szTTName, sizeof(szTTName));
-+			else if (TT_NAME_ID_FONT_SUBFAMILY == nameId)
-+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szStyle, sizeof(szStyle));
-+			else if (TT_NAME_ID_PS_NAME == nameId)
-+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szPSName, sizeof(szPSName));
-+		}
-+		fz_catch(ctx)
-+		{
-+			fz_warn(ctx, "ignoring face name decoding fonterror");
-+		}
-+	}
-+
-+	// try to prevent non-Arial fonts from accidentally substituting Arial
-+	if (!strcmp(szPSName, "ArialMT"))
-+	{
-+		// cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2471
-+		if (strcmp(szTTName, "Arial") != 0)
-+			szPSName[0] = '\0';
-+		// TODO: is there a better way to distinguish Arial Caps from Arial proper?
-+		// cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1290
-+		else if (strstr(path, "caps") || strstr(path, "Caps"))
-+			fz_throw(ctx, FZ_ERROR_GENERIC, "ignore %s, as it can't be distinguished from Arial,Regular", path);
-+	}
-+
-+	if (szPSName[0])
-+		append_mapping(ctx, &fontlistMS, szPSName, path, index);
-+	if (szTTName[0])
-+	{
-+		// derive a PostScript-like name and add it, if it's different from the font's
-+		// included PostScript name; cf. http://code.google.com/p/sumatrapdf/issues/detail?id=376
-+		makeFakePSName(szTTName, szStyle);
-+		// compare the two names before adding this one
-+		if (lookup_compare(szTTName, szPSName))
-+			append_mapping(ctx, &fontlistMS, szTTName, path, index);
-+	}
-+	if (szCJKName[0])
-+	{
-+		makeFakePSName(szCJKName, szStyle);
-+		if (lookup_compare(szCJKName, szPSName) && lookup_compare(szCJKName, szTTName))
-+			append_mapping(ctx, &fontlistMS, szCJKName, path, index);
-+	}
-+}
-+
-+static void
-+parseTTFs(fz_context *ctx, const char *path)
-+{
-+	fz_stream *file = fz_open_file(ctx, path);
-+	/* "fonterror : %s not found", path */
-+	fz_try(ctx)
-+	{
-+		parseTTF(ctx, file, 0, 0, path);
-+	}
-+	fz_always(ctx)
-+	{
-+		fz_drop_stream(ctx,file);
-+	}
-+	fz_catch(ctx)
-+	{
-+		fz_rethrow(ctx);
-+	}
-+}
-+
-+static void
-+parseTTCs(fz_context *ctx, const char *path)
-+{
-+	FONT_COLLECTION fontcollectionBE;
-+	ULONG i, numFonts, *offsettableBE = NULL;
-+
-+	fz_stream *file = fz_open_file(ctx, path);
-+	/* "fonterror : %s not found", path */
-+
-+	fz_var(offsettableBE);
-+
-+	fz_try(ctx)
-+	{
-+		safe_read(ctx, file, 0, (char *)&fontcollectionBE, sizeof(FONT_COLLECTION));
-+		if (BEtoHl(fontcollectionBE.Tag) != TTAG_ttcf)
-+			fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : wrong format %x", (unsigned int)BEtoHl(fontcollectionBE.Tag));
-+		if (BEtoHl(fontcollectionBE.Version) != TTC_VERSION1 &&
-+			BEtoHl(fontcollectionBE.Version) != TTC_VERSION2)
-+		{
-+			fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid version %x", (unsigned int)BEtoHl(fontcollectionBE.Version));
-+		}
-+
-+		numFonts = BEtoHl(fontcollectionBE.NumFonts);
-+		offsettableBE = fz_malloc_array(ctx, numFonts, sizeof(ULONG));
-+
-+		safe_read(ctx, file, sizeof(FONT_COLLECTION), (char *)offsettableBE, numFonts * sizeof(ULONG));
-+		for (i = 0; i < numFonts; i++)
-+			parseTTF(ctx, file, BEtoHl(offsettableBE[i]), i, path);
-+	}
-+	fz_always(ctx)
-+	{
-+		fz_free(ctx, offsettableBE);
-+		fz_drop_stream(ctx,file);
-+	}
-+	fz_catch(ctx)
-+	{
-+		fz_rethrow(ctx);
-+	}
-+}
-+
-+static void
-+extend_system_font_list(fz_context *ctx, const WCHAR *path)
-+{
-+	WCHAR szPath[MAX_PATH], *lpFileName;
-+	WIN32_FIND_DATA FileData;
-+	HANDLE hList;
-+
-+	GetFullPathName(path, nelem(szPath), szPath, &lpFileName);
-+
-+	hList = FindFirstFile(szPath, &FileData);
-+	if (hList == INVALID_HANDLE_VALUE)
-+	{
-+		// Don't complain about missing directories
-+		if (GetLastError() == ERROR_FILE_NOT_FOUND)
-+			return;
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "extend_system_font_list: unknown error %d", (int)GetLastError());
-+	}
-+	do
-+	{
-+		if (!(FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
-+		{
-+			char szPathUtf8[MAX_PATH], *fileExt;
-+			int res;
-+			lstrcpyn(lpFileName, FileData.cFileName, szPath + MAX_PATH - lpFileName);
-+			res = WideCharToMultiByte(CP_UTF8, 0, szPath, -1, szPathUtf8, sizeof(szPathUtf8), NULL, NULL);
-+			if (!res)
-+			{
-+				fz_warn(ctx, "WideCharToMultiByte failed for %S", szPath);
-+				continue;
-+			}
-+			fileExt = szPathUtf8 + strlen(szPathUtf8) - 4;
-+			fz_try(ctx)
-+			{
-+				if (!_stricmp(fileExt, ".ttc"))
-+					parseTTCs(ctx, szPathUtf8);
-+				else if (!_stricmp(fileExt, ".ttf") || !_stricmp(fileExt, ".otf"))
-+					parseTTFs(ctx, szPathUtf8);
-+			}
-+			fz_catch(ctx)
-+			{
-+				// ignore errors occurring while parsing a given font file
-+			}
-+		}
-+	} while (FindNextFile(hList, &FileData));
-+	FindClose(hList);
-+}
-+
-+static void
-+destroy_system_font_list(void)
-+{
-+	free(fontlistMS.fontmap);
-+	memset(&fontlistMS, 0, sizeof(fontlistMS));
-+}
-+
-+static void
-+create_system_font_list(fz_context *ctx)
-+{
-+	WCHAR szFontDir[MAX_PATH];
-+	UINT cch;
-+
-+	cch = GetWindowsDirectory(szFontDir, nelem(szFontDir) - 12);
-+	if (0 < cch && cch < nelem(szFontDir) - 12)
-+	{
-+        /* willus.com edit--Win XP default MSVCRT.DLL doesn't have wcscat_s */
-+#ifdef _WIN64
-+		wcscat_s(szFontDir, MAX_PATH, L"\\Fonts\\*.?t?");
-+#else
-+		wcscat(szFontDir,L"\\Fonts\\*.?t?");
-+#endif
-+		extend_system_font_list(ctx, szFontDir);
-+	}
-+
-+	if (fontlistMS.len == 0)
-+		fz_warn(ctx, "couldn't find any usable system fonts");
-+
-+#ifdef NOCJKFONT
-+	{
-+		// If no CJK fallback font is builtin but one has been shipped separately (in the same
-+		// directory as the main executable), add it to the list of loadable system fonts
-+		WCHAR szFile[MAX_PATH], *lpFileName;
-+		GetModuleFileName(0, szFontDir, MAX_PATH);
-+		GetFullPathName(szFontDir, MAX_PATH, szFile, &lpFileName);
-+		lstrcpyn(lpFileName, L"DroidSansFallback.ttf", szFile + MAX_PATH - lpFileName);
-+		extend_system_font_list(ctx, szFile);
-+	}
-+#endif
-+
-+	// sort the font list, so that it can be searched binarily
-+	qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp);
-+
-+#ifdef DEBUG
-+	// allow to overwrite system fonts for debugging purposes
-+	// (either pass a full path or a search pattern such as "fonts\*.ttf")
-+	cch = GetEnvironmentVariable(L"MUPDF_FONTS_PATTERN", szFontDir, nelem(szFontDir));
-+	if (0 < cch && cch < nelem(szFontDir))
-+	{
-+		int i, prev_len = fontlistMS.len;
-+		extend_system_font_list(ctx, szFontDir);
-+		for (i = prev_len; i < fontlistMS.len; i++)
-+		{
-+			pdf_fontmapMS *entry = bsearch(fontlistMS.fontmap[i].fontface, fontlistMS.fontmap, prev_len, sizeof(pdf_fontmapMS), lookup_compare);
-+			if (entry)
-+				*entry = fontlistMS.fontmap[i];
-+		}
-+		qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp);
-+	}
-+#endif
-+
-+	// make sure to clean up after ourselves
-+	atexit(destroy_system_font_list);
-+}
-+
-+static fz_font *
-+pdf_load_windows_font_by_name(fz_context *ctx, const char *orig_name)
-+{
-+	pdf_fontmapMS *found = NULL;
-+	char *comma, *fontname;
-+	fz_font *font;
-+
-+    /* WILLUS MOD--not multi-threaded for k2pdfopt */
-+	/* fz_synchronize_begin(); */
-+	if (fontlistMS.len == 0)
-+	{
-+		fz_try(ctx)
-+		{
-+			create_system_font_list(ctx);
-+		}
-+		fz_catch(ctx) { }
-+	}
-+    /* WILLUS MOD--not multi-threaded for k2pdfopt */
-+	/* fz_synchronize_end(); */
-+	if (fontlistMS.len == 0)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: couldn't find any fonts");
-+
-+	// work on a normalized copy of the font name
-+	fontname = fz_strdup(ctx, orig_name);
-+	remove_spaces(fontname);
-+
-+	// first, try to find the exact font name (including appended style information)
-+	comma = strchr(fontname, ',');
-+	if (comma)
-+	{
-+		*comma = '-';
-+		found = pdf_find_windows_font_path(fontname);
-+		*comma = ',';
-+	}
-+	// second, substitute the font name with a known PostScript name
-+	else
-+	{
-+		int i;
-+		for (i = 0; i < nelem(baseSubstitutes) && !found; i++)
-+			if (!strcmp(fontname, baseSubstitutes[i].name))
-+				found = pdf_find_windows_font_path(baseSubstitutes[i].pattern);
-+	}
-+	// third, search for the font name without additional style information
-+	if (!found)
-+		found = pdf_find_windows_font_path(fontname);
-+	// fourth, try to separate style from basename for prestyled fonts (e.g. "ArialBold")
-+	if (!found && !comma && (str_ends_with(fontname, "Bold") || str_ends_with(fontname, "Italic")))
-+	{
-+		int styleLen = str_ends_with(fontname, "Bold") ? 4 : str_ends_with(fontname, "BoldItalic") ? 10 : 6;
-+		fontname = fz_resize_array(ctx, fontname, strlen(fontname) + 2, sizeof(char));
-+		comma = fontname + strlen(fontname) - styleLen;
-+		memmove(comma + 1, comma, styleLen + 1);
-+		*comma = '-';
-+		found = pdf_find_windows_font_path(fontname);
-+		*comma = ',';
-+		if (!found)
-+			found = pdf_find_windows_font_path(fontname);
-+	}
-+	// fifth, try to convert the font name from the common Chinese codepage 936
-+	if (!found && fontname[0] < 0)
-+	{
-+		WCHAR cjkNameW[MAX_FACENAME];
-+		char cjkName[MAX_FACENAME];
-+		if (MultiByteToWideChar(936, MB_ERR_INVALID_CHARS, fontname, -1, cjkNameW, nelem(cjkNameW)) &&
-+			WideCharToMultiByte(CP_UTF8, 0, cjkNameW, -1, cjkName, nelem(cjkName), NULL, NULL))
-+		{
-+			comma = strchr(cjkName, ',');
-+			if (comma)
-+			{
-+				*comma = '-';
-+				found = pdf_find_windows_font_path(cjkName);
-+				*comma = ',';
-+			}
-+			if (!found)
-+				found = pdf_find_windows_font_path(cjkName);
-+		}
-+	}
-+
-+	fz_free(ctx, fontname);
-+	if (!found)
-+		fz_throw(ctx, FZ_ERROR_GENERIC, "couldn't find system font '%s'", orig_name);
-+
-+    /*
-+	fz_warn(ctx, "loading non-embedded font '%s' from '%s'", orig_name, found->fontpath);
-+    */
-+
-+	font = fz_new_font_from_file(ctx, orig_name, found->fontpath, found->index,
-+		strcmp(found->fontface, "DroidSansFallback") != 0);
-+    /* willus mod for MuPDF v1.10, 10-21-2016 */
-+    {
-+    fz_font_flags_t *flags;
-+    flags=fz_font_flags(font);
-+    if (flags!=NULL)
-+    	flags->ft_substitute = 1;
-+    }
-+	return font;
-+}
-+
-+static fz_font *
-+pdf_load_windows_font(fz_context *ctx, const char *fontname, int bold, int italic, int needs_exact_metrics)
-+{
-+	if (needs_exact_metrics)
-+	{
-+		const char *clean_name;
-+        /* WILLUS: Declare pdf_clean_base14_name() */
-+        extern const char *pdf_clean_base14_name(const char *fontname);
-+
-+		/* TODO: the metrics for Times-Roman and Courier don't match
-+		   those of Windows' Times New Roman and Courier New; for
-+		   some reason, Poppler doesn't seem to have this problem */
-+		int len;
-+		if (fz_lookup_builtin_font(ctx,fontname, bold, italic, &len))
-+			return NULL;
-+
-+		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2173 */
-+		clean_name = pdf_clean_base14_name(fontname);
-+		if (clean_name != fontname && !strncmp(clean_name, "Times-", 6))
-+			return NULL;
-+	}
-+
-+	// TODO: unset font->ft_substitute for base14/needs_exact_metrics?
-+	return pdf_load_windows_font_by_name(ctx, fontname);
-+}
-+
-+static const char *clean_font_name(const char *fontname)
-+{
-+	int i, k;
-+	for (i = 0; i < nelem(base_font_names); i++)
-+		for (k = 0; base_font_names[i][k]; k++)
-+			if (!strcmp_ignore_space(base_font_names[i][k], fontname))
-+				return base_font_names[i][0];
-+	return fontname;
-+}
-+
-+
-+/* SumatraPDF: expose clean_font_name */
-+static const char * pdf_clean_base14_name(const char *fontname)
-+{
-+	return clean_font_name(fontname);
-+}
-+
-+static fz_font *
-+pdf_load_windows_cjk_font(fz_context *ctx, const char *fontname, int ros, int serif)
-+{
-+	fz_font *font;
-+
-+    font=NULL; /* WILLUS: Avoid compiler warning */
-+	/* try to find a matching system font before falling back to an approximate one */
-+	fz_try(ctx)
-+	{
-+		font = pdf_load_windows_font_by_name(ctx, fontname);
-+	}
-+	fz_catch(ctx)
-+	{
-+		font = NULL;
-+	}
-+	if (font)
-+		return font;
-+
-+	/* try to fall back to a reasonable system font */
-+	fz_try(ctx)
-+	{
-+		if (serif)
-+		{
-+			switch (ros)
-+			{
-+			case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "MingLiU"); break;
-+			case FZ_ADOBE_GB: font = pdf_load_windows_font_by_name(ctx, "SimSun"); break;
-+			case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Mincho"); break;
-+			case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Batang"); break;
-+			default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid serif ros");
-+			}
-+		}
-+		else
-+		{
-+			switch (ros)
-+			{
-+			case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "DFKaiShu-SB-Estd-BF"); break;
-+			case FZ_ADOBE_GB:
-+				fz_try(ctx)
-+				{
-+					font = pdf_load_windows_font_by_name(ctx, "KaiTi");
-+				}
-+				fz_catch(ctx)
-+				{
-+					font = pdf_load_windows_font_by_name(ctx, "KaiTi_GB2312");
-+				}
-+				break;
-+			case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Gothic"); break;
-+			case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Gulim"); break;
-+			default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid sans-serif ros");
-+			}
-+		}
-+	}
-+	fz_catch(ctx)
-+	{
-+#ifdef NOCJKFONT
-+		/* If no CJK fallback font is builtin, maybe one has been shipped separately */
-+		font = pdf_load_windows_font_by_name(ctx, "DroidSansFallback");
-+#else
-+		fz_rethrow(ctx);
-+#endif
-+	}
-+
-+	return font;
-+}
-+
-+#endif
-+
-+void pdf_install_load_system_font_funcs(fz_context *ctx)
-+{
-+#ifdef _WIN32
-+	fz_install_load_system_font_funcs(ctx, pdf_load_windows_font, pdf_load_windows_cjk_font, NULL);
-+#endif
-+}
-diff --git a/source/fitz/font.c b/source/fitz/font.c
-index 00c6e8f99..1448b4a56 100644
---- a/source/fitz/font.c
-+++ b/source/fitz/font.c
-@@ -4,8 +4,11 @@
- #include "draw-imp.h"
- 
- #include <ft2build.h>
-+/* willus mod -- remove hb includes */
-+/*
- #include "hb.h"
- #include "hb-ft.h"
-+*/
- 
- #include <assert.h>
- 
-diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
-index 2df90305e..b1f99e056 100644
---- a/source/fitz/stext-device.c
-+++ b/source/fitz/stext-device.c
-@@ -825,6 +825,11 @@ fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options
- 	dev->lastchar = ' ';
- 	dev->curdir = 1;
- 	dev->lasttext = NULL;
-+    /* willus mod -- seems like this should be here, but not sure. */
-+    if (opts)
-+        dev->flags = opts->flags;
-+    else
-+        dev->flags = 0;
- 
- 	return (fz_device*)dev;
- }
-diff --git a/source/fitz/string.c b/source/fitz/string.c
-index f8eedb682..7a767983d 100644
---- a/source/fitz/string.c
-+++ b/source/fitz/string.c
-@@ -560,6 +560,10 @@ fz_utflen(const char *s)
- */
- float fz_atof(const char *s)
- {
-+/* willus mod: atof(s), #if-#else-#endif */
-+#if (!defined(__SSE__))
-+    return(atof(s));
-+#else
- 	float result;
- 
- 	if (s == NULL)
-@@ -572,6 +576,7 @@ float fz_atof(const char *s)
- 		return 1;
- 	result = fz_clamp(result, -FLT_MAX, FLT_MAX);
- 	return result;
-+#endif
- }
- 
- /*
-diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c
-index 4dfdf36fe..acff7d12a 100644
---- a/source/pdf/pdf-annot.c
-+++ b/source/pdf/pdf-annot.c
-@@ -5,8 +5,20 @@
- #include <string.h>
- #include <time.h>
- 
-+/* willus mod--don't use _mkgmtime--not available in Win XP */
- #ifdef _WIN32
--#define timegm _mkgmtime
-+static time_t timegm(struct tm *date);
-+static time_t timegm(struct tm *date)
-+
-+    {
-+    time_t t,z;
-+    struct tm gmz;
-+
-+    z=(time_t)0;
-+    gmz=(*gmtime(&z));
-+    t=mktime(date)-mktime(&gmz);
-+    return(t);
-+    }
- #endif
- 
- #define isdigit(c) (c >= '0' && c <= '9')
-diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c
-index 37444b471..613cc05b9 100644
---- a/source/pdf/pdf-link.c
-+++ b/source/pdf/pdf-link.c
-@@ -345,6 +345,9 @@ pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp,
- 		}
- 		return page;
- 	}
-+/* willus mod -- be quiet */
-+/*
- 	fz_warn(ctx, "unknown link uri '%s'", uri);
-+*/
- 	return -1;
- }
-diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
-index 04a772204..9dd0cd898 100644
---- a/source/pdf/pdf-parse.c
-+++ b/source/pdf/pdf-parse.c
-@@ -663,9 +663,14 @@ pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
- 			if (c == '\r')
- 			{
- 				c = fz_peek_byte(ctx, file);
-+/* willus mod -- no warning */
-+/*
- 				if (c != '\n')
- 					fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
- 				else
-+*/
-+if (c=='\n')
-+/* willus mod -- end */
- 					fz_read_byte(ctx, file);
- 			}
- 			stm_ofs = fz_tell(ctx, file);
-diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
-index 8f888059b..08de7bfba 100644
---- a/source/pdf/pdf-xref.c
-+++ b/source/pdf/pdf-xref.c
-@@ -710,8 +710,11 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
- 		if (!s)
- 			fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing");
- 		len = fz_atoi(fz_strsep(&s, " "));
-+/* willus mod -- no warning */
-+/*
- 		if (len < 0)
- 			fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive");
-+*/
- 
- 		/* broken pdfs where the section is not on a separate line */
- 		if (s && *s != '\0')
-@@ -1378,7 +1381,10 @@ pdf_init_document(fz_context *ctx, pdf_document *doc)
- 	{
- 		pdf_drop_xref_sections(ctx, doc);
- 		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
-+/* willus mod -- be quiet */
-+/*
- 		fz_warn(ctx, "trying to repair broken xref");
-+*/
- 		repaired = 1;
- 	}
- 
-@@ -1506,7 +1512,10 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
- 		/* Swallow error, but continue dropping */
- 	}
- 
-+/* willu smod -- no pdf_drop_js */
-+/*
- 	pdf_drop_js(ctx, doc->js);
-+*/
- 
- 	pdf_drop_xref_sections(ctx, doc);
- 	fz_free(ctx, doc->xref_index);
--- 
-2.22.0
-
diff --git a/pkgs/applications/misc/k2pdfopt/tesseract.patch b/pkgs/applications/misc/k2pdfopt/tesseract.patch
deleted file mode 100644
index adfee9ae282..00000000000
--- a/pkgs/applications/misc/k2pdfopt/tesseract.patch
+++ /dev/null
@@ -1,675 +0,0 @@
-From 39aa8502eee7bb669a29d1a9b3bfe5c9595ad960 Mon Sep 17 00:00:00 2001
-From: Daniel Fullmer <danielrf12@gmail.com>
-Date: Fri, 13 Sep 2019 13:45:05 -0400
-Subject: [PATCH] Willus mod changes from k2pdfopt
-
----
- src/api/Makefile.am        |   1 +
- src/api/baseapi.cpp        |  87 +++++++++++
- src/api/baseapi.h          |   3 +
- src/api/tesscapi.cpp       | 311 +++++++++++++++++++++++++++++++++++++
- src/api/tesseract.h        |  29 ++++
- src/ccmain/tessedit.cpp    |   5 +-
- src/ccutil/ccutil.h        |   7 +
- src/ccutil/genericvector.h |  21 ++-
- src/ccutil/mainblk.cpp     |  17 +-
- src/ccutil/params.cpp      |   3 +-
- src/ccutil/serialis.cpp    |   3 +
- src/ccutil/serialis.h      |   2 +
- src/lstm/input.cpp         |   3 +
- 13 files changed, 488 insertions(+), 4 deletions(-)
- create mode 100644 src/api/tesscapi.cpp
- create mode 100644 src/api/tesseract.h
-
-diff --git a/src/api/Makefile.am b/src/api/Makefile.am
-index d9b76eb6..cd2dc30f 100644
---- a/src/api/Makefile.am
-+++ b/src/api/Makefile.am
-@@ -39,6 +39,7 @@ libtesseract_api_la_SOURCES += lstmboxrenderer.cpp
- libtesseract_api_la_SOURCES += pdfrenderer.cpp
- libtesseract_api_la_SOURCES += wordstrboxrenderer.cpp
- libtesseract_api_la_SOURCES += renderer.cpp
-+libtesseract_api_la_SOURCES += tesscapi.cpp
- 
- lib_LTLIBRARIES += libtesseract.la
- libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS) $(libarchive_LIBS)
-diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
-index 9245d07c..ea964ee6 100644
---- a/src/api/baseapi.cpp
-+++ b/src/api/baseapi.cpp
-@@ -215,6 +215,14 @@ TessBaseAPI::TessBaseAPI()
-   // Use the current locale if building debug code.
-   std::locale::global(std::locale(""));
- #endif
-+  const char *locale;
-+  locale = std::setlocale(LC_ALL, nullptr);
-+/* willus mod Remove assertions--taken care of in tesscapi.cpp */
-+//  ASSERT_HOST(!strcmp(locale, "C"));
-+  locale = std::setlocale(LC_CTYPE, nullptr);
-+//  ASSERT_HOST(!strcmp(locale, "C"));
-+  locale = std::setlocale(LC_NUMERIC, nullptr);
-+//  ASSERT_HOST(!strcmp(locale, "C"));
- }
- 
- TessBaseAPI::~TessBaseAPI() {
-@@ -1333,6 +1341,85 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
-   text->add_str_int("\t", bottom - top);
- }
- 
-+/* willus mod */
-+int TessBaseAPI::GetOCRWords(int **x00,int **y00,int **x11,int **y11,int **ybaseline0,
-+                             char **utf8words)
-+
-+    {
-+    int iword,nwords,totlen,it8;
-+    int *x0,*y0,*x1,*y1,*ybaseline;
-+    char *tutf8;
-+
-+    ResultIterator *res_it = GetIterator();
-+    /* Count words */
-+    iword=0;
-+    totlen=0;
-+    while (!res_it->Empty(RIL_BLOCK))
-+        {
-+        if (res_it->Empty(RIL_WORD))
-+            {
-+            res_it->Next(RIL_WORD);
-+            continue;
-+            }
-+        iword++;
-+        STRING textstr=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
-+        totlen+=strlen(textstr.string())+1;
-+        res_it->Next(RIL_WORD);
-+        }
-+    nwords=iword;
-+/*
-+printf("\nnwords=%d, totlen=%d\n",nwords,totlen);
-+*/
-+    x0=(*x00)=(int *)malloc(sizeof(int)*5*nwords);
-+    y0=(*y00)=&x0[nwords];
-+    x1=(*x11)=&y0[nwords];
-+    y1=(*y11)=&x1[nwords];
-+    ybaseline=(*ybaseline0)=&y1[nwords];
-+    tutf8=(*utf8words)=(char *)malloc(totlen);
-+    iword=0;
-+    it8=0;
-+    res_it->Begin();
-+    while (!res_it->Empty(RIL_BLOCK))
-+        {
-+        if (res_it->Empty(RIL_WORD))
-+            {
-+            res_it->Next(RIL_WORD);
-+            continue;
-+            }
-+        STRING textstr=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
-+        strcpy(&tutf8[it8],textstr.string());
-+        it8 += strlen(&tutf8[it8])+1;
-+        /*
-+        STRING textstr("");
-+        textstr += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
-+        */
-+/*
-+printf("Word %d: '%s'\n",iword,textstr.string());
-+*/
-+        int left, top, right, bottom;
-+        int u1,v1,u2,v2;
-+        res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-+        res_it->Baseline(RIL_WORD, &u1, &v1, &u2, &v2);
-+        x0[iword]=left;
-+        x1[iword]=right;
-+        y0[iword]=top;
-+        y1[iword]=bottom;
-+        ybaseline[iword]=(v1+v2)/2;
-+        iword++;
-+/*
-+printf("BB: (%d,%d)-(%d,%d)  BL: (%d,%d)-(%d,%d)\n",left,bottom,right,top,x1,y1,x2,y2);
-+*/
-+        res_it->Next(RIL_WORD);
-+        }
-+/*
-+printf("iword=%d\n",iword);
-+*/
-+    return(iword);
-+    }
-+
-+/* willus mod */
-+int GetOCRWords(int **x0,int **y0,int **x1,int **y1,int **ybaseline,char **utf8words);
-+
- /**
-  * Make a TSV-formatted string from the internal data structures.
-  * page_number is 0-based but will appear in the output as 1-based.
-diff --git a/src/api/baseapi.h b/src/api/baseapi.h
-index 3724dd92..23be5920 100644
---- a/src/api/baseapi.h
-+++ b/src/api/baseapi.h
-@@ -575,6 +575,9 @@ class TESS_API TessBaseAPI {
-    */
-   char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
- 
-+/* willus mod */
-+int GetOCRWords(int **x0,int **y0,int **x1,int **y1,int **ybaseline,char **utf8words);
-+
-   /**
-    * Make a HTML-formatted string with hOCR markup from the internal
-    * data structures.
-diff --git a/src/api/tesscapi.cpp b/src/api/tesscapi.cpp
-new file mode 100644
-index 00000000..1752fafe
---- /dev/null
-+++ b/src/api/tesscapi.cpp
-@@ -0,0 +1,311 @@
-+/*
-+** tesscapi.cpp    willus.com attempt at C wrapper for tesseract.
-+**                 (Butchered from tesseractmain.cpp)
-+**                 Last udpated 9-1-12
-+**
-+** Copyright (C) 2012  http://willus.com
-+**
-+** This program is free software: you can redistribute it and/or modify
-+** it under the terms of the GNU Affero General Public License as
-+** published by the Free Software Foundation, either version 3 of the
-+** License, or (at your option) any later version.
-+**
-+** This program is distributed in the hope that it will be useful,
-+** but WITHOUT ANY WARRANTY; without even the implied warranty of
-+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+** GNU Affero General Public License for more details.
-+**
-+** You should have received a copy of the GNU Affero General Public License
-+** along with this program.  If not, see <http://www.gnu.org/licenses/>.
-+**
-+*/
-+
-+/*
-+#include "mfcpch.h"
-+*/
-+// #define USE_VLD //Uncomment for Visual Leak Detector.
-+#if (defined _MSC_VER && defined USE_VLD)
-+#include <vld.h>
-+#endif
-+
-+// Include automatically generated configuration file if running autoconf
-+#ifdef HAVE_CONFIG_H
-+#include "config_auto.h"
-+#endif
-+#include <locale.h>
-+#ifdef USING_GETTEXT
-+#include <libintl.h>
-+#define _(x) gettext(x)
-+#else
-+#define _(x) (x)
-+#endif
-+
-+#include "allheaders.h"
-+#include "baseapi.h"
-+#include "strngs.h"
-+#include "params.h"
-+#include "blobs.h"
-+#include "simddetect.h"
-+#include "tesseractclass.h"
-+/*
-+#include "notdll.h"
-+*/
-+
-+/* C Wrappers */
-+#include "tesseract.h"
-+
-+// static tesseract::TessBaseAPI api[4];
-+
-+/*
-+** ocr_type=0:  OEM_DEFAULT
-+** ocr_type=1:  OEM_TESSERACT_ONLY
-+** ocr_type=2:  OEM_LSTM_ONLY
-+** ocr_type=3:  OEM_TESSERACT_LSTM_COMBINED
-+*/
-+void *tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out,
-+                     char *initstr,int maxlen,int *status)
-+
-+    {
-+    char original_locale[256];
-+    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI;
-+/*
-+printf("@tess_capi_init\n");
-+printf("    datapath='%s'\n",datapath);
-+printf("    language='%s'\n",language);
-+printf("    ocr_type=%d\n",ocr_type);
-+*/
-+#ifdef USE_NLS
-+    setlocale (LC_ALL, "");
-+    bindtextdomain (PACKAGE, LOCALEDIR);
-+    textdomain (PACKAGE);
-+#endif
-+    /* willus mod, 11-24-16 */
-+    /* Tesseract needs "C" locale to correctly parse all data .traineddata files. */
-+/*
-+printf("locale='%s'\n",setlocale(LC_ALL,NULL));
-+printf("ctype='%s'\n",setlocale(LC_CTYPE,NULL));
-+printf("numeric='%s'\n",setlocale(LC_NUMERIC,NULL));
-+*/
-+    strncpy(original_locale,setlocale(LC_ALL,NULL),255);
-+    original_locale[255]='\0';
-+/*
-+printf("original_locale='%s'\n",original_locale);
-+*/
-+    setlocale(LC_ALL,"C");
-+/*
-+printf("new locale='%s'\n",setlocale(LC_ALL,NULL));
-+printf("new ctype='%s'\n",setlocale(LC_CTYPE,NULL));
-+printf("new numeric='%s'\n",setlocale(LC_NUMERIC,NULL));
-+*/
-+    // fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
-+    // Make the order of args a bit more forgiving than it used to be.
-+    const char* lang = "eng";
-+    tesseract::PageSegMode pagesegmode = tesseract::PSM_SINGLE_BLOCK;
-+    if (language!=NULL && language[0]!='\0')
-+        lang = language;
-+    /*
-+    if (output == NULL)
-+        {
-+        fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] "
-+                      "[-psm pagesegmode] [configfile...]\n"), argv[0]);
-+        fprintf(stderr,
-+            _("pagesegmode values are:\n"
-+              "0 = Orientation and script detection (OSD) only.\n"
-+              "1 = Automatic page segmentation with OSD.\n"
-+              "2 = Automatic page segmentation, but no OSD, or OCR\n"
-+              "3 = Fully automatic page segmentation, but no OSD. (Default)\n"
-+              "4 = Assume a single column of text of variable sizes.\n"
-+              "5 = Assume a single uniform block of vertically aligned text.\n"
-+              "6 = Assume a single uniform block of text.\n"
-+              "7 = Treat the image as a single text line.\n"
-+              "8 = Treat the image as a single word.\n"
-+              "9 = Treat the image as a single word in a circle.\n"
-+              "10 = Treat the image as a single character.\n"));
-+        fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any"
-+                      "configfile.\n"));
-+        exit(1);
-+        }
-+    */
-+/*
-+printf("SSE = %s\n",SIMDDetect::IsSSEAvailable() ? "AVAILABLE" : "NOT AVAILABLE");
-+printf("AVX = %s\n",SIMDDetect::IsAVXAvailable() ? "AVAILABLE" : "NOT AVAILABLE");
-+*/
-+/*
-+v4.00 loads either TESSERACT enginer, LSTM engine, or both.  No CUBE.
-+*/
-+    ocr_type=0; /* Ignore specified and use default */
-+    api->SetOutputName(NULL);
-+    (*status)=api->Init(datapath,lang,
-+              ocr_type==0 ? tesseract::OEM_DEFAULT :
-+                (ocr_type==1 ? tesseract::OEM_TESSERACT_ONLY :
-+                   (ocr_type==2 ? tesseract::OEM_LSTM_ONLY :
-+                                  (tesseract::OEM_TESSERACT_LSTM_COMBINED))));
-+    if ((*status)!=0)
-+        {
-+        /* willus mod, 11-24-16 */
-+        setlocale(LC_ALL,original_locale);
-+        api->End();
-+        delete api;
-+        return(NULL);
-+        }
-+    /*
-+    api.Init("tesscapi",lang,tesseract::OEM_DEFAULT,
-+           &(argv[arg]), argc - arg, NULL, NULL, false);
-+    */
-+    // We have 2 possible sources of pagesegmode: a config file and
-+    // the command line. For backwards compatability reasons, the
-+    // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
-+    // default for this program is tesseract::PSM_AUTO. We will let
-+    // the config file take priority, so the command-line default
-+    // can take priority over the tesseract default, so we use the
-+    // value from the command line only if the retrieved mode
-+    // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
-+    // in any config file. Therefore the only way to force
-+    // tesseract::PSM_SINGLE_BLOCK is from the command line.
-+    // It would be simpler if we could set the value before Init,
-+    // but that doesn't work.
-+    if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
-+        api->SetPageSegMode(pagesegmode);
-+
-+    /*
-+    ** Initialization message
-+    */
-+    {
-+    char istr[1024];
-+    int sse,avx;
-+
-+// printf("tessedit_ocr_engine_mode = %d\n",tessedit_ocr_engine_mode);
-+    sprintf(istr,"%s",api->Version());
-+    sse=tesseract::SIMDDetect::IsSSEAvailable();
-+    avx=tesseract::SIMDDetect::IsAVXAvailable();
-+    if (sse || avx)
-+        sprintf(&istr[strlen(istr)]," [%s]",sse&&avx?"SSE+AVX":(sse?"SSE":"AVX"));
-+    sprintf(&istr[strlen(istr)],"\n    Tesseract data folder = '%s'",datapath==NULL?getenv("TESSDATA_PREFIX"):datapath);
-+    strcat(istr,"\n    Tesseract languages: ");
-+    GenericVector<STRING> languages;
-+    api->GetLoadedLanguagesAsVector(&languages);
-+/*
-+printf("OEM=%d\n",api->oem());
-+printf("Langs='%s'\n",api->GetInitLanguagesAsString());
-+printf("AnyTessLang()=%d\n",(int)api->tesseract()->AnyTessLang());
-+printf("AnyLSTMLang()=%d\n",(int)api->tesseract()->AnyLSTMLang());
-+printf("num_sub_langs()=%d\n",api->tesseract()->num_sub_langs());
-+printf("languages.size()=%d\n",(int)languages.size());
-+*/
-+
-+    for (int i=0;i<=api->tesseract()->num_sub_langs();i++)
-+        {
-+        tesseract::Tesseract *lang1;
-+        int eng;
-+        lang1 = i==0 ? api->tesseract() : api->tesseract()->get_sub_lang(i-1);
-+        eng=(int)lang1->tessedit_ocr_engine_mode;
-+        sprintf(&istr[strlen(istr)],"%s%s [%s]",i==0?"":", ",lang1->lang.string(),
-+                 eng==2?"LSTM+Tess":(eng==1?"LSTM":"Tess"));
-+        }
-+/*
-+printf("%d. '%s'\n",i+1,languages[i].string());
-+printf("    sublang[%d].oem_engine = %d\n",i+1,(int)api->tesseract()->get_sub_lang(i)->tessedit_ocr_engine_mode);
-+*/
-+
-+    /*
-+    if (ocr_type==0 || ocr_type==3)
-+        sprintf(&istr[strlen(istr)],"[LSTM+] (lang=");
-+    else if (ocr_type==2)
-+        sprintf(&istr[strlen(istr)],"[LSTM] (lang=");
-+    strncpy(&istr[strlen(istr)],language,253-strlen(istr));
-+    istr[253]='\0';
-+    strcat(istr,")");
-+    */
-+    if (out!=NULL)
-+        fprintf(out,"%s\n",istr);
-+    if (initstr!=NULL)
-+        {
-+        strncpy(initstr,istr,maxlen-1);
-+        initstr[maxlen-1]='\0';
-+        }
-+    }
-+
-+
-+    /* Turn off LSTM debugging output */
-+    api->SetVariable("lstm_debug_level","0");
-+#if (WILLUSDEBUG & 1)
-+    api->SetVariable("lstm_debug_level","9");
-+    api->SetVariable("paragraph_debug_level","9");
-+    api->SetVariable("tessdata_manager_debug_level","9");
-+    api->SetVariable("tosp_debug_level","9");
-+    api->SetVariable("wordrec_debug_level","9");
-+    api->SetVariable("segsearch_debug_level","9");
-+#endif
-+    /* willus mod, 11-24-16 */
-+    setlocale(LC_ALL,original_locale);
-+    return((void *)api);
-+    }
-+
-+
-+int tess_capi_get_ocr(void *vapi,PIX *pix,char *outstr,int maxlen,int segmode,FILE *out)
-+
-+    {
-+    tesseract::TessBaseAPI *api;
-+    static int old_segmode=-1;
-+
-+    api=(tesseract::TessBaseAPI *)vapi;
-+    if (old_segmode != segmode)
-+        {
-+        old_segmode=segmode;
-+        api->SetPageSegMode((tesseract::PageSegMode)segmode);
-+        }
-+    if (!api->ProcessPage(pix,0,NULL,NULL,0,NULL))
-+        {
-+        /* pixDestroy(&pix); */
-+        if (out!=NULL)
-+            fprintf(out,"tesscapi:  Error during bitmap processing.\n");
-+        api->Clear();
-+        return(-1);
-+        }
-+    strncpy(outstr,api->GetUTF8Text(),maxlen-1);
-+    outstr[maxlen-1]='\0';
-+    api->Clear();
-+    return(0);
-+    }
-+
-+
-+int tess_capi_get_ocr_multiword(void *vapi,PIX *pix,int segmode,
-+                                int **left,int **top,int **right,int **bottom,
-+                                int **ybase,char **text,int *nw,
-+                                FILE *out)
-+
-+    {
-+    tesseract::TessBaseAPI *api;
-+    static int old_segmode=-1;
-+
-+    api=(tesseract::TessBaseAPI *)vapi;
-+    if (old_segmode != segmode)
-+        {
-+        old_segmode=segmode;
-+        api->SetPageSegMode((tesseract::PageSegMode)segmode);
-+        }
-+    if (!api->ProcessPage(pix,0,NULL,NULL,0,NULL))
-+        {
-+        if (out!=NULL)
-+            fprintf(out,"tesscapi:  Error during bitmap processing.\n");
-+        api->Clear();
-+        (*nw)=0;
-+        return(-1);
-+        }
-+    (*nw)=api->GetOCRWords(left,top,right,bottom,ybase,text);
-+    api->Clear();
-+    return(0);
-+    }
-+
-+
-+void tess_capi_end(void *vapi)
-+
-+    {
-+    tesseract::TessBaseAPI *api;
-+
-+    if (vapi==NULL)
-+        return;
-+    api=(tesseract::TessBaseAPI *)vapi;
-+    api->End();
-+    delete api;
-+    }
-diff --git a/src/api/tesseract.h b/src/api/tesseract.h
-new file mode 100644
-index 00000000..575948cc
---- /dev/null
-+++ b/src/api/tesseract.h
-@@ -0,0 +1,29 @@
-+/*
-+** Willus.com's Tesseract C Wrappers
-+**
-+** 6-8-12
-+**
-+*/
-+
-+#ifndef           _TESSERACT_H_
-+#define           _TESSERACT_H_
-+
-+//#include <leptonica.h>
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+void *tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out,
-+                    char *initstr,int maxlen,int *status);
-+int tess_capi_get_ocr(void *api,PIX *pix,char *outstr,int maxlen,int segmode,FILE *out);
-+int tess_capi_get_ocr_multiword(void *vapi,PIX *pix,int segmode,
-+                                int **left,int **top,int **right,int **bottom,
-+                                int **ybase,char **text,int *nw,
-+                                FILE *out);
-+void tess_capi_end(void *api);
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif
-diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp
-index 17f0951b..7af94ee2 100644
---- a/src/ccmain/tessedit.cpp
-+++ b/src/ccmain/tessedit.cpp
-@@ -101,6 +101,10 @@ bool Tesseract::init_tesseract_lang_data(
-         " to your \"tessdata\" directory.\n");
-     return false;
-   }
-+  /* willus mod */
-+  TFile fp;
-+  strncpy(fp.tfile_filename,tessdata_path.string(),511);
-+  fp.tfile_filename[511]='\0';
- #ifndef DISABLED_LEGACY_ENGINE
-   if (oem == OEM_DEFAULT) {
-     // Set the engine mode from availability, which can then be overridden by
-@@ -116,7 +120,6 @@ bool Tesseract::init_tesseract_lang_data(
- #endif  // ndef DISABLED_LEGACY_ENGINE
- 
-   // If a language specific config file (lang.config) exists, load it in.
--  TFile fp;
-   if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) {
-     ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp,
-                                  this->params());
-diff --git a/src/ccutil/ccutil.h b/src/ccutil/ccutil.h
-index 71e89c60..bdeccc14 100644
---- a/src/ccutil/ccutil.h
-+++ b/src/ccutil/ccutil.h
-@@ -80,6 +80,13 @@ class CCUtil {
-   // Member parameters.
-   // These have to be declared and initialized after params_ member, since
-   // params_ should be initialized before parameters are added to it.
-+/* willus mod */
-+/*
-+  #ifdef _WIN32
-+  STRING_VAR_H(tessedit_module_name, WINDLLNAME,
-+               "Module colocated with tessdata dir");
-+  #endif
-+*/
-   INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities");
-   BOOL_VAR_H(use_definite_ambigs_for_classifier, false,
-              "Use definite ambiguities when running character classifier");
-diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h
-index 3556d153..3a5e8662 100644
---- a/src/ccutil/genericvector.h
-+++ b/src/ccutil/genericvector.h
-@@ -382,7 +382,26 @@ inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
-       // reserve an extra byte in case caller wants to append a '\0' character
-       data->reserve(size + 1);
-       data->resize_no_init(size);
--      result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
-+    /* willus mod Dec 2018--weird issue with Win XP and MinGW gcc 7.3.0 */
-+    /* Can't read entire file at once -- need to break up into smaller blocksize reads */
-+    {
-+    int frs,n;
-+    int blocksize;
-+    blocksize=1024*1024;
-+    for (n=0;1;)
-+        {
-+        int bs;
-+        bs= size-n > blocksize ? blocksize : size-n;
-+        frs=(int)fread(&(*data)[n],1,bs,fp);
-+        n+=frs;
-+        if (frs<bs || bs<blocksize || n>=size)
-+            break;
-+        }
-+    result = static_cast<long>((long)n==size);
-+    }
-+    /*
-+    result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
-+    */
-     }
-     fclose(fp);
-   }
-diff --git a/src/ccutil/mainblk.cpp b/src/ccutil/mainblk.cpp
-index 52b04b04..80b26044 100644
---- a/src/ccutil/mainblk.cpp
-+++ b/src/ccutil/mainblk.cpp
-@@ -55,8 +55,22 @@ void CCUtil::main_setup(const char *argv0, const char *basename) {
- #if defined(_WIN32)
-   } else if (datadir == nullptr || _access(datadir.string(), 0) != 0) {
-     /* Look for tessdata in directory of executable. */
-+    /*
-+    char drive[_MAX_DRIVE];
-+    char dir[_MAX_DIR];
-+    */
-     char path[_MAX_PATH];
--    DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
-+    int i;
-+    /* DWORD length = */ GetModuleFileName(nullptr, path, sizeof(path));
-+    /* willus mod--avoid _splitpath_s -- not in XP */
-+    for (i=strlen(path)-1;i>=0 && path[i]!='/' && path[i]!='\\';i--);
-+    if (i>=0)
-+        {
-+        path[i]='\0';
-+        datadir=path;
-+        datadir += "/tessdata";
-+        }
-+    /*
-     if (length > 0 && length < sizeof(path)) {
-       char* separator = std::strrchr(path, '\\');
-       if (separator != nullptr) {
-@@ -65,6 +79,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) {
-         datadir += "/tessdata";
-       }
-     }
-+    */
- #endif /* _WIN32 */
- #if defined(TESSDATA_PREFIX)
-   } else {
-diff --git a/src/ccutil/params.cpp b/src/ccutil/params.cpp
-index 00bf2563..486c5ce0 100644
---- a/src/ccutil/params.cpp
-+++ b/src/ccutil/params.cpp
-@@ -82,7 +82,8 @@ bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp,
- 
-       if (!foundit) {
-         anyerr = true;         // had an error
--        tprintf("Warning: Parameter not found: %s\n", line);
-+        /* willus mod */
-+        tprintf("Tesseract warning: Parameter %s not found in file %s.\n",line,fp->tfile_filename);
-       }
-     }
-   }
-diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp
-index 7def011f..6107a494 100644
---- a/src/ccutil/serialis.cpp
-+++ b/src/ccutil/serialis.cpp
-@@ -201,6 +201,9 @@ bool TFile::Open(const STRING& filename, FileReader reader) {
-   offset_ = 0;
-   is_writing_ = false;
-   swap_ = false;
-+  /* willus mod */
-+  strncpy(tfile_filename,filename.string(),511);
-+  tfile_filename[511]='\0';
-   if (reader == nullptr)
-     return LoadDataFromFile(filename, data_);
-   else
-diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h
-index 095b9227..4cc8251e 100644
---- a/src/ccutil/serialis.h
-+++ b/src/ccutil/serialis.h
-@@ -77,6 +77,8 @@ class TFile {
-  public:
-   TFile();
-   ~TFile();
-+  /* willus mod */
-+  char tfile_filename[512];
- 
-   // All the Open methods load the whole file into memory for reading.
-   // Opens a file with a supplied reader, or nullptr to use the default.
-diff --git a/src/lstm/input.cpp b/src/lstm/input.cpp
-index 73b584b3..0b0b54c3 100644
---- a/src/lstm/input.cpp
-+++ b/src/lstm/input.cpp
-@@ -93,8 +93,11 @@ Pix* Input::PrepareLSTMInputs(const ImageData& image_data,
-     return nullptr;
-   }
-   if (width < min_width || height < min_width) {
-+    /* willus mod -- no warning */
-+    /*
-     tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width,
-             height, min_width);
-+    */
-     pixDestroy(&pix);
-     return nullptr;
-   }
--- 
-2.22.0
-