From d8927c969e3387ca2669a616c0ba53bce918a031 Mon Sep 17 00:00:00 2001 From: Daniel Fullmer Date: Fri, 13 Sep 2019 15:11:45 -0400 Subject: [PATCH] Willus mod for k2pdfopt --- source/fitz/filter-basic.c | 3 + source/fitz/font-win32.c | 866 +++++++++++++++++++++++++++++++++++++ source/fitz/font.c | 3 + source/fitz/stext-device.c | 5 + source/fitz/string.c | 5 + source/pdf/pdf-annot.c | 14 +- source/pdf/pdf-link.c | 3 + source/pdf/pdf-parse.c | 5 + source/pdf/pdf-xref.c | 9 + 9 files changed, 912 insertions(+), 1 deletion(-) create mode 100644 source/fitz/font-win32.c diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c index 0713a62e7..b8ef4d292 100644 --- a/source/fitz/filter-basic.c +++ b/source/fitz/filter-basic.c @@ -259,7 +259,10 @@ look_for_endstream: if (!state->warned) { state->warned = 1; +/* willus mod -- no warning */ +/* fz_warn(ctx, "PDF stream Length incorrect"); +*/ } return *stm->rp++; } diff --git a/source/fitz/font-win32.c b/source/fitz/font-win32.c new file mode 100644 index 000000000..45de8cfd3 --- /dev/null +++ b/source/fitz/font-win32.c @@ -0,0 +1,866 @@ +/* +** Routines to access MS Windows system fonts. +** From sumatra PDF distro. +** Modified for MuPDF v1.9a by willus.com +*/ +#include "mupdf/pdf.h" + +/* + Which fonts are embedded is based on a few preprocessor definitions. + + The base 14 fonts are always embedded. + For CJK font substitution we embed DroidSansFallback. + + Set NOCJK to skip all CJK support (this also omits embedding the CJK CMaps) + Set NOCJKFONT to skip the embedded CJK font. + Set NOCJKFULL to embed a smaller CJK font without CJK Extension A support. 
+*/ + +#ifdef NOCJK +#define NOCJKFONT +#endif + +/* SumatraPDF: also load fonts included with Windows */ +#ifdef _WIN32 + +#ifndef UNICODE +#define UNICODE +#endif +#ifndef _UNICODE +#define _UNICODE +#endif + +#include <windows.h> + +// TODO: Use more of FreeType for TTF parsing (for performance reasons, +// the fonts can't be parsed completely, though) +#include <ft2build.h> +#include FT_TRUETYPE_IDS_H +#include FT_TRUETYPE_TAGS_H + +#define TTC_VERSION1 0x00010000 +#define TTC_VERSION2 0x00020000 + +#define MAX_FACENAME 128 + +// Note: the font face must be the first field so that the structure +// can be treated like a simple string for searching +typedef struct pdf_fontmapMS_s +{ + char fontface[MAX_FACENAME]; + char fontpath[MAX_PATH]; + int index; +} pdf_fontmapMS; + +typedef struct pdf_fontlistMS_s +{ + pdf_fontmapMS *fontmap; + int len; + int cap; +} pdf_fontlistMS; + +typedef struct _tagTT_OFFSET_TABLE +{ + ULONG uVersion; + USHORT uNumOfTables; + USHORT uSearchRange; + USHORT uEntrySelector; + USHORT uRangeShift; +} TT_OFFSET_TABLE; + +typedef struct _tagTT_TABLE_DIRECTORY +{ + ULONG uTag; //table name + ULONG uCheckSum; //Check sum + ULONG uOffset; //Offset from beginning of file + ULONG uLength; //length of the table in bytes +} TT_TABLE_DIRECTORY; + +typedef struct _tagTT_NAME_TABLE_HEADER +{ + USHORT uFSelector; //format selector. 
Always 0 + USHORT uNRCount; //Name Records count + USHORT uStorageOffset; //Offset for strings storage, from start of the table +} TT_NAME_TABLE_HEADER; + +typedef struct _tagTT_NAME_RECORD +{ + USHORT uPlatformID; + USHORT uEncodingID; + USHORT uLanguageID; + USHORT uNameID; + USHORT uStringLength; + USHORT uStringOffset; //from start of storage area +} TT_NAME_RECORD; + +typedef struct _tagFONT_COLLECTION +{ + ULONG Tag; + ULONG Version; + ULONG NumFonts; +} FONT_COLLECTION; + +static struct { + char *name; + char *pattern; +} baseSubstitutes[] = { + { "Courier", "CourierNewPSMT" }, + { "Courier-Bold", "CourierNewPS-BoldMT" }, + { "Courier-Oblique", "CourierNewPS-ItalicMT" }, + { "Courier-BoldOblique", "CourierNewPS-BoldItalicMT" }, + { "Helvetica", "ArialMT" }, + { "Helvetica-Bold", "Arial-BoldMT" }, + { "Helvetica-Oblique", "Arial-ItalicMT" }, + { "Helvetica-BoldOblique", "Arial-BoldItalicMT" }, + { "Times-Roman", "TimesNewRomanPSMT" }, + { "Times-Bold", "TimesNewRomanPS-BoldMT" }, + { "Times-Italic", "TimesNewRomanPS-ItalicMT" }, + { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT" }, + { "Symbol", "SymbolMT" }, +}; +static const char *base_font_names[][10] = +{ + { "Courier", "CourierNew", "CourierNewPSMT", NULL }, + { "Courier-Bold", "CourierNew,Bold", "Courier,Bold", + "CourierNewPS-BoldMT", "CourierNew-Bold", NULL }, + { "Courier-Oblique", "CourierNew,Italic", "Courier,Italic", + "CourierNewPS-ItalicMT", "CourierNew-Italic", NULL }, + { "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic", + "CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL }, + { "Helvetica", "ArialMT", "Arial", NULL }, + { "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold", + "Helvetica,Bold", NULL }, + { "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic", + "Helvetica,Italic", "Helvetica-Italic", NULL }, + { "Helvetica-BoldOblique", "Arial-BoldItalicMT", + "Arial,BoldItalic", "Arial-BoldItalic", + "Helvetica,BoldItalic", 
"Helvetica-BoldItalic", NULL }, + { "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman", + "TimesNewRomanPS", NULL }, + { "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold", + "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL }, + { "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic", + "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL }, + { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT", + "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic", + "TimesNewRoman-BoldItalic", NULL }, + { "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic", + "SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL }, + { "ZapfDingbats", NULL } +}; + +static pdf_fontlistMS fontlistMS = +{ + NULL, + 0, + 0, +}; +static int strcmp_ignore_space(const char *a, const char *b); +static const char *clean_font_name(const char *fontname); +static const char *pdf_clean_base14_name(const char *fontname); + +static inline USHORT BEtoHs(USHORT x) +{ + BYTE *data = (BYTE *)&x; + return (data[0] << 8) | data[1]; +} + +static inline ULONG BEtoHl(ULONG x) +{ + BYTE *data = (BYTE *)&x; + return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; +} + +static int strcmp_ignore_space(const char *a, const char *b) +{ + while (1) + { + while (*a == ' ') + a++; + while (*b == ' ') + b++; + if (*a != *b) + return 1; + if (*a == 0) + return *a != *b; + if (*b == 0) + return *a != *b; + a++; + b++; + } +} + +/* A little bit more sophisticated name matching so that e.g. "EurostileExtended" + matches "EurostileExtended-Roman" or "Tahoma-Bold,Bold" matches "Tahoma-Bold" */ +static int +lookup_compare(const void *elem1, const void *elem2) +{ + const char *val1 = elem1; + const char *val2 = elem2; + int len1 = strlen(val1); + int len2 = strlen(val2); + + if (len1 != len2) + { + const char *rest = len1 > len2 ? 
val1 + len2 : val2 + len1; + if (',' == *rest || !_stricmp(rest, "-roman")) + return _strnicmp(val1, val2, fz_mini(len1, len2)); + } + + return _stricmp(val1, val2); +} + +static void +remove_spaces(char *srcDest) +{ + char *dest; + + for (dest = srcDest; *srcDest; srcDest++) + if (*srcDest != ' ') + *dest++ = *srcDest; + *dest = '\0'; +} + +static int +str_ends_with(const char *str, const char *end) +{ + size_t len1 = strlen(str); + size_t len2 = strlen(end); + + return len1 >= len2 && !strcmp(str + len1 - len2, end); +} + +static pdf_fontmapMS * +pdf_find_windows_font_path(const char *fontname) +{ + return bsearch(fontname, fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), lookup_compare); +} + +/* source and dest can be same */ +static void +decode_unicode_BE(fz_context *ctx, char *source, int sourcelen, char *dest, int destlen) +{ + WCHAR *tmp; + int converted, i; + + if (sourcelen % 2 != 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string"); + + tmp = fz_malloc_array(ctx, sourcelen / 2 + 1, sizeof(WCHAR)); + for (i = 0; i < sourcelen / 2; i++) + tmp[i] = BEtoHs(((WCHAR *)source)[i]); + tmp[sourcelen / 2] = '\0'; + + converted = WideCharToMultiByte(CP_UTF8, 0, tmp, -1, dest, destlen, NULL, NULL); + fz_free(ctx, tmp); + if (!converted) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string"); +} + +static void +decode_platform_string(fz_context *ctx, int platform, int enctype, char *source, int sourcelen, char *dest, int destlen) +{ + switch (platform) + { + case TT_PLATFORM_APPLE_UNICODE: + switch (enctype) + { + case TT_APPLE_ID_DEFAULT: + case TT_APPLE_ID_UNICODE_2_0: + decode_unicode_BE(ctx, source, sourcelen, dest, destlen); + return; + } + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); + case TT_PLATFORM_MACINTOSH: + switch (enctype) + { + case TT_MAC_ID_ROMAN: + if (sourcelen + 1 > destlen) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : overlong fontname: %s", 
source); + // TODO: Convert to UTF-8 from what encoding? + memcpy(dest, source, sourcelen); + dest[sourcelen] = 0; + return; + } + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); + case TT_PLATFORM_MICROSOFT: + switch (enctype) + { + case TT_MS_ID_SYMBOL_CS: + case TT_MS_ID_UNICODE_CS: + case TT_MS_ID_UCS_4: + decode_unicode_BE(ctx, source, sourcelen, dest, destlen); + return; + } + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); + } +} + +static void +grow_system_font_list(fz_context *ctx, pdf_fontlistMS *fl) +{ + int newcap; + pdf_fontmapMS *newitems; + + if (fl->cap == 0) + newcap = 1024; + else + newcap = fl->cap * 2; + + // use realloc/free for the fontmap, since the list can + // remain in memory even with all fz_contexts destroyed + newitems = realloc(fl->fontmap, newcap * sizeof(pdf_fontmapMS)); + if (!newitems) + fz_throw(ctx, FZ_ERROR_GENERIC, "OOM in grow_system_font_list"); + memset(newitems + fl->cap, 0, sizeof(pdf_fontmapMS) * (newcap - fl->cap)); + + fl->fontmap = newitems; + fl->cap = newcap; +} + +static void +append_mapping(fz_context *ctx, pdf_fontlistMS *fl, const char *facename, const char *path, int index) +{ + if (fl->len == fl->cap) + grow_system_font_list(ctx, fl); + + if (fl->len >= fl->cap) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : fontlist overflow"); + + fz_strlcpy(fl->fontmap[fl->len].fontface, facename, sizeof(fl->fontmap[0].fontface)); + fz_strlcpy(fl->fontmap[fl->len].fontpath, path, sizeof(fl->fontmap[0].fontpath)); + fl->fontmap[fl->len].index = index; + + ++fl->len; +} + +static void +safe_read(fz_context *ctx, fz_stream *file, int offset, char *buf, int size) +{ + int n; + fz_seek(ctx, file, offset, 0); + n = fz_read(ctx, file, (unsigned char *)buf, size); + if (n != size) + fz_throw(ctx, FZ_ERROR_GENERIC, "safe_read: read 
%d, expected %d", n, size); +} + +static void +read_ttf_string(fz_context *ctx, fz_stream *file, int offset, TT_NAME_RECORD *ttRecordBE, char *buf, int size) +{ + char szTemp[MAX_FACENAME * 2]; + // ignore empty and overlong strings + int stringLength = BEtoHs(ttRecordBE->uStringLength); + if (stringLength == 0 || stringLength >= sizeof(szTemp)) + return; + + safe_read(ctx, file, offset + BEtoHs(ttRecordBE->uStringOffset), szTemp, stringLength); + decode_platform_string(ctx, BEtoHs(ttRecordBE->uPlatformID), + BEtoHs(ttRecordBE->uEncodingID), szTemp, stringLength, buf, size); +} + +static void +makeFakePSName(char szName[MAX_FACENAME], const char *szStyle) +{ + // append the font's subfamily, unless it's a Regular font + if (*szStyle && _stricmp(szStyle, "Regular") != 0) + { + fz_strlcat(szName, "-", MAX_FACENAME); + fz_strlcat(szName, szStyle, MAX_FACENAME); + } + remove_spaces(szName); +} + +static void +parseTTF(fz_context *ctx, fz_stream *file, int offset, int index, const char *path) +{ + TT_OFFSET_TABLE ttOffsetTableBE; + TT_TABLE_DIRECTORY tblDirBE; + TT_NAME_TABLE_HEADER ttNTHeaderBE; + TT_NAME_RECORD ttRecordBE; + + char szPSName[MAX_FACENAME] = { 0 }; + char szTTName[MAX_FACENAME] = { 0 }; + char szStyle[MAX_FACENAME] = { 0 }; + char szCJKName[MAX_FACENAME] = { 0 }; + int i, count, tblOffset; + + safe_read(ctx, file, offset, (char *)&ttOffsetTableBE, sizeof(TT_OFFSET_TABLE)); + + // check if this is a TrueType font of version 1.0 or an OpenType font + if (BEtoHl(ttOffsetTableBE.uVersion) != TTC_VERSION1 && + BEtoHl(ttOffsetTableBE.uVersion) != TTAG_OTTO) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid font version %x", (unsigned int)BEtoHl(ttOffsetTableBE.uVersion)); + } + + // determine the name table's offset by iterating through the offset table + count = BEtoHs(ttOffsetTableBE.uNumOfTables); + for (i = 0; i < count; i++) + { + int entryOffset = offset + sizeof(TT_OFFSET_TABLE) + i * sizeof(TT_TABLE_DIRECTORY); + safe_read(ctx, file, 
entryOffset, (char *)&tblDirBE, sizeof(TT_TABLE_DIRECTORY)); + if (!BEtoHl(tblDirBE.uTag) || BEtoHl(tblDirBE.uTag) == TTAG_name) + break; + } + if (count == i || !BEtoHl(tblDirBE.uTag)) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : nameless font"); + tblOffset = BEtoHl(tblDirBE.uOffset); + + // read the 'name' table for record count and offsets + safe_read(ctx, file, tblOffset, (char *)&ttNTHeaderBE, sizeof(TT_NAME_TABLE_HEADER)); + offset = tblOffset + sizeof(TT_NAME_TABLE_HEADER); + tblOffset += BEtoHs(ttNTHeaderBE.uStorageOffset); + + // read through the strings for PostScript name and font family + count = BEtoHs(ttNTHeaderBE.uNRCount); + for (i = 0; i < count; i++) + { + short langId, nameId; + BOOL isCJKName; + + safe_read(ctx, file, offset + i * sizeof(TT_NAME_RECORD), (char *)&ttRecordBE, sizeof(TT_NAME_RECORD)); + + langId = BEtoHs(ttRecordBE.uLanguageID); + nameId = BEtoHs(ttRecordBE.uNameID); + isCJKName = TT_NAME_ID_FONT_FAMILY == nameId && LANG_CHINESE == PRIMARYLANGID(langId); + + // ignore non-English strings (except for Chinese font names) + if (langId && langId != TT_MS_LANGID_ENGLISH_UNITED_STATES && !isCJKName) + continue; + // ignore names other than font (sub)family and PostScript name + fz_try(ctx) + { + if (isCJKName) + read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szCJKName, sizeof(szCJKName)); + else if (TT_NAME_ID_FONT_FAMILY == nameId) + read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szTTName, sizeof(szTTName)); + else if (TT_NAME_ID_FONT_SUBFAMILY == nameId) + read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szStyle, sizeof(szStyle)); + else if (TT_NAME_ID_PS_NAME == nameId) + read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szPSName, sizeof(szPSName)); + } + fz_catch(ctx) + { + fz_warn(ctx, "ignoring face name decoding fonterror"); + } + } + + // try to prevent non-Arial fonts from accidentally substituting Arial + if (!strcmp(szPSName, "ArialMT")) + { + // cf. 
https://code.google.com/p/sumatrapdf/issues/detail?id=2471 + if (strcmp(szTTName, "Arial") != 0) + szPSName[0] = '\0'; + // TODO: is there a better way to distinguish Arial Caps from Arial proper? + // cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1290 + else if (strstr(path, "caps") || strstr(path, "Caps")) + fz_throw(ctx, FZ_ERROR_GENERIC, "ignore %s, as it can't be distinguished from Arial,Regular", path); + } + + if (szPSName[0]) + append_mapping(ctx, &fontlistMS, szPSName, path, index); + if (szTTName[0]) + { + // derive a PostScript-like name and add it, if it's different from the font's + // included PostScript name; cf. http://code.google.com/p/sumatrapdf/issues/detail?id=376 + makeFakePSName(szTTName, szStyle); + // compare the two names before adding this one + if (lookup_compare(szTTName, szPSName)) + append_mapping(ctx, &fontlistMS, szTTName, path, index); + } + if (szCJKName[0]) + { + makeFakePSName(szCJKName, szStyle); + if (lookup_compare(szCJKName, szPSName) && lookup_compare(szCJKName, szTTName)) + append_mapping(ctx, &fontlistMS, szCJKName, path, index); + } +} + +static void +parseTTFs(fz_context *ctx, const char *path) +{ + fz_stream *file = fz_open_file(ctx, path); + /* "fonterror : %s not found", path */ + fz_try(ctx) + { + parseTTF(ctx, file, 0, 0, path); + } + fz_always(ctx) + { + fz_drop_stream(ctx,file); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +parseTTCs(fz_context *ctx, const char *path) +{ + FONT_COLLECTION fontcollectionBE; + ULONG i, numFonts, *offsettableBE = NULL; + + fz_stream *file = fz_open_file(ctx, path); + /* "fonterror : %s not found", path */ + + fz_var(offsettableBE); + + fz_try(ctx) + { + safe_read(ctx, file, 0, (char *)&fontcollectionBE, sizeof(FONT_COLLECTION)); + if (BEtoHl(fontcollectionBE.Tag) != TTAG_ttcf) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : wrong format %x", (unsigned int)BEtoHl(fontcollectionBE.Tag)); + if (BEtoHl(fontcollectionBE.Version) != TTC_VERSION1 && + 
BEtoHl(fontcollectionBE.Version) != TTC_VERSION2) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid version %x", (unsigned int)BEtoHl(fontcollectionBE.Version)); + } + + numFonts = BEtoHl(fontcollectionBE.NumFonts); + offsettableBE = fz_malloc_array(ctx, numFonts, sizeof(ULONG)); + + safe_read(ctx, file, sizeof(FONT_COLLECTION), (char *)offsettableBE, numFonts * sizeof(ULONG)); + for (i = 0; i < numFonts; i++) + parseTTF(ctx, file, BEtoHl(offsettableBE[i]), i, path); + } + fz_always(ctx) + { + fz_free(ctx, offsettableBE); + fz_drop_stream(ctx,file); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +extend_system_font_list(fz_context *ctx, const WCHAR *path) +{ + WCHAR szPath[MAX_PATH], *lpFileName; + WIN32_FIND_DATA FileData; + HANDLE hList; + + GetFullPathName(path, nelem(szPath), szPath, &lpFileName); + + hList = FindFirstFile(szPath, &FileData); + if (hList == INVALID_HANDLE_VALUE) + { + // Don't complain about missing directories + if (GetLastError() == ERROR_FILE_NOT_FOUND) + return; + fz_throw(ctx, FZ_ERROR_GENERIC, "extend_system_font_list: unknown error %d", (int)GetLastError()); + } + do + { + if (!(FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + { + char szPathUtf8[MAX_PATH], *fileExt; + int res; + lstrcpyn(lpFileName, FileData.cFileName, szPath + MAX_PATH - lpFileName); + res = WideCharToMultiByte(CP_UTF8, 0, szPath, -1, szPathUtf8, sizeof(szPathUtf8), NULL, NULL); + if (!res) + { + fz_warn(ctx, "WideCharToMultiByte failed for %S", szPath); + continue; + } + fileExt = szPathUtf8 + strlen(szPathUtf8) - 4; + fz_try(ctx) + { + if (!_stricmp(fileExt, ".ttc")) + parseTTCs(ctx, szPathUtf8); + else if (!_stricmp(fileExt, ".ttf") || !_stricmp(fileExt, ".otf")) + parseTTFs(ctx, szPathUtf8); + } + fz_catch(ctx) + { + // ignore errors occurring while parsing a given font file + } + } + } while (FindNextFile(hList, &FileData)); + FindClose(hList); +} + +static void +destroy_system_font_list(void) +{ + free(fontlistMS.fontmap); + 
memset(&fontlistMS, 0, sizeof(fontlistMS)); +} + +static void +create_system_font_list(fz_context *ctx) +{ + WCHAR szFontDir[MAX_PATH]; + UINT cch; + + cch = GetWindowsDirectory(szFontDir, nelem(szFontDir) - 12); + if (0 < cch && cch < nelem(szFontDir) - 12) + { + /* willus.com edit--Win XP default MSVCRT.DLL doesn't have wcscat_s */ +#ifdef _WIN64 + wcscat_s(szFontDir, MAX_PATH, L"\\Fonts\\*.?t?"); +#else + wcscat(szFontDir,L"\\Fonts\\*.?t?"); +#endif + extend_system_font_list(ctx, szFontDir); + } + + if (fontlistMS.len == 0) + fz_warn(ctx, "couldn't find any usable system fonts"); + +#ifdef NOCJKFONT + { + // If no CJK fallback font is builtin but one has been shipped separately (in the same + // directory as the main executable), add it to the list of loadable system fonts + WCHAR szFile[MAX_PATH], *lpFileName; + GetModuleFileName(0, szFontDir, MAX_PATH); + GetFullPathName(szFontDir, MAX_PATH, szFile, &lpFileName); + lstrcpyn(lpFileName, L"DroidSansFallback.ttf", szFile + MAX_PATH - lpFileName); + extend_system_font_list(ctx, szFile); + } +#endif + + // sort the font list, so that it can be searched binarily + qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp); + +#ifdef DEBUG + // allow to overwrite system fonts for debugging purposes + // (either pass a full path or a search pattern such as "fonts\*.ttf") + cch = GetEnvironmentVariable(L"MUPDF_FONTS_PATTERN", szFontDir, nelem(szFontDir)); + if (0 < cch && cch < nelem(szFontDir)) + { + int i, prev_len = fontlistMS.len; + extend_system_font_list(ctx, szFontDir); + for (i = prev_len; i < fontlistMS.len; i++) + { + pdf_fontmapMS *entry = bsearch(fontlistMS.fontmap[i].fontface, fontlistMS.fontmap, prev_len, sizeof(pdf_fontmapMS), lookup_compare); + if (entry) + *entry = fontlistMS.fontmap[i]; + } + qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp); + } +#endif + + // make sure to clean up after ourselves + atexit(destroy_system_font_list); +} + +static fz_font * 
+pdf_load_windows_font_by_name(fz_context *ctx, const char *orig_name) +{ + pdf_fontmapMS *found = NULL; + char *comma, *fontname; + fz_font *font; + + /* WILLUS MOD--not multi-threaded for k2pdfopt */ + /* fz_synchronize_begin(); */ + if (fontlistMS.len == 0) + { + fz_try(ctx) + { + create_system_font_list(ctx); + } + fz_catch(ctx) { } + } + /* WILLUS MOD--not multi-threaded for k2pdfopt */ + /* fz_synchronize_end(); */ + if (fontlistMS.len == 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: couldn't find any fonts"); + + // work on a normalized copy of the font name + fontname = fz_strdup(ctx, orig_name); + remove_spaces(fontname); + + // first, try to find the exact font name (including appended style information) + comma = strchr(fontname, ','); + if (comma) + { + *comma = '-'; + found = pdf_find_windows_font_path(fontname); + *comma = ','; + } + // second, substitute the font name with a known PostScript name + else + { + int i; + for (i = 0; i < nelem(baseSubstitutes) && !found; i++) + if (!strcmp(fontname, baseSubstitutes[i].name)) + found = pdf_find_windows_font_path(baseSubstitutes[i].pattern); + } + // third, search for the font name without additional style information + if (!found) + found = pdf_find_windows_font_path(fontname); + // fourth, try to separate style from basename for prestyled fonts (e.g. "ArialBold") + if (!found && !comma && (str_ends_with(fontname, "Bold") || str_ends_with(fontname, "Italic"))) + { + int styleLen = str_ends_with(fontname, "Bold") ? 4 : str_ends_with(fontname, "BoldItalic") ? 
10 : 6; + fontname = fz_resize_array(ctx, fontname, strlen(fontname) + 2, sizeof(char)); + comma = fontname + strlen(fontname) - styleLen; + memmove(comma + 1, comma, styleLen + 1); + *comma = '-'; + found = pdf_find_windows_font_path(fontname); + *comma = ','; + if (!found) + found = pdf_find_windows_font_path(fontname); + } + // fifth, try to convert the font name from the common Chinese codepage 936 + if (!found && fontname[0] < 0) + { + WCHAR cjkNameW[MAX_FACENAME]; + char cjkName[MAX_FACENAME]; + if (MultiByteToWideChar(936, MB_ERR_INVALID_CHARS, fontname, -1, cjkNameW, nelem(cjkNameW)) && + WideCharToMultiByte(CP_UTF8, 0, cjkNameW, -1, cjkName, nelem(cjkName), NULL, NULL)) + { + comma = strchr(cjkName, ','); + if (comma) + { + *comma = '-'; + found = pdf_find_windows_font_path(cjkName); + *comma = ','; + } + if (!found) + found = pdf_find_windows_font_path(cjkName); + } + } + + fz_free(ctx, fontname); + if (!found) + fz_throw(ctx, FZ_ERROR_GENERIC, "couldn't find system font '%s'", orig_name); + + /* + fz_warn(ctx, "loading non-embedded font '%s' from '%s'", orig_name, found->fontpath); + */ + + font = fz_new_font_from_file(ctx, orig_name, found->fontpath, found->index, + strcmp(found->fontface, "DroidSansFallback") != 0); + /* willus mod for MuPDF v1.10, 10-21-2016 */ + { + fz_font_flags_t *flags; + flags=fz_font_flags(font); + if (flags!=NULL) + flags->ft_substitute = 1; + } + return font; +} + +static fz_font * +pdf_load_windows_font(fz_context *ctx, const char *fontname, int bold, int italic, int needs_exact_metrics) +{ + if (needs_exact_metrics) + { + const char *clean_name; + /* WILLUS: Declare pdf_clean_base14_name() */ + extern const char *pdf_clean_base14_name(const char *fontname); + + /* TODO: the metrics for Times-Roman and Courier don't match + those of Windows' Times New Roman and Courier New; for + some reason, Poppler doesn't seem to have this problem */ + int len; + if (fz_lookup_builtin_font(ctx,fontname, bold, italic, &len)) + return NULL; 
+ + /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2173 */ + clean_name = pdf_clean_base14_name(fontname); + if (clean_name != fontname && !strncmp(clean_name, "Times-", 6)) + return NULL; + } + + // TODO: unset font->ft_substitute for base14/needs_exact_metrics? + return pdf_load_windows_font_by_name(ctx, fontname); +} + +static const char *clean_font_name(const char *fontname) +{ + int i, k; + for (i = 0; i < nelem(base_font_names); i++) + for (k = 0; base_font_names[i][k]; k++) + if (!strcmp_ignore_space(base_font_names[i][k], fontname)) + return base_font_names[i][0]; + return fontname; +} + + +/* SumatraPDF: expose clean_font_name */ +static const char * pdf_clean_base14_name(const char *fontname) +{ + return clean_font_name(fontname); +} + +static fz_font * +pdf_load_windows_cjk_font(fz_context *ctx, const char *fontname, int ros, int serif) +{ + fz_font *font; + + font=NULL; /* WILLUS: Avoid compiler warning */ + /* try to find a matching system font before falling back to an approximate one */ + fz_try(ctx) + { + font = pdf_load_windows_font_by_name(ctx, fontname); + } + fz_catch(ctx) + { + font = NULL; + } + if (font) + return font; + + /* try to fall back to a reasonable system font */ + fz_try(ctx) + { + if (serif) + { + switch (ros) + { + case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "MingLiU"); break; + case FZ_ADOBE_GB: font = pdf_load_windows_font_by_name(ctx, "SimSun"); break; + case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Mincho"); break; + case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Batang"); break; + default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid serif ros"); + } + } + else + { + switch (ros) + { + case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "DFKaiShu-SB-Estd-BF"); break; + case FZ_ADOBE_GB: + fz_try(ctx) + { + font = pdf_load_windows_font_by_name(ctx, "KaiTi"); + } + fz_catch(ctx) + { + font = pdf_load_windows_font_by_name(ctx, "KaiTi_GB2312"); + } + break; 
+ case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Gothic"); break; + case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Gulim"); break; + default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid sans-serif ros"); + } + } + } + fz_catch(ctx) + { +#ifdef NOCJKFONT + /* If no CJK fallback font is builtin, maybe one has been shipped separately */ + font = pdf_load_windows_font_by_name(ctx, "DroidSansFallback"); +#else + fz_rethrow(ctx); +#endif + } + + return font; +} + +#endif + +void pdf_install_load_system_font_funcs(fz_context *ctx) +{ +#ifdef _WIN32 + fz_install_load_system_font_funcs(ctx, pdf_load_windows_font, pdf_load_windows_cjk_font, NULL); +#endif +} diff --git a/source/fitz/font.c b/source/fitz/font.c index 00c6e8f99..1448b4a56 100644 --- a/source/fitz/font.c +++ b/source/fitz/font.c @@ -4,8 +4,11 @@ #include "draw-imp.h" #include +/* willus mod -- remove hb includes */ +/* #include "hb.h" #include "hb-ft.h" +*/ #include diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c index 2df90305e..b1f99e056 100644 --- a/source/fitz/stext-device.c +++ b/source/fitz/stext-device.c @@ -825,6 +825,11 @@ fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options dev->lastchar = ' '; dev->curdir = 1; dev->lasttext = NULL; + /* willus mod -- seems like this should be here, but not sure. 
*/ + if (opts) + dev->flags = opts->flags; + else + dev->flags = 0; return (fz_device*)dev; } diff --git a/source/fitz/string.c b/source/fitz/string.c index f8eedb682..7a767983d 100644 --- a/source/fitz/string.c +++ b/source/fitz/string.c @@ -560,6 +560,10 @@ fz_utflen(const char *s) */ float fz_atof(const char *s) { +/* willus mod: atof(s), #if-#else-#endif */ +#if (!defined(__SSE__)) + return(atof(s)); +#else float result; if (s == NULL) @@ -572,6 +576,7 @@ float fz_atof(const char *s) return 1; result = fz_clamp(result, -FLT_MAX, FLT_MAX); return result; +#endif } /* diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c index 4dfdf36fe..acff7d12a 100644 --- a/source/pdf/pdf-annot.c +++ b/source/pdf/pdf-annot.c @@ -5,8 +5,20 @@ #include #include +/* willus mod--don't use _mkgmtime--not available in Win XP */ #ifdef _WIN32 -#define timegm _mkgmtime +static time_t timegm(struct tm *date); +static time_t timegm(struct tm *date) + + { + time_t t,z; + struct tm gmz; + + z=(time_t)0; + gmz=(*gmtime(&z)); + t=mktime(date)-mktime(&gmz); + return(t); + } #endif #define isdigit(c) (c >= '0' && c <= '9') diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c index 37444b471..613cc05b9 100644 --- a/source/pdf/pdf-link.c +++ b/source/pdf/pdf-link.c @@ -345,6 +345,9 @@ pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp, } return page; } +/* willus mod -- be quiet */ +/* fz_warn(ctx, "unknown link uri '%s'", uri); +*/ return -1; } diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c index 04a772204..9dd0cd898 100644 --- a/source/pdf/pdf-parse.c +++ b/source/pdf/pdf-parse.c @@ -663,9 +663,14 @@ pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, if (c == '\r') { c = fz_peek_byte(ctx, file); +/* willus mod -- no warning */ +/* if (c != '\n') fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); else +*/ +if (c=='\n') +/* willus mod -- end */ fz_read_byte(ctx, file); } stm_ofs = fz_tell(ctx, file); diff 
--git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 8f888059b..08de7bfba 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -710,8 +710,11 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b if (!s) fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing"); len = fz_atoi(fz_strsep(&s, " ")); +/* willus mod -- don't throw on negative subsection length */ +/* if (len < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive"); +*/ /* broken pdfs where the section is not on a separate line */ if (s && *s != '\0') @@ -1378,7 +1381,10 @@ pdf_init_document(fz_context *ctx, pdf_document *doc) { pdf_drop_xref_sections(ctx, doc); fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); +/* willus mod -- be quiet */ +/* fz_warn(ctx, "trying to repair broken xref"); +*/ repaired = 1; } @@ -1506,7 +1512,10 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc) /* Swallow error, but continue dropping */ } +/* willus mod -- no pdf_drop_js */ +/* pdf_drop_js(ctx, doc->js); +*/ pdf_drop_xref_sections(ctx, doc); fz_free(ctx, doc->xref_index); -- 2.22.0