From 939129603641ad82c0f533648acc7d91611348ae Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 21 May 2026 21:48:04 +0200 Subject: [PATCH] Recover CFF FontBBox with negative coordinates encoded as unsigned 16-bit It fixes #21312. --- src/core/cff_parser.js | 50 +++++++++++++-- test/pdfs/.gitignore | 1 + test/pdfs/issue_cff_unsigned_bbox.pdf | Bin 0 -> 2796 bytes test/test_manifest.json | 7 +++ test/unit/cff_parser_spec.js | 84 ++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 test/pdfs/issue_cff_unsigned_bbox.pdf diff --git a/src/core/cff_parser.js b/src/core/cff_parser.js index 2a8935dd8..ee78174da 100644 --- a/src/core/cff_parser.js +++ b/src/core/cff_parser.js @@ -17,6 +17,7 @@ import { bytesToString, FormatError, info, + isArrayEqual, shadow, stringToBytes, Util, @@ -33,6 +34,20 @@ import { DataBuilder } from "./data_builder.js"; // Maximum subroutine call depth of type 2 charstrings. Matches OTS. const MAX_SUBR_NESTING = 10; +function looksLikeUnsigned16BitNegative(coord) { + return coord > 0x7fff && coord <= 0xffff; +} + +function recoverSigned16BitBBox(bbox, onlyLowerLeft = false) { + return Util.normalizeRect( + bbox.map((coord, i) => + (!onlyLowerLeft || i < 2) && looksLikeUnsigned16BitNegative(coord) + ? coord - 0x10000 + : coord + ) + ); +} + /** * The CFF class takes a Type1 file and wrap it into a * 'Compact Font Format' which itself embed Type2 charstrings. @@ -268,13 +283,36 @@ class CFFParser { } let fontBBox = topDict.getByName("FontBBox"); - if (fontBBox?.every(coord => coord === 0) && properties.bbox) { - fontBBox = Util.normalizeRect( - properties.bbox.map(coord => - coord > 0x7fff && coord <= 0xffff ? coord - 0x10000 : coord - ) - ); + const descriptorBBox = properties.bbox?.some(coord => coord !== 0) + ? recoverSigned16BitBBox(properties.bbox) + : null; + const cffBBoxHasUnsignedLowerLeft = fontBBox + ?.slice(0, 2) + .some(looksLikeUnsigned16BitNegative); + const cffBBoxHasUnsignedCoords = fontBBox?.some( + looksLikeUnsigned16BitNegative + ); + if (fontBBox?.every(coord => coord === 0) && descriptorBBox) { + // The CFF FontBBox is empty, hence fall back to the FontDescriptor bbox. + fontBBox = descriptorBBox; topDict.setByName("FontBBox", fontBBox); + } else if (cffBBoxHasUnsignedCoords) { + const recoveredFontBBox = recoverSigned16BitBBox(fontBBox); + const descriptorCorroborates = + descriptorBBox && + properties.bbox.some(coord => coord < 0) && + !properties.bbox.some(looksLikeUnsigned16BitNegative) && + isArrayEqual(recoveredFontBBox, descriptorBBox); + + if (descriptorCorroborates || cffBBoxHasUnsignedLowerLeft) { + // Some Ghostscript-generated CFF fonts encode negative lower-left + // coordinates as unsigned 16-bit values. Preserve large upper-right + // coordinates unless the descriptor independently confirms the repair. + fontBBox = descriptorCorroborates + ? recoveredFontBBox + : recoverSigned16BitBBox(fontBBox, /* onlyLowerLeft = */ true); + topDict.setByName("FontBBox", fontBBox); + } } if (fontBBox?.some(coord => coord !== 0)) { // adjusting ascent/descent diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 16ebf27c2..e866a0f1f 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -923,3 +923,4 @@ !issue18032.pdf !Embedded_font.pdf !issue18548_reduced.pdf +!issue_cff_unsigned_bbox.pdf diff --git a/test/pdfs/issue_cff_unsigned_bbox.pdf b/test/pdfs/issue_cff_unsigned_bbox.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c31688d7cb115a8e96c04cfa546e3487947039d2 GIT binary patch literal 2796 zcma)8e^6A%9pCe~2ku@kBR7XX;BgDeFXedeesTy>5TR1jY6U};(Dt}{z$M&0c=rHF z)SH@_+6*?CBsiBpTE#|_mPx0Xh|Q?cbgG!7+S-|*PVAtqF;&|nq^X%b`<$e^kE2p( zoxYiU`}Vuv-S7AN`F_6pwqVn`28XlE0}HNyaN#}Z#3Gj4AA?m@SZE&Xk+4t~Py^j^ zCl)pZI;9wPQH9O8x*AH+5L0C1|6DMwCu|6ZVz|}K)Z6GTa$i)%&RGlD%>0GSj3QCh zX*p*?de@q(B7r>PS#)*ezn8h=UQhwiyZy)CU)C?toRIC^~6# z(dMSjLz|a2U#c&sy+8F`QH`4)gZ0huTaclavXH`$xi{m;G~l}PLQ`M6s^iauA{N#L zVp3{G540%|jcrn7N0Sr@x69ojEYwGXawr_7nQ9DCXu|5CV?7quNwFX`6^N>AHO+{R z;n6k=3MP1<2X&3>*xLWw5vt!b7*nN4W3)ro)390U4AZa&ad9mrjZ}h#jY>#Tn6t$* z&Qf7hUr$fB6k+KXS-SK*vH+BDk18v;oViXzYM~jSkhI`dCyhs>d~l_DcdGZKdT*-t z%^8=c#{N0-is|uec$m@o@XYc`_wt*82(6W-|B;%8?m#D_s;;_Lr?bQ3^&F%T3d-t6T5927ZM3snqE>}kmO7Qq&E!#?UP6nqXT22e>{79hl2d4+U_ZiEusk({ zyCpaF>sZ~dQ#PInBSmTZO4o9)tCE+d6FdH?gR7jUcLjvWNOlK4vWXF^emSSafa?hZmBkZDujvbhG z{s(Pd+@-2Ls|8_TV4y6(SY8&CBSJ|j9thLJg`Q86(oe1GHLSCxXgY+VvQ#LnqA@M$ z2g2<5niQpl69hbi$OJGb03|>KUa$t#f_m_EuoXN7_JRa>3A_r%!EeC@Faa)uD-^~* zfe*o7!Drx0gU$3Thin9Lr%1ht^l+y%+F3X)d^g{u<#336ng@|FA0ork$+OFO)PFaB z0QHmMn`ESGjuE3~Bx%SVde5YGcJ2yfBl63*?cK8nM~^#rR01{M z=cObEsdCqLDZ6^h`Hw&O&G{>%hjyul?WZp}AkxO25RqKXiYBs1I`Tn_&BOoM%a8l` zPl~A^bHYzOPZm_d3Lahj`SF)u&mMX4hd=nS9a+y*^48%4Qzq*>RnYpgr>>iIr-kY?Y z`N#1o%pWT{z1);XuGrdcZ@Xb4TewzNo5Q5lXd||yC!NMEnaFl6Xww#uAXz{plV$fv zKdJuKMR<;Umn8;qqBd!=zBW+1V?$p}#vZQk(vEZe z7X>o8M0<#{Xit-lYlfp_xA7?Hu#w%@wB4Mg09rQ2ZxH@{V!2TWt#`_J>qo`3UkNP& zw3NfTp%0Qp1BOKMaUg2*)5`JI;@&ej%d=?=}0 zjAS%!J3W5j$T9n)mrWwCm4D_S%j_t>6qZ5C%J={p5g~H%l&HzLmX6$zLvG#!(by}8 zkGw7)Zf}=+L%~;i$Lz?-e@+%>zUzY4TV81Ww466-Mh-2}7IG*)aBu+qeKkbS?|AYV zCA+1iYeScP4~N)kY)cKv**X_j}_}?-oy`8WJ zNAJsg^y)N6=5*0(;~bgGLw}reWN!cbwO;3ZnO~f@AG>Sa-;W;n^e3n2x74Qa6B4FB e$j!3ME|~0wpkLzximJ!x@_JpcprC$Z1N;}@@Q3IC literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 9c9153bdc..6b6ba745f 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -14320,5 +14320,12 @@ "firstPage": 1, "lastPage": 1, "type": "eq" + }, + { + "id": "issue_cff_unsigned_bbox", + "file": "pdfs/issue_cff_unsigned_bbox.pdf", + "md5": "d2606e2c6cc9e679b8b88c2800c6e1a9", + "rounds": 1, + "type": "eq" } ] diff --git a/test/unit/cff_parser_spec.js b/test/unit/cff_parser_spec.js index e5ddd905b..dc9555846 100644 --- a/test/unit/cff_parser_spec.js +++ b/test/unit/cff_parser_spec.js @@ -154,6 +154,90 @@ describe("CFFParser", function () { expect(properties.ascentScaled).toEqual(true); }); + it("repairs a FontBBox with unsigned-encoded negative coordinates", function () { + // [-456, -305, 2158, 989] encoded as unsigned 16-bit values; produced + // by some Ghostscript-generated CFF fonts. + cff.topDict.setByName("FontBBox", [65080, 65231, 2158, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = { + bbox: [-456, -305, 2158, 989], + }; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + -456, -305, 2158, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + + it("doesn't replace a repairable FontBBox with an empty descriptor bbox", function () { + cff.topDict.setByName("FontBBox", [65080, 65231, 2158, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = { + bbox: [0, 0, 0, 0], + }; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + -456, -305, 2158, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + + it("repairs unsigned-encoded negative FontBBox without descriptor data", function () { + cff.topDict.setByName("FontBBox", [65080, 65231, 2158, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = {}; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + -456, -305, 2158, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + + it("preserves large positive upper FontBBox coordinates", function () { + cff.topDict.setByName("FontBBox", [0, -305, 40000, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = { + bbox: [0, -305, 40000, 989], + }; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + 0, -305, 40000, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + it("repairs likely Ghostscript-zeroed FDArray private defaults", function () { cff.isCIDFont = true; cff.topDict.setByName("ROS", [0, 0, 0]);