diff --git a/src/core/cff_parser.js b/src/core/cff_parser.js index 2a8935dd8..ee78174da 100644 --- a/src/core/cff_parser.js +++ b/src/core/cff_parser.js @@ -17,6 +17,7 @@ import { bytesToString, FormatError, info, + isArrayEqual, shadow, stringToBytes, Util, @@ -33,6 +34,20 @@ import { DataBuilder } from "./data_builder.js"; // Maximum subroutine call depth of type 2 charstrings. Matches OTS. const MAX_SUBR_NESTING = 10; +function looksLikeUnsigned16BitNegative(coord) { + return coord > 0x7fff && coord <= 0xffff; +} + +function recoverSigned16BitBBox(bbox, onlyLowerLeft = false) { + return Util.normalizeRect( + bbox.map((coord, i) => + (!onlyLowerLeft || i < 2) && looksLikeUnsigned16BitNegative(coord) + ? coord - 0x10000 + : coord + ) + ); +} + /** * The CFF class takes a Type1 file and wrap it into a * 'Compact Font Format' which itself embed Type2 charstrings. @@ -268,13 +283,36 @@ class CFFParser { } let fontBBox = topDict.getByName("FontBBox"); - if (fontBBox?.every(coord => coord === 0) && properties.bbox) { - fontBBox = Util.normalizeRect( - properties.bbox.map(coord => - coord > 0x7fff && coord <= 0xffff ? coord - 0x10000 : coord - ) - ); + const descriptorBBox = properties.bbox?.some(coord => coord !== 0) + ? recoverSigned16BitBBox(properties.bbox) + : null; + const cffBBoxHasUnsignedLowerLeft = fontBBox + ?.slice(0, 2) + .some(looksLikeUnsigned16BitNegative); + const cffBBoxHasUnsignedCoords = fontBBox?.some( + looksLikeUnsigned16BitNegative + ); + if (fontBBox?.every(coord => coord === 0) && descriptorBBox) { + // The CFF FontBBox is empty, hence fall back to the FontDescriptor bbox. + fontBBox = descriptorBBox; topDict.setByName("FontBBox", fontBBox); + } else if (cffBBoxHasUnsignedCoords) { + const recoveredFontBBox = recoverSigned16BitBBox(fontBBox); + const descriptorCorroborates = + descriptorBBox && + properties.bbox.some(coord => coord < 0) && + !properties.bbox.some(looksLikeUnsigned16BitNegative) && + isArrayEqual(recoveredFontBBox, descriptorBBox); + + if (descriptorCorroborates || cffBBoxHasUnsignedLowerLeft) { + // Some Ghostscript-generated CFF fonts encode negative lower-left + // coordinates as unsigned 16-bit values. Preserve large upper-right + // coordinates unless the descriptor independently confirms the repair. + fontBBox = descriptorCorroborates + ? recoveredFontBBox + : recoverSigned16BitBBox(fontBBox, /* onlyLowerLeft = */ true); + topDict.setByName("FontBBox", fontBBox); + } } if (fontBBox?.some(coord => coord !== 0)) { // adjusting ascent/descent diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 16ebf27c2..e866a0f1f 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -923,3 +923,4 @@ !issue18032.pdf !Embedded_font.pdf !issue18548_reduced.pdf +!issue_cff_unsigned_bbox.pdf diff --git a/test/pdfs/issue_cff_unsigned_bbox.pdf b/test/pdfs/issue_cff_unsigned_bbox.pdf new file mode 100644 index 000000000..c31688d7c Binary files /dev/null and b/test/pdfs/issue_cff_unsigned_bbox.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 9c9153bdc..6b6ba745f 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -14320,5 +14320,12 @@ "firstPage": 1, "lastPage": 1, "type": "eq" + }, + { + "id": "issue_cff_unsigned_bbox", + "file": "pdfs/issue_cff_unsigned_bbox.pdf", + "md5": "d2606e2c6cc9e679b8b88c2800c6e1a9", + "rounds": 1, + "type": "eq" } ] diff --git a/test/unit/cff_parser_spec.js b/test/unit/cff_parser_spec.js index e5ddd905b..dc9555846 100644 --- a/test/unit/cff_parser_spec.js +++ b/test/unit/cff_parser_spec.js @@ -154,6 +154,90 @@ describe("CFFParser", function () { expect(properties.ascentScaled).toEqual(true); }); + it("repairs a FontBBox with unsigned-encoded negative coordinates", function () { + // [-456, -305, 2158, 989] encoded as unsigned 16-bit values; produced + // by some Ghostscript-generated CFF fonts. + cff.topDict.setByName("FontBBox", [65080, 65231, 2158, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = { + bbox: [-456, -305, 2158, 989], + }; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + -456, -305, 2158, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + + it("doesn't replace a repairable FontBBox with an empty descriptor bbox", function () { + cff.topDict.setByName("FontBBox", [65080, 65231, 2158, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = { + bbox: [0, 0, 0, 0], + }; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + -456, -305, 2158, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + + it("repairs unsigned-encoded negative FontBBox without descriptor data", function () { + cff.topDict.setByName("FontBBox", [65080, 65231, 2158, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = {}; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + -456, -305, 2158, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + + it("preserves large positive upper FontBBox coordinates", function () { + cff.topDict.setByName("FontBBox", [0, -305, 40000, 989]); + const fontDataRepaired = new CFFCompiler(cff).compile(); + + const properties = { + bbox: [0, -305, 40000, 989], + }; + const reparsedCff = new CFFParser( + new Stream(fontDataRepaired), + properties, + SEAC_ANALYSIS_ENABLED + ).parse(); + + expect(reparsedCff.topDict.getByName("FontBBox")).toEqual([ + 0, -305, 40000, 989, + ]); + expect(properties.ascent).toEqual(989); + expect(properties.descent).toEqual(-305); + expect(properties.ascentScaled).toEqual(true); + }); + it("repairs likely Ghostscript-zeroed FDArray private defaults", function () { cff.isCIDFont = true; cff.topDict.setByName("ROS", [0, 0, 0]);