/**
 * Detects the CID-keyed Type 1 format (Adobe TechNote 5014, CIDFontType 0)
 * by inspecting the first bytes of the font program. The stream is only
 * peeked, never consumed.
 *
 * Callers must additionally check `properties.composite`, since only
 * composite fonts are wrapped as CIDFontType0 in PDF.
 *
 * @param {Object} file - Stream exposing `peekBytes(length)`.
 * @param {number} [sampleLength=2048] - Number of leading bytes to inspect.
 * @returns {boolean} True when the sample starts with "%!" and either names
 *   a `Resource-CIDFont` or declares `/CIDFontType 0`.
 */
function isCidKeyedType1File(file, sampleLength = 2048) {
  const sample = file.peekBytes(sampleLength);
  // 0x25 0x21 is "%!"; bail out before paying for the string conversion.
  if (sample.length < 2 || sample[0] !== 0x25 || sample[1] !== 0x21) {
    return false;
  }
  const text = bytesToString(sample);
  // `\b` keeps values such as "/CIDFontType 01" from matching.
  return text.includes("Resource-CIDFont") || /\/CIDFontType\s+0\b/.test(text);
}
*/ @@ -156,6 +168,31 @@ class Type1Font { #rawFileLength; constructor(name, file, properties) { + let data; + if (properties.composite && isCidKeyedType1File(file)) { + data = this.#parseCidKeyedType1(file, properties); + } + data ||= this.#parseType1(file, properties); + for (const key in data.properties) { + properties[key] = data.properties[key]; + } + + const charstrings = data.charstrings; + const type2Charstrings = this.getType2Charstrings(charstrings); + const subrs = this.getType2Subrs(data.subrs); + + this.charstrings = charstrings; + this.data = this.wrap( + name, + type2Charstrings, + this.charstrings, + subrs, + properties + ); + this.seacs = this.getSeacs(data.charstrings); + } + + #parseType1(file, properties) { // Some bad generators embed pfb file as is, we have to strip 6-byte header. // Also, length1 and length2 might be off by 6 bytes as well. // http://www.math.ubc.ca/~cass/piscript/type1.pdf @@ -173,7 +210,6 @@ class Type1Font { pfbHeader[2]; } - // Get the data block containing glyphs and subrs information const headerBlock = getHeaderBlock(file, headerBlockLength); const headerBlockParser = new Type1Parser( headerBlock.stream, @@ -191,7 +227,6 @@ class Type1Font { pfbHeader[2]; } - // Decrypt the data blocks and retrieve it's content const eexecBlock = getEexecBlock(file, eexecBlockLength); const eexecBlockParser = new Type1Parser( eexecBlock.stream, @@ -199,24 +234,23 @@ class Type1Font { SEAC_ANALYSIS_ENABLED ); const data = eexecBlockParser.extractFontProgram(properties); - for (const key in data.properties) { - properties[key] = data.properties[key]; - } this.#rawFileLength = headerBlock.length + eexecBlock.length; + return data; + } - const charstrings = data.charstrings; - const type2Charstrings = this.getType2Charstrings(charstrings); - const subrs = this.getType2Subrs(data.subrs); - - this.charstrings = charstrings; - this.data = this.wrap( - name, - type2Charstrings, - this.charstrings, - subrs, - properties - ); - this.seacs = 
this.getSeacs(data.charstrings); + #parseCidKeyedType1(file, properties) { + const fileStart = file.pos; + const length = file.end - fileStart; + const parser = new Type1Parser(file, false, SEAC_ANALYSIS_ENABLED); + const data = parser.extractCidKeyedFontProgram(properties); + if (!data) { + // Reset the stream so the regular Type 1 path can re-try. + file.pos = fileStart; + warn("Type1Font: unable to parse CID-keyed Type 1 font."); + return null; + } + this.#rawFileLength = length; + return data; } get numGlyphs() { diff --git a/src/core/type1_parser.js b/src/core/type1_parser.js index 843e671ad..eb85dbf46 100644 --- a/src/core/type1_parser.js +++ b/src/core/type1_parser.js @@ -730,6 +730,280 @@ class Type1Parser { return program; } + /* + * Returns an object containing a Subrs array and a CharStrings array + * extracted from a CID-keyed Type 1 font program (Adobe TechNote 5014, + * CIDFontType 0). The stream must start at the PostScript header. + * + * The binary section that follows the "StartData" marker contains: + * - CIDMap at CIDMapOffset, with (CIDCount + 1) entries; each entry is + * FDBytes (FD-index) + GDBytes (glyph data offset) bytes. + * - SubrMap at SubrMapOffset, with (SubrCount + 1) entries of SDBytes + * each, holding subr data offsets. + * - The charstring/subr data, each encrypted with the Type 1 charstring + * cipher and prefixed by `lenIV` random bytes. + * + * Only single-FDArray fonts are supported. 
/**
 * Extracts Subrs and CharStrings from a CID-keyed Type 1 font program.
 *
 * The PostScript header is tokenized up to the "StartData" marker to collect
 * the CIDMap/SubrMap layout and the private dictionary values; the data that
 * follows is then read (decoding hex if needed) and sliced into subrs and
 * one charstring per CID.
 *
 * @param {Object} properties - Receives `fontMatrix`, `ascent`, `descent`
 *   and `ascentScaled` as the matching header keys are encountered. These
 *   writes happen before validation, so they persist even when null is
 *   returned.
 * @returns {Object|null} `{ subrs, charstrings, properties }`, or null when
 *   the header or the data section is missing, malformed, or uses more than
 *   one font dictionary.
 */
extractCidKeyedFontProgram(properties) {
  const stream = this.stream;
  // lenIV falls back to 4 unless the font program overrides it.
  const privateData = new Map([["lenIV", 4]]);
  const program = {
    subrs: [],
    charstrings: [],
    properties: { privateData },
  };

  // Layout values read from the header; the sentinels below (-1 offsets,
  // zero counts/sizes) are what the validation after the loop rejects.
  let cidCount = 0;
  let cidMapOffset = -1;
  let fdBytes = 1;
  let gdBytes = 0;
  let subrMapOffset = -1;
  let sdBytes = 0;
  let subrCount = 0;
  let startDataLength = 0;
  let startDataIsHex = false;
  let foundStartData = false;
  // Sliding window of the last four tokens, used to validate the
  // `( <type> ) <length>` operands that precede "StartData".
  const previousTokens = [];

  function rememberToken(value) {
    previousTokens.push(value);
    if (previousTokens.length > 4) {
      previousTokens.shift();
    }
  }

  let token;
  while ((token = this.getToken()) !== null) {
    if (token === "StartData") {
      const dataType = previousTokens.at(-3);
      const dataLength = previousTokens.at(-1);
      // Expect exactly: "(" ("Binary" | "Hex") ")" <decimal digits>.
      if (
        previousTokens.at(-4) !== "(" ||
        previousTokens.at(-2) !== ")" ||
        (dataType !== "Binary" && dataType !== "Hex") ||
        !/^\d+$/.test(dataLength)
      ) {
        return null;
      }
      startDataLength = parseInt(dataLength, 10);
      if (startDataLength <= 0) {
        return null;
      }
      startDataIsHex = dataType === "Hex";
      foundStartData = true;
      break;
    }
    rememberToken(token);
    // Only name tokens (introduced by "/") carry values of interest.
    if (token !== "/") {
      continue;
    }
    token = this.getToken();
    rememberToken(token);
    switch (token) {
      case "FontMatrix":
        properties.fontMatrix = this.readNumberArray();
        break;
      case "FontBBox":
        const fontBBox = this.readNumberArray();
        // max/min over entries 1 and 3, so a swapped bbox still yields
        // ascent >= descent.
        properties.ascent = Math.max(fontBBox[3], fontBBox[1]);
        properties.descent = Math.min(fontBBox[1], fontBBox[3]);
        properties.ascentScaled = true;
        break;
      case "CIDCount":
        cidCount = this.readInt();
        break;
      case "CIDMapOffset":
        cidMapOffset = this.readInt();
        break;
      case "FDBytes":
        fdBytes = this.readInt();
        break;
      case "GDBytes":
        gdBytes = this.readInt();
        break;
      case "SubrMapOffset":
        subrMapOffset = this.readInt();
        break;
      case "SDBytes":
        sdBytes = this.readInt();
        break;
      case "SubrCount":
        subrCount = this.readInt();
        break;
      case "BlueValues":
      case "OtherBlues":
      case "FamilyBlues":
      case "FamilyOtherBlues":
        // *Blue* values are skipped while hinting is disabled; the array is
        // still read so the tokenizer moves past it.
        this.readNumberArray();
        break;
      case "StemSnapH":
      case "StemSnapV":
        privateData.set(token, this.readNumberArray());
        break;
      case "StdHW":
      case "StdVW":
        // Stored as a scalar: only the first array element is kept.
        privateData.set(token, this.readNumberArray()[0]);
        break;
      case "BlueShift":
      case "lenIV":
      case "BlueFuzz":
      case "BlueScale":
      case "LanguageGroup":
        privateData.set(token, this.readNumber());
        break;
      case "ExpansionFactor":
        // A falsy reading (including 0) is replaced by 0.06.
        privateData.set(token, this.readNumber() || 0.06);
        break;
      case "ForceBold":
        privateData.set(token, this.readBoolean());
        break;
    }
  }

  // Reject headers without StartData or with an unusable CIDMap layout:
  // FD indices may take 0-4 bytes, glyph data offsets 1-4 bytes.
  if (
    !foundStartData ||
    cidCount <= 0 ||
    cidMapOffset < 0 ||
    fdBytes < 0 ||
    fdBytes > 4 ||
    gdBytes < 1 ||
    gdBytes > 4
  ) {
    return null;
  }

  // After "StartData", currentChar is the single separator byte (typically
  // a space); the next byte starts the binary block. `startDataLength` is
  // only an upper bound: some generators (see issue 15292) write a wrong
  // value, and the buffer is also untrusted PostScript input -- cap to the
  // stream's remaining bytes before allocating.
  const maxLength = stream.end - stream.pos;
  if (startDataLength > maxLength) {
    if (!startDataIsHex) {
      startDataLength = maxLength;
    } else if (startDataLength > 2 * maxLength) {
      // Hex needs ~2 chars per output byte; anything larger is impossible.
      // NOTE(review): with 2 chars per decoded byte, the reachable maximum
      // looks like maxLength / 2 rather than 2 * maxLength. The looser
      // bound still caps the allocation below, and short data is rejected
      // by the `j !== startDataLength` check -- confirm whether the
      // tighter bound was intended.
      return null;
    }
  }
  // Hex mode reads everything that remains; binary mode reads the capped
  // length.
  let binary = stream.getBytes(startDataIsHex ? undefined : startDataLength);
  if (startDataIsHex) {
    const decoded = new Uint8Array(startDataLength);
    // `digit1` holds the pending high nibble, or -1 when none is pending.
    let digit1 = -1,
      j = 0;
    for (let i = 0, ii = binary.length; i < ii && j < startDataLength; i++) {
      const digit = binary[i];
      // Non-hex bytes between digits (e.g. whitespace) are skipped.
      if (!isHexDigit(digit)) {
        continue;
      }
      if (digit1 < 0) {
        digit1 = digit;
        continue;
      }
      decoded[j++] = parseInt(String.fromCharCode(digit1, digit), 16);
      digit1 = -1;
    }
    // Fewer decoded bytes than declared means the data is truncated.
    if (j !== startDataLength) {
      return null;
    }
    binary = decoded;
  }
  const lenIV = privateData.get("lenIV");
  // Each CIDMap entry is an FD index followed by a glyph data offset.
  const cidEntrySize = fdBytes + gdBytes;
  const subrs = [];

  // Reads a big-endian unsigned integer of `byteCount` bytes; `>>> 0`
  // undoes the sign that `<<` introduces for 4-byte values.
  function readUint(offset, byteCount) {
    let n = 0;
    for (let i = 0; i < byteCount; i++) {
      n = (n << 8) | binary[offset + i];
    }
    return n >>> 0;
  }

  // Both maps hold (count + 1) entries and must fit inside the data block;
  // the SubrMap is only validated when subrs are declared.
  if (
    cidMapOffset + (cidCount + 1) * cidEntrySize > binary.length ||
    (subrCount > 0 &&
      (subrMapOffset < 0 ||
        sdBytes < 1 ||
        sdBytes > 4 ||
        subrMapOffset + (subrCount + 1) * sdBytes > binary.length))
  ) {
    return null;
  }

  if (fdBytes > 0) {
    // Only single-FDArray fonts are supported here. Reject CID-keyed fonts
    // that actually select multiple font dictionaries, since each FD can
    // define different private data and subroutines.
    for (let cid = 0; cid < cidCount; cid++) {
      if (readUint(cidMapOffset + cid * cidEntrySize, fdBytes) !== 0) {
        return null;
      }
    }
  }

  if (subrCount > 0) {
    // Consecutive SubrMap offsets delimit each subr's bytes.
    const subrOffsets = new Array(subrCount + 1);
    for (let i = 0; i <= subrCount; i++) {
      subrOffsets[i] = readUint(subrMapOffset + i * sdBytes, sdBytes);
    }
    for (let i = 0; i < subrCount; i++) {
      const start = subrOffsets[i];
      const end = subrOffsets[i + 1];
      // An out-of-range or reversed interval yields an empty subr, which
      // keeps subr indices aligned.
      if (end > binary.length || end < start) {
        subrs[i] = new Uint8Array(0);
        continue;
      }
      subrs[i] = this.readCharStrings(binary.subarray(start, end), lenIV);
    }
  }

  const charstrings = [];
  // Glyph data for a CID spans [its offset, the next entry's offset).
  let prevOffset = readUint(cidMapOffset + fdBytes, gdBytes);
  for (let cid = 0; cid < cidCount; cid++) {
    const nextOffset = readUint(
      cidMapOffset + (cid + 1) * cidEntrySize + fdBytes,
      gdBytes
    );
    const glyphName = cid === 0 ? ".notdef" : `cid${cid}`;
    if (nextOffset > prevOffset && nextOffset <= binary.length) {
      const encoded = this.readCharStrings(
        binary.subarray(prevOffset, nextOffset),
        lenIV
      );
      const charString = new Type1CharString();
      const error = charString.convert(
        encoded,
        subrs,
        this.seacAnalysisEnabled
      );
      charstrings.push({
        glyphName,
        // A failed conversion is replaced by the single-entry program [14].
        charstring: error ? [14] : charString.output,
        width: charString.width,
        lsb: charString.lsb,
        seac: charString.seac,
      });
    } else {
      // Empty intervals should select CID 0's notdef glyph, while still
      // keeping the slot so the CID-to-GID mapping stays aligned.
      const notDef = charstrings[0];
      charstrings.push({
        glyphName,
        charstring: notDef?.charstring.slice() || [0x8b, 0x0e], // 0 endchar
        width: notDef?.width || 0,
        lsb: notDef?.lsb || 0,
      });
    }
    prevOffset = nextOffset;
  }
  program.subrs = subrs;
  program.charstrings = charstrings;
  return program;
}
/**
 * Builds a minimal CID-keyed Type 1 font program for the specs: a fixed
 * PostScript header followed by a `StartData` section carrying `binary`.
 *
 * @param {Object} options
 * @param {Uint8Array} options.binary - Raw data section.
 * @param {number} [options.cidCount=3]
 * @param {number} [options.fdBytes=0]
 * @param {string} [options.dataFormat="Binary"] - "Hex" emits `binary` as
 *   hex digits; any other value emits the raw bytes.
 * @param {number} [options.declaredLength=binary.length] - Length operand
 *   written before `StartData`.
 * @param {number} [options.lenIV=-1]
 * @param {number} [options.subrMapOffset=0]
 * @param {number} [options.subrCount=0]
 * @param {number} [options.sdBytes=0]
 * @param {string} [options.trailer=""] - Text appended after the data.
 * @returns {StringStream}
 */
function createCidKeyedFontStream({
  binary,
  cidCount = 3,
  fdBytes = 0,
  dataFormat = "Binary",
  declaredLength = binary.length,
  lenIV = -1,
  subrMapOffset = 0,
  subrCount = 0,
  sdBytes = 0,
  trailer = "",
}) {
  let payload;
  if (dataFormat === "Hex") {
    payload = binary.toHex();
  } else {
    payload = bytesToString(binary);
  }

  const headerLines = [
    "%!PS-Adobe-3.0 Resource-CIDFont",
    "/CIDMapOffset 0 def",
    `/FDBytes ${fdBytes} def`,
    "/GDBytes 1 def",
    `/CIDCount ${cidCount} def`,
    `/SubrMapOffset ${subrMapOffset} def`,
    `/SDBytes ${sdBytes} def`,
    `/SubrCount ${subrCount} def`,
    "/Private 5 dict dup begin",
    `/lenIV ${lenIV} def`,
    "end def",
  ];
  const dataSection = `(${dataFormat}) ${declaredLength} StartData ${payload}${trailer}`;

  return new StringStream(`${headerLines.join("\n")}\n${dataSection}`);
}
/**
 * Inverse of the Type 1 charstring cipher: produces ciphertext that
 * `Type1Parser.readCharStrings` (with matching lenIV) decodes back to
 * `plain`. The leading `lenIV` plaintext bytes are zero padding.
 *
 * @param {ArrayLike<number>} plain - Plaintext charstring bytes.
 * @param {number} lenIV - Number of padding bytes to prepend. A negative
 *   value means "not encrypted" (the specs feed plaintext with lenIV -1),
 *   in which case a copy of `plain` is returned unchanged.
 * @returns {Uint8Array} Encrypted bytes, `plain.length + lenIV` long.
 */
function encryptCharString(plain, lenIV) {
  // Without this guard a negative lenIV would shrink the output buffer and
  // shift every read of `plain` by one byte.
  if (lenIV < 0) {
    return Uint8Array.from(plain);
  }
  const c1 = 52845;
  const c2 = 22719;
  let r = 4330; // Initial key for charstring encryption.
  const out = new Uint8Array(plain.length + lenIV);
  for (let i = 0; i < out.length; i++) {
    const src = i < lenIV ? 0 : plain[i - lenIV];
    const cipher = (src ^ (r >> 8)) & 0xff;
    out[i] = cipher;
    // The key schedule is driven by the ciphertext byte, not the plaintext.
    r = ((cipher + r) * c1 + c2) & 0xffff;
  }
  return out;
}
SEAC_ANALYSIS_ENABLED); + const program = parser.extractCidKeyedFontProgram({}); + + expect(program.charstrings[2].width).toEqual(250); + }); + + it("rejects CID-keyed Type 1 fonts with multiple FD indices", function () { + const binary = Uint8Array.of( + // CIDMap: CID 0 selects FD index 1, which is unsupported. + 1, + 4, + 0, + 9, + 0x8b, + 0xf8, + 0x88, + 0x0d, + 0x0e + ); + const stream = createCidKeyedFontStream({ + binary, + cidCount: 1, + fdBytes: 1, + }); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + + expect(parser.extractCidKeyedFontProgram({})).toEqual(null); + }); + + it("uses subrs when parsing a CID-keyed Type 1 font", function () { + // 0 333 hsbw return -- callable subroutine. + const subr0 = [0x8b, 0xf7, 0xe1, 0x0d, 0x0b]; + // 0 500 hsbw endchar. + const cid0 = [0x8b, 0xf8, 0x88, 0x0d, 0x0e]; + // 0 callsubr endchar -- delegates the width to subr 0. + const cid1 = [0x8b, 0x0a, 0x0e]; + // Layout: CIDMap(3) || SubrMap(2) || subr0 || cid0 || cid1. 
+ const subrStart = 5; + const cid0Start = subrStart + subr0.length; + const cid1Start = cid0Start + cid0.length; + const binary = Uint8Array.of( + cid0Start, // CID 0 + cid1Start, // CID 1 + cid1Start + cid1.length, // CIDMap sentinel + subrStart, // subr 0 + cid0Start, // SubrMap sentinel + ...subr0, + ...cid0, + ...cid1 + ); + const stream = createCidKeyedFontStream({ + binary, + cidCount: 2, + subrMapOffset: 3, + subrCount: 1, + sdBytes: 1, + }); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + const program = parser.extractCidKeyedFontProgram({}); + + expect(program.subrs.length).toEqual(1); + expect(program.charstrings[0].width).toEqual(500); + expect(program.charstrings[1].width).toEqual(333); + }); + + it("decrypts charstrings when lenIV > 0", function () { + const cid0Plain = [0x8b, 0xf8, 0x88, 0x0d, 0x0e]; // 0 500 hsbw endchar + const cid0Cipher = encryptCharString(cid0Plain, 4); + const binary = Uint8Array.of( + // CIDMap: 2 entries (CIDCount + 1). + 2, + 2 + cid0Cipher.length, + ...cid0Cipher + ); + const stream = createCidKeyedFontStream({ + binary, + cidCount: 1, + lenIV: 4, + }); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + const program = parser.extractCidKeyedFontProgram({}); + + expect(program.charstrings[0].width).toEqual(500); + }); + + it("decodes hex CID-keyed data with whitespace between digits", function () { + const binary = Uint8Array.of(4, 9, 9, 14, 0x8b, 0xf8, 0x88, 0x0d, 0x0e); + const hexWithSpaces = binary + .toHex() + .match(/.{1,2}/g) + .join(" "); + const stream = new StringStream( + "%!PS-Adobe-3.0 Resource-CIDFont\n" + + "/CIDMapOffset 0 def\n" + + "/FDBytes 0 def\n" + + "/GDBytes 1 def\n" + + "/CIDCount 1 def\n" + + "/Private 5 dict dup begin /lenIV -1 def end def\n" + + `(Hex) ${binary.length} StartData ${hexWithSpaces}` + ); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + const program = parser.extractCidKeyedFontProgram({}); + + 
expect(program.charstrings[0].width).toEqual(500); + }); + + it("rejects truncated CID-keyed binary data", function () { + // CIDMap declares 3 CIDs (4 entries x 1 byte = 4 bytes) but only 2 bytes + // of binary follow, so the CIDMap read goes past the end. + const binary = Uint8Array.of(0, 0); + const stream = createCidKeyedFontStream({ binary, cidCount: 3 }); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + + expect(parser.extractCidKeyedFontProgram({})).toEqual(null); + }); + + it("rejects malformed StartData token sequences", function () { + const cases = [ + // Missing the "(Binary)" / "(Hex)" parenthesised tag. + "Binary 4 StartData \x00\x00\x00\x00", + // Non-numeric length. + "(Binary) abc StartData \x00\x00\x00\x00", + // Unsupported data type. + "(Ascii) 4 StartData \x00\x00\x00\x00", + // Zero length. + "(Binary) 0 StartData", + ]; + for (const tail of cases) { + const stream = new StringStream( + "%!PS-Adobe-3.0 Resource-CIDFont\n" + + "/CIDMapOffset 0 def /FDBytes 0 def /GDBytes 1 def\n" + + "/CIDCount 1 def\n" + + "/Private 5 dict dup begin /lenIV -1 def end def\n" + + tail + ); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + expect(parser.extractCidKeyedFontProgram({})).toEqual(null); + } + }); + + it("rejects oversized hex StartData lengths", function () { + // Declares 1 GiB of hex data; must be rejected before any allocation. + const stream = new StringStream( + "%!PS-Adobe-3.0 Resource-CIDFont\n" + + "/CIDMapOffset 0 def /FDBytes 0 def /GDBytes 1 def\n" + + "/CIDCount 1 def\n" + + "/Private 5 dict dup begin /lenIV -1 def end def\n" + + "(Hex) 1073741824 StartData 00" + ); + const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED); + + expect(parser.extractCidKeyedFontProgram({})).toEqual(null); + }); + it("parses font header font matrix", function () { const stream = new StringStream( "/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n"