Parse CID-keyed Type 1 fonts instead of falling back to a system font

It fixes #15292.

PDFs can embed a CID-keyed Type 1 program (Adobe TechNote 5014,
CIDFontType 0) under /Subtype /CIDFontType0 + /FontFile. Its binary
CIDMap/SubrMap layout has no eexec block, so Type1Font's eexec-only
parser used to fall through and trigger the work-around added in
PR #15397.
Split the constructor and parse the binary CIDMap, SubrMap
and charstrings (encrypted with the standard Type 1 charstring cipher)
through the existing Type1CharString.convert + CFF wrap pipeline.

Only single-FDArray fonts are supported; the StartData length is
clamped to the stream's remaining bytes before allocating.
This commit is contained in:
calixteman 2026-05-24 22:37:51 +02:00 committed by Calixte Denizet
parent 03eda70d7e
commit e1de5c30b5
3 changed files with 578 additions and 19 deletions

View File

@ -13,6 +13,7 @@
* limitations under the License.
*/
import { bytesToString, FormatError, warn } from "../shared/util.js";
import {
CFF,
CFFCharset,
@ -24,7 +25,6 @@ import {
CFFStrings,
CFFTopDict,
} from "./cff_parser.js";
import { FormatError, warn } from "../shared/util.js";
import { SEAC_ANALYSIS_ENABLED, type1FontGlyphMapping } from "./fonts_utils.js";
import { isWhiteSpace } from "./core_utils.js";
import { Stream } from "./stream.js";
@ -149,6 +149,18 @@ function getEexecBlock(stream, suggestedLength) {
};
}
// Sniffs the first 2048 bytes of `file` (without consuming them) for the
// CID-keyed Type 1 format (Adobe TechNote 5014, CIDFontType 0): the data must
// start with "%!" and mention either "Resource-CIDFont" or "/CIDFontType 0".
// Callers must additionally check `properties.composite`, since only
// composite fonts are wrapped as CIDFontType0 in PDF.
function isCidKeyedType1File(file) {
  const head = file.peekBytes(2048);
  // 0x25 0x21 is the PostScript "%!" magic.
  const hasPostScriptMagic =
    head.length >= 2 && head[0] === 0x25 && head[1] === 0x21;
  if (!hasPostScriptMagic) {
    return false;
  }
  const headText = bytesToString(head);
  if (headText.includes("Resource-CIDFont")) {
    return true;
  }
  return /\/CIDFontType\s+0\b/.test(headText);
}
/**
* Type1Font is also a CIDFontType0.
*/
@ -156,6 +168,31 @@ class Type1Font {
#rawFileLength;
constructor(name, file, properties) {
let data;
if (properties.composite && isCidKeyedType1File(file)) {
data = this.#parseCidKeyedType1(file, properties);
}
data ||= this.#parseType1(file, properties);
for (const key in data.properties) {
properties[key] = data.properties[key];
}
const charstrings = data.charstrings;
const type2Charstrings = this.getType2Charstrings(charstrings);
const subrs = this.getType2Subrs(data.subrs);
this.charstrings = charstrings;
this.data = this.wrap(
name,
type2Charstrings,
this.charstrings,
subrs,
properties
);
this.seacs = this.getSeacs(data.charstrings);
}
#parseType1(file, properties) {
// Some bad generators embed pfb file as is, we have to strip 6-byte header.
// Also, length1 and length2 might be off by 6 bytes as well.
// http://www.math.ubc.ca/~cass/piscript/type1.pdf
@ -173,7 +210,6 @@ class Type1Font {
pfbHeader[2];
}
// Get the data block containing glyphs and subrs information
const headerBlock = getHeaderBlock(file, headerBlockLength);
const headerBlockParser = new Type1Parser(
headerBlock.stream,
@ -191,7 +227,6 @@ class Type1Font {
pfbHeader[2];
}
// Decrypt the data blocks and retrieve their content
const eexecBlock = getEexecBlock(file, eexecBlockLength);
const eexecBlockParser = new Type1Parser(
eexecBlock.stream,
@ -199,24 +234,23 @@ class Type1Font {
SEAC_ANALYSIS_ENABLED
);
const data = eexecBlockParser.extractFontProgram(properties);
for (const key in data.properties) {
properties[key] = data.properties[key];
}
this.#rawFileLength = headerBlock.length + eexecBlock.length;
return data;
}
const charstrings = data.charstrings;
const type2Charstrings = this.getType2Charstrings(charstrings);
const subrs = this.getType2Subrs(data.subrs);
this.charstrings = charstrings;
this.data = this.wrap(
name,
type2Charstrings,
this.charstrings,
subrs,
properties
);
this.seacs = this.getSeacs(data.charstrings);
#parseCidKeyedType1(file, properties) {
const fileStart = file.pos;
const length = file.end - fileStart;
const parser = new Type1Parser(file, false, SEAC_ANALYSIS_ENABLED);
const data = parser.extractCidKeyedFontProgram(properties);
if (!data) {
// Reset the stream so the regular Type 1 path can re-try.
file.pos = fileStart;
warn("Type1Font: unable to parse CID-keyed Type 1 font.");
return null;
}
this.#rawFileLength = length;
return data;
}
get numGlyphs() {

View File

@ -730,6 +730,280 @@ class Type1Parser {
return program;
}
/*
* Returns an object containing a Subrs array and a CharStrings array
* extracted from a CID-keyed Type 1 font program (Adobe TechNote 5014,
* CIDFontType 0). The stream must start at the PostScript header.
*
* The binary section that follows the "StartData" marker contains:
* - CIDMap at CIDMapOffset, with (CIDCount + 1) entries; each entry is
* FDBytes (FD-index) + GDBytes (glyph data offset) bytes.
* - SubrMap at SubrMapOffset, with (SubrCount + 1) entries of SDBytes
* each, holding subr data offsets.
* - The charstring/subr data, each encrypted with the Type 1 charstring
* cipher and prefixed by `lenIV` random bytes.
*
* Only single-FDArray fonts are supported.
*/
extractCidKeyedFontProgram(properties) {
const stream = this.stream;
const privateData = new Map([["lenIV", 4]]);
const program = {
subrs: [],
charstrings: [],
properties: { privateData },
};
let cidCount = 0;
let cidMapOffset = -1;
let fdBytes = 1;
let gdBytes = 0;
let subrMapOffset = -1;
let sdBytes = 0;
let subrCount = 0;
let startDataLength = 0;
let startDataIsHex = false;
let foundStartData = false;
const previousTokens = [];
function rememberToken(value) {
previousTokens.push(value);
if (previousTokens.length > 4) {
previousTokens.shift();
}
}
let token;
while ((token = this.getToken()) !== null) {
if (token === "StartData") {
const dataType = previousTokens.at(-3);
const dataLength = previousTokens.at(-1);
if (
previousTokens.at(-4) !== "(" ||
previousTokens.at(-2) !== ")" ||
(dataType !== "Binary" && dataType !== "Hex") ||
!/^\d+$/.test(dataLength)
) {
return null;
}
startDataLength = parseInt(dataLength, 10);
if (startDataLength <= 0) {
return null;
}
startDataIsHex = dataType === "Hex";
foundStartData = true;
break;
}
rememberToken(token);
if (token !== "/") {
continue;
}
token = this.getToken();
rememberToken(token);
switch (token) {
case "FontMatrix":
properties.fontMatrix = this.readNumberArray();
break;
case "FontBBox":
const fontBBox = this.readNumberArray();
properties.ascent = Math.max(fontBBox[3], fontBBox[1]);
properties.descent = Math.min(fontBBox[1], fontBBox[3]);
properties.ascentScaled = true;
break;
case "CIDCount":
cidCount = this.readInt();
break;
case "CIDMapOffset":
cidMapOffset = this.readInt();
break;
case "FDBytes":
fdBytes = this.readInt();
break;
case "GDBytes":
gdBytes = this.readInt();
break;
case "SubrMapOffset":
subrMapOffset = this.readInt();
break;
case "SDBytes":
sdBytes = this.readInt();
break;
case "SubrCount":
subrCount = this.readInt();
break;
case "BlueValues":
case "OtherBlues":
case "FamilyBlues":
case "FamilyOtherBlues":
// *Blue* values are skipped while hinting is disabled.
this.readNumberArray();
break;
case "StemSnapH":
case "StemSnapV":
privateData.set(token, this.readNumberArray());
break;
case "StdHW":
case "StdVW":
privateData.set(token, this.readNumberArray()[0]);
break;
case "BlueShift":
case "lenIV":
case "BlueFuzz":
case "BlueScale":
case "LanguageGroup":
privateData.set(token, this.readNumber());
break;
case "ExpansionFactor":
privateData.set(token, this.readNumber() || 0.06);
break;
case "ForceBold":
privateData.set(token, this.readBoolean());
break;
}
}
if (
!foundStartData ||
cidCount <= 0 ||
cidMapOffset < 0 ||
fdBytes < 0 ||
fdBytes > 4 ||
gdBytes < 1 ||
gdBytes > 4
) {
return null;
}
// After "StartData", currentChar is the single separator byte (typically
// a space); the next byte starts the binary block. `startDataLength` is
// only an upper bound: some generators (see issue 15292) write a wrong
// value, and the buffer is also untrusted PostScript input -- cap to the
// stream's remaining bytes before allocating.
const maxLength = stream.end - stream.pos;
if (startDataLength > maxLength) {
if (!startDataIsHex) {
startDataLength = maxLength;
} else if (startDataLength > 2 * maxLength) {
// Hex needs ~2 chars per output byte; anything larger is impossible.
return null;
}
}
let binary = stream.getBytes(startDataIsHex ? undefined : startDataLength);
if (startDataIsHex) {
const decoded = new Uint8Array(startDataLength);
let digit1 = -1,
j = 0;
for (let i = 0, ii = binary.length; i < ii && j < startDataLength; i++) {
const digit = binary[i];
if (!isHexDigit(digit)) {
continue;
}
if (digit1 < 0) {
digit1 = digit;
continue;
}
decoded[j++] = parseInt(String.fromCharCode(digit1, digit), 16);
digit1 = -1;
}
if (j !== startDataLength) {
return null;
}
binary = decoded;
}
const lenIV = privateData.get("lenIV");
const cidEntrySize = fdBytes + gdBytes;
const subrs = [];
function readUint(offset, byteCount) {
let n = 0;
for (let i = 0; i < byteCount; i++) {
n = (n << 8) | binary[offset + i];
}
return n >>> 0;
}
if (
cidMapOffset + (cidCount + 1) * cidEntrySize > binary.length ||
(subrCount > 0 &&
(subrMapOffset < 0 ||
sdBytes < 1 ||
sdBytes > 4 ||
subrMapOffset + (subrCount + 1) * sdBytes > binary.length))
) {
return null;
}
if (fdBytes > 0) {
// Only single-FDArray fonts are supported here. Reject CID-keyed fonts
// that actually select multiple font dictionaries, since each FD can
// define different private data and subroutines.
for (let cid = 0; cid < cidCount; cid++) {
if (readUint(cidMapOffset + cid * cidEntrySize, fdBytes) !== 0) {
return null;
}
}
}
if (subrCount > 0) {
const subrOffsets = new Array(subrCount + 1);
for (let i = 0; i <= subrCount; i++) {
subrOffsets[i] = readUint(subrMapOffset + i * sdBytes, sdBytes);
}
for (let i = 0; i < subrCount; i++) {
const start = subrOffsets[i];
const end = subrOffsets[i + 1];
if (end > binary.length || end < start) {
subrs[i] = new Uint8Array(0);
continue;
}
subrs[i] = this.readCharStrings(binary.subarray(start, end), lenIV);
}
}
const charstrings = [];
let prevOffset = readUint(cidMapOffset + fdBytes, gdBytes);
for (let cid = 0; cid < cidCount; cid++) {
const nextOffset = readUint(
cidMapOffset + (cid + 1) * cidEntrySize + fdBytes,
gdBytes
);
const glyphName = cid === 0 ? ".notdef" : `cid${cid}`;
if (nextOffset > prevOffset && nextOffset <= binary.length) {
const encoded = this.readCharStrings(
binary.subarray(prevOffset, nextOffset),
lenIV
);
const charString = new Type1CharString();
const error = charString.convert(
encoded,
subrs,
this.seacAnalysisEnabled
);
charstrings.push({
glyphName,
charstring: error ? [14] : charString.output,
width: charString.width,
lsb: charString.lsb,
seac: charString.seac,
});
} else {
// Empty intervals should select CID 0's notdef glyph, while still
// keeping the slot so the CID-to-GID mapping stays aligned.
const notDef = charstrings[0];
charstrings.push({
glyphName,
charstring: notDef?.charstring.slice() || [0x8b, 0x0e], // 0 endchar
width: notDef?.width || 0,
lsb: notDef?.lsb || 0,
});
}
prevOffset = nextOffset;
}
program.subrs = subrs;
program.charstrings = charstrings;
return program;
}
extractFontHeader(properties) {
let token;
while ((token = this.getToken()) !== null) {

View File

@ -13,11 +13,58 @@
* limitations under the License.
*/
import { bytesToString } from "../../src/shared/util.js";
import { SEAC_ANALYSIS_ENABLED } from "../../src/core/fonts_utils.js";
import { StringStream } from "../../src/core/stream.js";
import { Type1Parser } from "../../src/core/type1_parser.js";
describe("Type1Parser", function () {
// Builds a StringStream holding a minimal CID-keyed Type 1 program: a
// PostScript header declaring the map layout, a Private dict with `lenIV`,
// and a StartData section carrying `binary` either raw or hex-encoded.
// `declaredLength` and `trailer` allow tests to lie about the data length or
// to append bytes after the data.
function createCidKeyedFontStream({
  binary,
  cidCount = 3,
  fdBytes = 0,
  dataFormat = "Binary",
  declaredLength = binary.length,
  lenIV = -1,
  subrMapOffset = 0,
  subrCount = 0,
  sdBytes = 0,
  trailer = "",
}) {
  let data;
  if (dataFormat === "Hex") {
    data = binary.toHex();
  } else {
    data = bytesToString(binary);
  }
  const programLines = [
    "%!PS-Adobe-3.0 Resource-CIDFont",
    "/CIDMapOffset 0 def",
    `/FDBytes ${fdBytes} def`,
    "/GDBytes 1 def",
    `/CIDCount ${cidCount} def`,
    `/SubrMapOffset ${subrMapOffset} def`,
    `/SDBytes ${sdBytes} def`,
    `/SubrCount ${subrCount} def`,
    "/Private 5 dict dup begin",
    `/lenIV ${lenIV} def`,
    "end def",
    `(${dataFormat}) ${declaredLength} StartData ${data}${trailer}`,
  ];
  return new StringStream(programLines.join("\n"));
}
// Encrypts `plain` with the Type 1 charstring cipher (initial key 4330), so
// that `Type1Parser.readCharStrings` called with the same `lenIV` decodes the
// result back to `plain`. The `lenIV` leading bytes are encrypted zeros.
function encryptCharString(plain, lenIV) {
  const MULTIPLIER = 52845;
  const INCREMENT = 22719;
  const encrypted = new Uint8Array(lenIV + plain.length);
  let key = 4330;
  for (const index of encrypted.keys()) {
    const plainByte = index >= lenIV ? plain[index - lenIV] : 0;
    const cipherByte = (plainByte ^ (key >> 8)) & 0xff;
    encrypted[index] = cipherByte;
    // The key schedule is driven by the ciphertext byte just produced.
    key = ((key + cipherByte) * MULTIPLIER + INCREMENT) & 0xffff;
  }
  return encrypted;
}
it("splits tokens", function () {
const stream = new StringStream("/BlueValues[-17 0]noaccess def");
const parser = new Type1Parser(stream, false, SEAC_ANALYSIS_ENABLED);
@ -97,6 +144,210 @@ describe("Type1Parser", function () {
expect(program.properties.privateData.get("ExpansionFactor")).toEqual(99);
});
it("parses a CID-keyed Type 1 font program", function () {
  // Type 1 charstrings: "0 500 hsbw endchar" and "0 250 hsbw endchar".
  const cid0Glyph = [0x8b, 0xf8, 0x88, 0x0d, 0x0e];
  const cid2Glyph = [0x8b, 0xf7, 0x8e, 0x0d, 0x0e];
  // One-byte CIDMap offsets: CID 0 spans [4, 9), CID 1 is the empty span
  // [9, 9) and CID 2 spans [9, 14).
  const cidMap = [4, 9, 9, 14];
  const binary = Uint8Array.from([...cidMap, ...cid0Glyph, ...cid2Glyph]);

  const parser = new Type1Parser(
    createCidKeyedFontStream({ binary }),
    false,
    SEAC_ANALYSIS_ENABLED
  );
  const { subrs, charstrings } = parser.extractCidKeyedFontProgram({});

  expect(subrs.length).toEqual(0);
  expect(charstrings.map(glyph => glyph.glyphName)).toEqual([
    ".notdef",
    "cid1",
    "cid2",
  ]);

  const [notdef, emptyCid, lastCid] = charstrings;
  expect(notdef.width).toEqual(500);
  // The empty CID 1 slot is filled with a copy of the .notdef glyph.
  expect(emptyCid.width).toEqual(500);
  expect(emptyCid.charstring).toEqual(notdef.charstring);
  expect(lastCid.width).toEqual(250);
});
it("parses a hex-encoded CID-keyed Type 1 data section", function () {
  // A four-entry, one-byte CIDMap followed by two charstrings:
  // "0 500 hsbw endchar" for CID 0 and "0 250 hsbw endchar" for CID 2.
  const cidMap = [4, 9, 9, 14];
  const cid0Glyph = [0x8b, 0xf8, 0x88, 0x0d, 0x0e];
  const cid2Glyph = [0x8b, 0xf7, 0x8e, 0x0d, 0x0e];
  const binary = Uint8Array.from([...cidMap, ...cid0Glyph, ...cid2Glyph]);

  const hexStream = createCidKeyedFontStream({ dataFormat: "Hex", binary });
  const parser = new Type1Parser(hexStream, false, SEAC_ANALYSIS_ENABLED);
  const { charstrings } = parser.extractCidKeyedFontProgram({});

  expect(charstrings[2].width).toEqual(250);
});
it("rejects CID-keyed Type 1 fonts with multiple FD indices", function () {
  // With FDBytes = 1 each CIDMap entry is an (FD index, offset) pair.
  // CID 0 selects FD index 1, which is unsupported.
  const cidMap = [1, 4, 0, 9];
  const glyph = [0x8b, 0xf8, 0x88, 0x0d, 0x0e];
  const binary = Uint8Array.from([...cidMap, ...glyph]);

  const fontStream = createCidKeyedFontStream({
    binary,
    fdBytes: 1,
    cidCount: 1,
  });
  const parser = new Type1Parser(fontStream, false, SEAC_ANALYSIS_ENABLED);
  const program = parser.extractCidKeyedFontProgram({});

  expect(program).toEqual(null);
});
it("uses subrs when parsing a CID-keyed Type 1 font", function () {
  // Callable subroutine: 0 333 hsbw return.
  const subr0 = [0x8b, 0xf7, 0xe1, 0x0d, 0x0b];
  // 0 500 hsbw endchar.
  const cid0 = [0x8b, 0xf8, 0x88, 0x0d, 0x0e];
  // 0 callsubr endchar -- the width comes from subr 0.
  const cid1 = [0x8b, 0x0a, 0x0e];

  // The data is laid out as CIDMap (3 bytes), SubrMap (2 bytes), subr0,
  // cid0 and finally cid1.
  const subrOffset = 5;
  const cid0Offset = subrOffset + subr0.length;
  const cid1Offset = cid0Offset + cid0.length;
  const endOffset = cid1Offset + cid1.length;
  const binary = Uint8Array.from([
    // CIDMap: CID 0, CID 1, sentinel.
    cid0Offset,
    cid1Offset,
    endOffset,
    // SubrMap: subr 0, sentinel.
    subrOffset,
    cid0Offset,
    ...subr0,
    ...cid0,
    ...cid1,
  ]);

  const fontStream = createCidKeyedFontStream({
    binary,
    cidCount: 2,
    subrMapOffset: 3,
    subrCount: 1,
    sdBytes: 1,
  });
  const parser = new Type1Parser(fontStream, false, SEAC_ANALYSIS_ENABLED);
  const { subrs, charstrings } = parser.extractCidKeyedFontProgram({});

  expect(subrs.length).toEqual(1);
  expect(charstrings[0].width).toEqual(500);
  expect(charstrings[1].width).toEqual(333);
});
it("decrypts charstrings when lenIV > 0", function () {
  // 0 500 hsbw endchar, encrypted with four leading padding bytes.
  const encryptedGlyph = encryptCharString([0x8b, 0xf8, 0x88, 0x0d, 0x0e], 4);
  // The CIDMap takes two one-byte entries (CIDCount + 1), so the glyph data
  // starts at offset 2.
  const glyphStart = 2;
  const binary = Uint8Array.from([
    glyphStart,
    glyphStart + encryptedGlyph.length,
    ...encryptedGlyph,
  ]);

  const fontStream = createCidKeyedFontStream({
    binary,
    lenIV: 4,
    cidCount: 1,
  });
  const parser = new Type1Parser(fontStream, false, SEAC_ANALYSIS_ENABLED);
  const { charstrings } = parser.extractCidKeyedFontProgram({});

  expect(charstrings[0].width).toEqual(500);
});
it("decodes hex CID-keyed data with whitespace between digits", function () {
  const binary = Uint8Array.of(4, 9, 9, 14, 0x8b, 0xf8, 0x88, 0x0d, 0x0e);
  // Split the hex dump into byte pairs separated by single spaces.
  const hexPairs = binary.toHex().match(/.{1,2}/g);
  const spacedHex = hexPairs.join(" ");

  const programSource = [
    "%!PS-Adobe-3.0 Resource-CIDFont",
    "/CIDMapOffset 0 def",
    "/FDBytes 0 def",
    "/GDBytes 1 def",
    "/CIDCount 1 def",
    "/Private 5 dict dup begin /lenIV -1 def end def",
    `(Hex) ${binary.length} StartData ${spacedHex}`,
  ].join("\n");

  const parser = new Type1Parser(
    new StringStream(programSource),
    false,
    SEAC_ANALYSIS_ENABLED
  );
  const { charstrings } = parser.extractCidKeyedFontProgram({});

  expect(charstrings[0].width).toEqual(500);
});
it("rejects truncated CID-keyed binary data", function () {
  // Three CIDs need a CIDMap of 4 one-byte entries, yet the data section
  // only holds 2 bytes: the map would extend past the end of the data.
  const truncated = new Uint8Array(2);
  const fontStream = createCidKeyedFontStream({
    cidCount: 3,
    binary: truncated,
  });
  const parser = new Type1Parser(fontStream, false, SEAC_ANALYSIS_ENABLED);
  const program = parser.extractCidKeyedFontProgram({});

  expect(program).toEqual(null);
});
it("rejects malformed StartData token sequences", function () {
  // Header shared by every case; only the StartData tail differs.
  const header =
    "%!PS-Adobe-3.0 Resource-CIDFont\n" +
    "/CIDMapOffset 0 def /FDBytes 0 def /GDBytes 1 def\n" +
    "/CIDCount 1 def\n" +
    "/Private 5 dict dup begin /lenIV -1 def end def\n";

  const malformedTails = [
    // The "(Binary)" / "(Hex)" tag is not parenthesised.
    "Binary 4 StartData \x00\x00\x00\x00",
    // The length is not a number.
    "(Binary) abc StartData \x00\x00\x00\x00",
    // The data type is neither Binary nor Hex.
    "(Ascii) 4 StartData \x00\x00\x00\x00",
    // The length is zero.
    "(Binary) 0 StartData",
  ];

  malformedTails.forEach(tail => {
    const parser = new Type1Parser(
      new StringStream(header + tail),
      false,
      SEAC_ANALYSIS_ENABLED
    );
    expect(parser.extractCidKeyedFontProgram({})).toEqual(null);
  });
});
it("rejects oversized hex StartData lengths", function () {
  // The header claims 1 GiB of hex data although only two hex digits
  // follow; this must be rejected before any allocation.
  const programSource = [
    "%!PS-Adobe-3.0 Resource-CIDFont",
    "/CIDMapOffset 0 def /FDBytes 0 def /GDBytes 1 def",
    "/CIDCount 1 def",
    "/Private 5 dict dup begin /lenIV -1 def end def",
    "(Hex) 1073741824 StartData 00",
  ].join("\n");

  const parser = new Type1Parser(
    new StringStream(programSource),
    false,
    SEAC_ANALYSIS_ENABLED
  );
  const program = parser.extractCidKeyedFontProgram({});

  expect(program).toEqual(null);
});
it("parses font header font matrix", function () {
const stream = new StringStream(
"/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n"