Strip the JBIG2 file header from JBIG2Decode streams

A PDF containing such a stream renders correctly in Acrobat and PDFBox.
This commit is contained in:
Calixte Denizet 2026-06-05 16:31:11 +02:00
parent 091f459d2e
commit 88c52a1523
4 changed files with 30 additions and 2 deletions

View File

@ -51,17 +51,37 @@ class Jbig2Stream extends DecodeStream {
return true;
}
// The JBIG2 file header is defined in ITU-T T.88, Annex D.4:
// https://www.itu.int/rec/T-REC-T.88
static stripFileHeader(bytes) {
if (
bytes.length >= 9 &&
bytes[0] === 0x97 &&
bytes[1] === 0x4a &&
bytes[2] === 0x42 &&
bytes[3] === 0x32 &&
bytes[4] === 0x0d &&
bytes[5] === 0x0a &&
bytes[6] === 0x1a &&
bytes[7] === 0x0a
) {
const headerLength = (bytes[8] & 2) === 0 ? 13 : 9;
return bytes.subarray(headerLength);
}
return bytes;
}
async decodeImage(bytes, length, _decoderOptions) {
if (this.eof) {
return this.buffer;
}
bytes ||= this.bytes;
bytes = Jbig2Stream.stripFileHeader(bytes || this.bytes);
let globals = null;
if (this.params instanceof Dict) {
const globalsStream = this.params.get("JBIG2Globals");
if (globalsStream instanceof BaseStream) {
globals = globalsStream.getBytes();
globals = Jbig2Stream.stripFileHeader(globalsStream.getBytes());
}
}
this.buffer = await JBig2CCITTFaxImage.instance.decode(

View File

@ -930,3 +930,4 @@
!90ms_rksj_h_sample.pdf
!issue21346.pdf
!cidfont_cmap_overflow.pdf
!jbig2_file_header.pdf

Binary file not shown.

View File

@ -14357,5 +14357,12 @@
"md5": "8ce57e23b42b3232b0f2060bdd2c8b2a",
"rounds": 1,
"type": "eq"
},
{
"id": "jbig2_file_header",
"file": "pdfs/jbig2_file_header.pdf",
"md5": "702be2459f9bdbe93338ab3a584babc4",
"rounds": 1,
"type": "eq"
}
]