From 88c52a15237e30bd3d8be6d5221a10da589ca4db Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 5 Jun 2026 16:31:11 +0200 Subject: [PATCH] Strip the JBIG2 file header from JBIG2Decode streams PDFs whose JBIG2Decode streams include the JBIG2 file header render correctly in Acrobat and PDFBox. --- src/core/jbig2_stream.js | 24 ++++++++++++++++++++++-- test/pdfs/.gitignore | 1 + test/pdfs/jbig2_file_header.pdf | Bin 0 -> 774 bytes test/test_manifest.json | 7 +++++++ 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 test/pdfs/jbig2_file_header.pdf diff --git a/src/core/jbig2_stream.js b/src/core/jbig2_stream.js index e94c1edb1..3b16f8674 100644 --- a/src/core/jbig2_stream.js +++ b/src/core/jbig2_stream.js @@ -51,17 +51,37 @@ class Jbig2Stream extends DecodeStream { return true; } + // The JBIG2 file header is defined in ITU-T T.88, Annex D.4: + // https://www.itu.int/rec/T-REC-T.88 + static stripFileHeader(bytes) { + if ( + bytes.length >= 9 && + bytes[0] === 0x97 && + bytes[1] === 0x4a && + bytes[2] === 0x42 && + bytes[3] === 0x32 && + bytes[4] === 0x0d && + bytes[5] === 0x0a && + bytes[6] === 0x1a && + bytes[7] === 0x0a + ) { + const headerLength = (bytes[8] & 2) === 0 ? 
13 : 9; + return bytes.subarray(headerLength); + } + return bytes; + } + async decodeImage(bytes, length, _decoderOptions) { if (this.eof) { return this.buffer; } - bytes ||= this.bytes; + bytes = Jbig2Stream.stripFileHeader(bytes || this.bytes); let globals = null; if (this.params instanceof Dict) { const globalsStream = this.params.get("JBIG2Globals"); if (globalsStream instanceof BaseStream) { - globals = globalsStream.getBytes(); + globals = Jbig2Stream.stripFileHeader(globalsStream.getBytes()); } } this.buffer = await JBig2CCITTFaxImage.instance.decode( diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 9db3f6b26..8bb7acc70 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -930,3 +930,4 @@ !90ms_rksj_h_sample.pdf !issue21346.pdf !cidfont_cmap_overflow.pdf +!jbig2_file_header.pdf diff --git a/test/pdfs/jbig2_file_header.pdf b/test/pdfs/jbig2_file_header.pdf new file mode 100644 index 0000000000000000000000000000000000000000..99fff7640ef03a356e7adc28c91130a2b9933799 GIT binary patch literal 774 zcmZWn&2G~`5O!NmDjnkAX;--I$~bV~sujc7DE{nM%9Syv^_Ur1JCE*KdSVm&)ET zSzl+nr$bpdOxfqKH7A7$@_4(b+n&`)N|_RgLF9tIJWYDW8WJd@%%* zD?m4!ojrgg6*YOX=b|h3)@T^)vknw;q+%$L*OQ&zF~l0JR_4;Xrci`}wQ`MV3hGax ze=Lp1#gb(_>yCQbT(Onm-Pq$a(MP5py9a6iNMl*dIFPD46fv*{yhvl%RN|D?QeCt` z)nzu6x#7)OGaYD9Y^&DV^coP$1lTs@eUq@d7Da_tATsh{tL9xM*U6IOI7S`Uey*W) zhI)W%#hy7!H|=SJ&>89hYAGv|Ka=J0c>L+<#+U8yyO&n4ykC7id1arg&(7h^+mDaV zf8YA??#ARZhAi`GH8fYMQ`5AjBvp>m^sH=B$=nNP#Gl(an0bT&zFZn-)Hc5%>BIkX zohUdYE)9q~6B_t_)u)GahH