Merge pull request #21139 from Snuffleupagus/jbig2_nowasm_fallback

[api-minor] Replace the CCITT and JBig2 fallback decoders with a JS version of the PDFium decoder
This commit is contained in:
Jonas Jenwald 2026-04-23 15:08:17 +02:00 committed by GitHub
commit bede5e5bfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 109 additions and 3981 deletions

15
external/jbig2/jbig2_nowasm_fallback.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -638,10 +638,17 @@ function createWasmBundle() {
base: "external/qcms",
encoding: false,
}),
gulp.src(["external/jbig2/*.wasm", "external/jbig2/LICENSE_*"], {
base: "external/jbig2",
encoding: false,
}),
gulp.src(
[
"external/jbig2/*.wasm",
"external/jbig2/jbig2_nowasm_fallback.js",
"external/jbig2/LICENSE_*",
],
{
base: "external/jbig2",
encoding: false,
}
),
]);
}

View File

@ -1,182 +0,0 @@
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Table C-2 (probability estimation states), stored compactly.
//
// Each row below is a tuple `[qe, nmps, nlps, switchFlag]` that gets expanded
// into an object with those four keys, so a state can be looked up by index:
//  - `qe`:         value subtracted from the interval register when decoding.
//  - `nmps`:       index of the next state after an MPS decision.
//  - `nlps`:       index of the next state after an LPS decision.
//  - `switchFlag`: when 1, an LPS decision also flips the context's MPS bit.
const QeTable = [
  [0x5601, 1, 1, 1],
  [0x3401, 2, 6, 0],
  [0x1801, 3, 9, 0],
  [0x0ac1, 4, 12, 0],
  [0x0521, 5, 29, 0],
  [0x0221, 38, 33, 0],
  [0x5601, 7, 6, 1],
  [0x5401, 8, 14, 0],
  [0x4801, 9, 14, 0],
  [0x3801, 10, 14, 0],
  [0x3001, 11, 17, 0],
  [0x2401, 12, 18, 0],
  [0x1c01, 13, 20, 0],
  [0x1601, 29, 21, 0],
  [0x5601, 15, 14, 1],
  [0x5401, 16, 14, 0],
  [0x5101, 17, 15, 0],
  [0x4801, 18, 16, 0],
  [0x3801, 19, 17, 0],
  [0x3401, 20, 18, 0],
  [0x3001, 21, 19, 0],
  [0x2801, 22, 19, 0],
  [0x2401, 23, 20, 0],
  [0x2201, 24, 21, 0],
  [0x1c01, 25, 22, 0],
  [0x1801, 26, 23, 0],
  [0x1601, 27, 24, 0],
  [0x1401, 28, 25, 0],
  [0x1201, 29, 26, 0],
  [0x1101, 30, 27, 0],
  [0x0ac1, 31, 28, 0],
  [0x09c1, 32, 29, 0],
  [0x08a1, 33, 30, 0],
  [0x0521, 34, 31, 0],
  [0x0441, 35, 32, 0],
  [0x02a1, 36, 33, 0],
  [0x0221, 37, 34, 0],
  [0x0141, 38, 35, 0],
  [0x0111, 39, 36, 0],
  [0x0085, 40, 37, 0],
  [0x0049, 41, 38, 0],
  [0x0025, 42, 39, 0],
  [0x0015, 43, 40, 0],
  [0x0009, 44, 41, 0],
  [0x0005, 45, 42, 0],
  [0x0001, 45, 43, 0],
  [0x5601, 46, 46, 0],
].map(([qe, nmps, nlps, switchFlag]) => ({ qe, nmps, nlps, switchFlag }));
/**
 * QM Coder arithmetic decoder, following the decoding procedure described in
 * JPEG 2000 Part I Final Committee Draft Version 1.0, Annex C.3
 * (available at http://www.jpeg.org/public/fcd15444-1.pdf).
 *
 * Together with context models, the decoder is used to decode JPEG2000 and
 * JBIG2 streams.
 */
class ArithmeticDecoder {
  /**
   * C.3.5 Initialisation of the decoder (INITDEC).
   *
   * @param data - Indexable sequence of bytes holding the compressed data
   *   (presumably a typed array; confirm against callers).
   * @param {number} start - Index of the first byte to read.
   * @param {number} end - Index at which the compressed data ends; once the
   *   read position reaches it, `byteIn` feeds 0xff00 instead of a data byte.
   */
  constructor(data, start, end) {
    this.data = data;
    this.bp = start;
    this.dataEnd = end;
    this.chigh = data[start];
    this.clow = 0;

    // The first `byteIn` call also establishes the bit counter `ct`.
    this.byteIn();

    // Shift the code register left by seven bits, moving the top bits of the
    // low half into the high half.
    const { chigh, clow } = this;
    this.chigh = ((chigh << 7) & 0xffff) | ((clow >> 9) & 0x7f);
    this.clow = (clow << 7) & 0xffff;
    this.ct -= 7;
    this.a = 0x8000;
  }

  /**
   * C.3.4 Compressed data input (BYTEIN).
   *
   * Adds the next chunk of input to the low half of the code register and
   * resets the bit counter `ct`; any overflow past 16 bits is carried into
   * the high half.
   */
  byteIn() {
    const { data, bp: current } = this;

    if (data[current] !== 0xff) {
      // Ordinary byte: advance and load eight fresh bits (or 0xff00 once the
      // end of the data has been reached).
      const next = current + 1;
      this.clow += next < this.dataEnd ? data[next] << 8 : 0xff00;
      this.ct = 8;
      this.bp = next;
    } else if (data[current + 1] > 0x8f) {
      // 0xff followed by a value above 0x8f: do not advance, feed 0xff00.
      this.clow += 0xff00;
      this.ct = 8;
    } else {
      // 0xff followed by a value of at most 0x8f: the next byte only
      // contributes seven bits.
      const next = current + 1;
      this.clow += data[next] << 9;
      this.ct = 7;
      this.bp = next;
    }

    if (this.clow > 0xffff) {
      this.chigh += this.clow >> 16;
      this.clow &= 0xffff;
    }
  }

  /**
   * C.3.2 Decoding a decision (DECODE).
   *
   * @param contexts - Array of packed context states, one byte each: the
   *   highest 7 bits carry the `QeTable` index, the lowest bit carries the
   *   current MPS value. The entry at `pos` is updated in place whenever
   *   renormalisation takes place.
   * @param {number} pos - Index of the context to use within `contexts`.
   * @returns {number} The decoded bit (0 or 1).
   */
  readBit(contexts, pos) {
    const packed = contexts[pos];
    let stateIndex = packed >> 1;
    let mps = packed & 1;

    const state = QeTable[stateIndex];
    const { qe } = state;
    let a = this.a - qe;

    // Work out which symbol was coded; the symbol's effects are applied below.
    let isLps;
    if (this.chigh < qe) {
      // exchangeLps
      isLps = !(a < qe);
      a = qe;
    } else {
      this.chigh -= qe;
      if ((a & 0x8000) !== 0) {
        // Fast path: no renormalisation needed, so the context is untouched.
        this.a = a;
        return mps;
      }
      // exchangeMps
      isLps = a < qe;
    }

    let bit;
    if (isLps) {
      bit = 1 ^ mps;
      if (state.switchFlag === 1) {
        mps = bit;
      }
      stateIndex = state.nlps;
    } else {
      bit = mps;
      stateIndex = state.nmps;
    }

    // C.3.3 renormD;
    do {
      if (this.ct === 0) {
        this.byteIn();
      }
      const carry = (this.clow >> 15) & 1;
      this.chigh = ((this.chigh << 1) & 0xffff) | carry;
      this.clow = (this.clow << 1) & 0xffff;
      this.ct--;
      a <<= 1;
    } while ((a & 0x8000) === 0);

    this.a = a;
    contexts[pos] = (stateIndex << 1) | mps;
    return bit;
  }
}
export { ArithmeticDecoder };

File diff suppressed because it is too large Load Diff

View File

@ -13,11 +13,10 @@
* limitations under the License.
*/
import { shadow, warn } from "../shared/util.js";
import { CCITTFaxDecoder } from "./ccitt.js";
import { shadow, unreachable } from "../shared/util.js";
import { DecodeStream } from "./decode_stream.js";
import { Dict } from "./primitives.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
import { JBig2CCITTFaxImage } from "./jbig2_ccittFax.js";
class CCITTFaxStream extends DecodeStream {
constructor(str, maybeLength, params) {
@ -48,7 +47,7 @@ class CCITTFaxStream extends DecodeStream {
}
readBlock() {
this.decodeImageFallback();
unreachable("CCITTFaxStream.readBlock");
}
get isImageStream() {
@ -69,59 +68,18 @@ class CCITTFaxStream extends DecodeStream {
: this.bytes;
}
try {
this.buffer = await JBig2CCITTFaxWasmImage.decode(
bytes,
this.dict.get("W", "Width"),
this.dict.get("H", "Height"),
null,
this.params
);
} catch {
warn("CCITTFaxStream: Falling back to JS CCITTFax decoder.");
return this.decodeImageFallback(bytes, length);
}
this.buffer = await JBig2CCITTFaxImage.decode(
bytes,
this.dict.get("W", "Width"),
this.dict.get("H", "Height"),
null,
this.params
);
this.bufferLength = this.buffer.length;
this.eof = true;
return this.buffer;
}
decodeImageFallback(bytes, length) {
if (this.eof) {
return this.buffer;
}
const { params } = this;
if (!bytes) {
this.stream.reset();
bytes = this.bytes;
}
let pos = 0;
const source = {
next() {
return bytes[pos++] ?? -1;
},
};
if (length && this.buffer.byteLength < length) {
this.buffer = new Uint8Array(length);
}
this.ccittFaxDecoder = new CCITTFaxDecoder(source, params);
let outPos = 0;
while (!this.eof) {
const c = this.ccittFaxDecoder.readNextChar();
if (c === -1) {
this.eof = true;
break;
}
if (!length) {
this.ensureBuffer(outPos + 1);
}
this.buffer[outPos++] = c;
}
this.bufferLength = this.buffer.length;
return this.buffer.subarray(0, length || this.bufferLength);
}
}
export { CCITTFaxStream };

View File

@ -16,7 +16,7 @@
import { clearPatternCaches } from "./pattern.js";
import { clearPrimitiveCaches } from "./primitives.js";
import { clearUnicodeCaches } from "./unicode.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
import { JBig2CCITTFaxImage } from "./jbig2_ccittFax.js";
import { JpxImage } from "./jpx.js";
function clearGlobalCaches() {
@ -24,9 +24,9 @@ function clearGlobalCaches() {
clearPrimitiveCaches();
clearUnicodeCaches();
// Remove the global `JBig2CCITTFaxWasmImage`/`JpxImage` instances,
// Remove the global `JBig2CCITTFaxImage`/`JpxImage` instances,
// since they may hold references to the WebAssembly modules.
JBig2CCITTFaxWasmImage.cleanup();
JBig2CCITTFaxImage.cleanup();
JpxImage.cleanup();
}

View File

@ -268,13 +268,6 @@ function toRomanNumerals(number, lowerCase = false) {
return lowerCase ? roman.toLowerCase() : roman;
}
// Calculate the base 2 logarithm of the number `x`. This differs from the
// native function in the sense that it returns the ceiling value and that it
// returns 0 instead of `Infinity`/`NaN` for `x` values smaller than/equal to 0.
function log2(x) {
return x > 0 ? Math.ceil(Math.log2(x)) : 0;
}
// Checks if ch is one of the following characters: SPACE, TAB, CR or LF.
function isWhiteSpace(ch) {
return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a;
@ -771,7 +764,6 @@ export {
isBooleanArray,
isNumberArray,
isWhiteSpace,
log2,
lookupMatrix,
lookupNormalRect,
lookupRect,

File diff suppressed because it is too large Load Diff

View File

@ -13,12 +13,17 @@
* limitations under the License.
*/
import { BaseException, warn } from "../shared/util.js";
import { fetchBinaryData } from "./core_utils.js";
import JBig2 from "../../external/jbig2/jbig2.js";
import { Jbig2Error } from "./jbig2.js";
import { warn } from "../shared/util.js";
class JBig2CCITTFaxWasmImage {
class Jbig2Error extends BaseException {
constructor(msg) {
super(msg, "Jbig2Error");
}
}
class JBig2CCITTFaxImage {
static #buffer = null;
static #handler = null;
@ -41,6 +46,24 @@ class JBig2CCITTFaxWasmImage {
}
}
static async #getJsModule(fallbackCallback) {
const path =
typeof PDFJSDev === "undefined"
? `../${this.#wasmUrl}jbig2_nowasm_fallback.js`
: `${this.#wasmUrl}jbig2_nowasm_fallback.js`;
let instance = null;
try {
const mod = await (typeof PDFJSDev === "undefined"
? import(path) // eslint-disable-line no-unsanitized/method
: __raw_import__(path));
instance = mod.default();
} catch (e) {
warn(`JBig2CCITTFaxImage#getJsModule: ${e}`);
}
fallbackCallback(instance);
}
static async #instantiateWasm(fallbackCallback, imports, successCallback) {
const filename = "jbig2.wasm";
try {
@ -60,8 +83,10 @@ class JBig2CCITTFaxWasmImage {
const results = await WebAssembly.instantiate(this.#buffer, imports);
return successCallback(results.instance);
} catch (reason) {
warn(`JBig2Image#instantiateWasm: ${reason}`);
return fallbackCallback(null);
warn(`JBig2CCITTFaxImage#instantiateWasm: ${reason}`);
this.#getJsModule(fallbackCallback);
return null;
} finally {
this.#handler = null;
}
@ -71,19 +96,20 @@ class JBig2CCITTFaxWasmImage {
if (!this.#modulePromise) {
const { promise, resolve } = Promise.withResolvers();
const promises = [promise];
if (this.#useWasm) {
if (!this.#useWasm) {
this.#getJsModule(resolve);
} else {
promises.push(
JBig2({
warn,
instantiateWasm: this.#instantiateWasm.bind(this, resolve),
})
);
} else {
resolve(null);
}
this.#modulePromise = Promise.race(promises);
}
const module = await this.#modulePromise;
if (!module) {
throw new Jbig2Error("JBig2 failed to initialize");
}
@ -137,4 +163,4 @@ class JBig2CCITTFaxWasmImage {
}
}
export { JBig2CCITTFaxWasmImage };
export { JBig2CCITTFaxImage, Jbig2Error };

View File

@ -13,12 +13,11 @@
* limitations under the License.
*/
import { shadow, warn } from "../shared/util.js";
import { shadow, unreachable } from "../shared/util.js";
import { BaseStream } from "./base_stream.js";
import { DecodeStream } from "./decode_stream.js";
import { Dict } from "./primitives.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
import { Jbig2Image } from "./jbig2.js";
import { JBig2CCITTFaxImage } from "./jbig2_ccittFax.js";
/**
* For JBIG2's we use a library to decode these images and
@ -45,7 +44,7 @@ class Jbig2Stream extends DecodeStream {
}
readBlock() {
this.decodeImageFallback();
unreachable("Jbig2Stream.readBlock");
}
get isAsyncDecoder() {
@ -61,55 +60,21 @@ class Jbig2Stream extends DecodeStream {
return this.buffer;
}
bytes ||= this.bytes;
try {
let globals = null;
if (this.params instanceof Dict) {
const globalsStream = this.params.get("JBIG2Globals");
if (globalsStream instanceof BaseStream) {
globals = globalsStream.getBytes();
}
}
this.buffer = await JBig2CCITTFaxWasmImage.decode(
bytes,
this.dict.get("Width"),
this.dict.get("Height"),
globals
);
} catch {
warn("Jbig2Stream: Falling back to JS JBIG2 decoder.");
return this.decodeImageFallback(bytes, length);
}
this.bufferLength = this.buffer.length;
this.eof = true;
return this.buffer;
}
decodeImageFallback(bytes, _length) {
if (this.eof) {
return this.buffer;
}
bytes ||= this.bytes;
const jbig2Image = new Jbig2Image();
const chunks = [];
let globals = null;
if (this.params instanceof Dict) {
const globalsStream = this.params.get("JBIG2Globals");
if (globalsStream instanceof BaseStream) {
const globals = globalsStream.getBytes();
chunks.push({ data: globals, start: 0, end: globals.length });
globals = globalsStream.getBytes();
}
}
chunks.push({ data: bytes, start: 0, end: bytes.length });
const data = jbig2Image.parseChunks(chunks);
const dataLength = data.length;
// JBIG2 had black as 1 and white as 0, inverting the colors
for (let i = 0; i < dataLength; i++) {
data[i] ^= 0xff;
}
this.buffer = data;
this.bufferLength = dataLength;
this.buffer = await JBig2CCITTFaxImage.decode(
bytes,
this.dict.get("Width"),
this.dict.get("Height"),
globals
);
this.bufferLength = this.buffer.length;
this.eof = true;
return this.buffer;

View File

@ -22,7 +22,7 @@ import {
} from "../shared/util.js";
import { ChunkedStreamManager } from "./chunked_stream.js";
import { ImageResizer } from "./image_resizer.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
import { JBig2CCITTFaxImage } from "./jbig2_ccittFax.js";
import { JpegStream } from "./jpeg_stream.js";
import { JpxImage } from "./jpx.js";
import { MissingDataException } from "./core_utils.js";
@ -85,7 +85,7 @@ class BasePdfManager {
JpxImage.setOptions(options);
IccColorSpace.setOptions(options);
CmykICCBasedCS.setOptions(options);
JBig2CCITTFaxWasmImage.setOptions(options);
JBig2CCITTFaxImage.setOptions(options);
PDFFunctionFactory.setOptions(options);
Pattern.setOptions(options);
}

View File

@ -18,7 +18,10 @@ import {
setVerbosityLevel,
VerbosityLevel,
} from "./shared/util.js";
import { Jbig2Error, Jbig2Image } from "./core/jbig2.js";
import {
Jbig2Error,
JBig2CCITTFaxImage as Jbig2Image,
} from "./core/jbig2_ccittFax.js";
import { JpegError, JpegImage } from "./core/jpg.js";
import { JpxError, JpxImage } from "./core/jpx.js";

View File

@ -719,12 +719,6 @@ class Driver {
this._log(`[${++this.tasksDone}] ${task.id}:\n`);
if (task.type === "skip-because-failing") {
this._log(` Skipping file "${task.file} because it's failing"\n`);
this._nextTask();
return;
}
// Support *linked* test-cases for the other suites, e.g. unit- and
// integration-tests, without needing to run them as reference-tests.
if (task.type === "other") {
@ -947,11 +941,7 @@ class Driver {
}
// Skip tasks that do not load a PDF or that need DOM setup (XFA style
// element injection) to happen synchronously before getDocument.
if (
task.type === "skip-because-failing" ||
task.type === "other" ||
task.enableXfa
) {
if (task.type === "other" || task.enableXfa) {
return;
}
if (!task._prefetchedLoadingTask) {

View File

@ -8732,6 +8732,14 @@
"rounds": 1,
"type": "eq"
},
{
"id": "ccitt_EndOfBlock_false_nowasm",
"file": "pdfs/ccitt_EndOfBlock_false.pdf",
"md5": "ce718efe601cd7491dd00651b4790329",
"rounds": 1,
"type": "eq",
"useWasm": false
},
{
"id": "issue9940",
"file": "pdfs/issue9940.pdf",
@ -13519,7 +13527,7 @@
"file": "pdfs/bitmap-symbol-context-reuse.pdf",
"md5": "3d79e2d087515c2fdbed6fec0ad86e91",
"rounds": 1,
"type": "skip-because-failing",
"type": "eq",
"useWasm": false
},
{

View File

@ -23,7 +23,6 @@ import {
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
numberToString,
parseXFAPath,
recoverJsURL,
@ -198,20 +197,6 @@ describe("core_utils", function () {
});
});
describe("log2", function () {
it("handles values smaller than/equal to zero", function () {
expect(log2(0)).toEqual(0);
expect(log2(-1)).toEqual(0);
});
it("handles values larger than zero", function () {
expect(log2(1)).toEqual(0);
expect(log2(2)).toEqual(1);
expect(log2(3)).toEqual(2);
expect(log2(3.14)).toEqual(2);
});
});
describe("numberToString", function () {
it("should stringify integers", function () {
expect(numberToString(1)).toEqual("1");

View File

@ -18,7 +18,10 @@ import {
setVerbosityLevel,
VerbosityLevel,
} from "../../src/shared/util.js";
import { Jbig2Error, Jbig2Image } from "../../src/core/jbig2.js";
import {
Jbig2Error,
JBig2CCITTFaxImage as Jbig2Image,
} from "../../src/core/jbig2_ccittFax.js";
import { JpegError, JpegImage } from "../../src/core/jpg.js";
import { JpxError, JpxImage } from "../../src/core/jpx.js";

View File

@ -32,7 +32,9 @@ const STANDARD_FONT_DATA_URL = isNodeJS
? "./external/standard_fonts/"
: "../../external/standard_fonts/";
const WASM_URL = isNodeJS ? "./external/openjpeg/" : "../../external/openjpeg/";
const WASM_URL = isNodeJS
? "./external/openjpeg/"
: "../../build/generic/web/wasm/";
class DefaultFileReaderFactory {
static async fetch(params) {