Use the ccittfax decoder from pdfium

The CCITTFax decoder is a dependency of the JBIG2 decoder and is already
included in pdf.js, so we just need to wire it up.
It improves performance for documents that use CCITTFax images.
This commit is contained in:
calixteman 2026-02-01 17:24:38 +01:00
parent 471adfd023
commit 88c2051698
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F
9 changed files with 142 additions and 51 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -465,20 +465,33 @@ const blackTable3 = [
* @param {Object} [options] - Decoding options.
*/
class CCITTFaxDecoder {
constructor(source, options = {}) {
constructor(
source,
options = {
K: 0,
EndOfLine: false,
EncodedByteAlign: false,
Columns: 1728,
Rows: 0,
EndOfBlock: true,
BlackIs1: false,
}
) {
if (typeof source?.next !== "function") {
throw new Error('CCITTFaxDecoder - invalid "source" parameter.');
}
this.source = source;
this.eof = false;
this.encoding = options.K || 0;
this.eoline = options.EndOfLine || false;
this.byteAlign = options.EncodedByteAlign || false;
this.columns = options.Columns || 1728;
this.rows = options.Rows || 0;
this.eoblock = options.EndOfBlock ?? true;
this.black = options.BlackIs1 || false;
({
K: this.encoding,
EndOfLine: this.eoline,
EncodedByteAlign: this.byteAlign,
Columns: this.columns,
Rows: this.rows,
EndOfBlock: this.eoblock,
BlackIs1: this.black,
} = options);
this.codingLine = new Uint32Array(this.columns + 1);
this.refLine = new Uint32Array(this.columns + 2);

View File

@ -13,52 +13,115 @@
* limitations under the License.
*/
import { shadow, warn } from "../shared/util.js";
import { CCITTFaxDecoder } from "./ccitt.js";
import { DecodeStream } from "./decode_stream.js";
import { Dict } from "./primitives.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
class CCITTFaxStream extends DecodeStream {
constructor(str, maybeLength, params) {
super(maybeLength);
this.stream = str;
this.maybeLength = maybeLength;
this.dict = str.dict;
if (!(params instanceof Dict)) {
params = Dict.empty;
}
const source = {
next() {
return str.getByte();
},
this.params = {
K: params.get("K") || 0,
EndOfLine: !!params.get("EndOfLine"),
EncodedByteAlign: !!params.get("EncodedByteAlign"),
Columns: params.get("Columns") || 1728,
Rows: params.get("Rows") || 0,
EndOfBlock: !!(params.get("EndOfBlock") ?? true),
BlackIs1: !!params.get("BlackIs1"),
};
this.ccittFaxDecoder = new CCITTFaxDecoder(source, {
K: params.get("K"),
EndOfLine: params.get("EndOfLine"),
EncodedByteAlign: params.get("EncodedByteAlign"),
Columns: params.get("Columns"),
Rows: params.get("Rows"),
EndOfBlock: params.get("EndOfBlock"),
BlackIs1: params.get("BlackIs1"),
});
}
get bytes() {
// If `this.maybeLength` is null, we'll get the entire stream.
return shadow(this, "bytes", this.stream.getBytes(this.maybeLength));
}
readBlock() {
while (!this.eof) {
const c = this.ccittFaxDecoder.readNextChar();
if (c === -1) {
this.eof = true;
return;
}
this.ensureBuffer(this.bufferLength + 1);
this.buffer[this.bufferLength++] = c;
}
this.decodeImageFallback();
}
/**
 * Marks this stream as an image stream; always `true`.
 */
get isImageStream() {
return true;
}
/**
 * Always `true`. `DecodeStream.getImageData` checks this flag (when
 * `canAsyncDecodeImageFromBuffer` is falsy) to call `decodeImage` instead
 * of `getBytes`.
 */
get isAsyncDecoder() {
return true;
}
/**
 * Decode the whole image through `JBig2CCITTFaxWasmImage.decode`, falling
 * back to the JS decoder (`decodeImageFallback`) if that call throws.
 *
 * @param [bytes] - Encoded data. When falsy, it is read from the
 *   underlying stream.
 * @param [length] - Only forwarded to `decodeImageFallback`.
 * @param [_decoderOptions] - Unused.
 * @returns {Promise} Resolves to `this.buffer` on the WASM path, or to
 *   whatever `decodeImageFallback` returns on the fallback path.
 */
async decodeImage(bytes, length, _decoderOptions) {
// Already decoded (by either path): hand back the cached result.
if (this.eof) {
return this.buffer;
}
if (!bytes) {
// Prefer the async read when the underlying stream offers one; if it
// yields a falsy value, fall back to the synchronous `bytes` getter.
bytes = this.stream.isAsync
? (await this.stream.asyncGetBytes()) || this.bytes
: this.bytes;
}
try {
// `null` fills the `globals` slot of `decode`; passing `this.params` as
// the last argument is what selects the CCITT branch in that function.
// NOTE(review): "W"/"H" are presumably the abbreviated forms of
// "Width"/"Height" — confirm against `Dict.get`.
this.buffer = await JBig2CCITTFaxWasmImage.decode(
bytes,
this.dict.get("W", "Width"),
this.dict.get("H", "Height"),
null,
this.params
);
} catch {
// Any failure in the WASM path is downgraded to a warning and the JS
// decoder is used on the same input bytes.
warn("CCITTFaxStream: Falling back to JS CCITTFax decoder.");
return this.decodeImageFallback(bytes, length);
}
this.bufferLength = this.buffer.length;
this.eof = true;
return this.buffer;
}
/**
 * Synchronously decode the whole image with the JS `CCITTFaxDecoder`.
 *
 * @param [bytes] - Encoded data. When falsy, the underlying stream is
 *   reset and the data is taken from the `bytes` getter.
 * @param [length] - Expected decoded size. When truthy, the output buffer
 *   is sized up front and the result is cut to this many bytes; when
 *   falsy, the buffer is grown through `ensureBuffer` while decoding.
 * @returns A `subarray` view of `this.buffer` starting at offset 0.
 */
decodeImageFallback(bytes, length) {
// Already decoded: hand back the cached result.
if (this.eof) {
return this.buffer;
}
const { params } = this;
if (!bytes) {
this.stream.reset();
bytes = this.bytes;
}
// Adapter giving the decoder a `next()` byte source over `bytes`;
// reading past the end yields -1.
let pos = 0;
const source = {
next() {
return bytes[pos++] ?? -1;
},
};
// With a known output size, allocate once instead of growing in the loop.
if (length && this.buffer.byteLength < length) {
this.buffer = new Uint8Array(length);
}
this.ccittFaxDecoder = new CCITTFaxDecoder(source, params);
let outPos = 0;
while (!this.eof) {
const c = this.ccittFaxDecoder.readNextChar();
// -1 from the decoder marks the end of the decoded data.
if (c === -1) {
this.eof = true;
break;
}
if (!length) {
this.ensureBuffer(outPos + 1);
}
// NOTE(review): when `length` is given nothing here stops `outPos` from
// passing the end of the buffer; presumably such writes are silently
// dropped by the typed array — confirm this is intended.
this.buffer[outPos++] = c;
}
// NOTE(review): this records the buffer's capacity, not `outPos`. If
// `ensureBuffer` over-allocates, the no-`length` path would expose bytes
// that were never written — confirm against `DecodeStream.ensureBuffer`.
this.bufferLength = this.buffer.length;
return this.buffer.subarray(0, length || this.bufferLength);
}
}
export { CCITTFaxStream };

View File

@ -102,12 +102,12 @@ class DecodeStream extends BaseStream {
async getImageData(length, decoderOptions) {
if (!this.canAsyncDecodeImageFromBuffer) {
if (this.isAsyncDecoder) {
return this.decodeImage(null, decoderOptions);
return this.decodeImage(null, length, decoderOptions);
}
return this.getBytes(length, decoderOptions);
}
const data = await this.stream.asyncGetBytes();
return this.decodeImage(data, decoderOptions);
return this.decodeImage(data, length, decoderOptions);
}
reset() {

View File

@ -23,7 +23,7 @@ class JBig2Error extends BaseException {
}
}
class JBig2WasmImage {
class JBig2CCITTFaxWasmImage {
static #buffer = null;
static #handler = null;
@ -69,7 +69,7 @@ class JBig2WasmImage {
}
}
static async decode(bytes, width, height, globals) {
static async decode(bytes, width, height, globals, CCITTOptions) {
if (!this.#modulePromise) {
const { promise, resolve } = Promise.withResolvers();
const promises = [promise];
@ -95,13 +95,28 @@ class JBig2WasmImage {
const size = bytes.length;
ptr = module._malloc(size);
module.writeArrayToMemory(bytes, ptr);
const globalsSize = globals ? globals.length : 0;
if (globalsSize > 0) {
globalsPtr = module._malloc(globalsSize);
module.writeArrayToMemory(globals, globalsPtr);
}
module._jbig2_decode(ptr, size, width, height, globalsPtr, globalsSize);
if (CCITTOptions) {
module._ccitt_decode(
ptr,
size,
width,
height,
CCITTOptions.K,
CCITTOptions.EndOfLine ? 1 : 0,
CCITTOptions.EncodedByteAlign ? 1 : 0,
CCITTOptions.BlackIs1 ? 1 : 0,
CCITTOptions.Columns,
CCITTOptions.Rows
);
} else {
const globalsSize = globals ? globals.length : 0;
if (globalsSize > 0) {
globalsPtr = module._malloc(globalsSize);
module.writeArrayToMemory(globals, globalsPtr);
}
module._jbig2_decode(ptr, size, width, height, globalsPtr, globalsSize);
}
if (!module.imageData) {
throw new JBig2Error("Unknown error");
}
@ -124,4 +139,4 @@ class JBig2WasmImage {
}
}
export { JBig2Error, JBig2WasmImage };
export { JBig2CCITTFaxWasmImage, JBig2Error };

View File

@ -17,8 +17,8 @@ import { shadow, warn } from "../shared/util.js";
import { BaseStream } from "./base_stream.js";
import { DecodeStream } from "./decode_stream.js";
import { Dict } from "./primitives.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
import { Jbig2Image } from "./jbig2.js";
import { JBig2WasmImage } from "./jbig2_wasm.js";
/**
* For JBIG2's we use a library to decode these images and
@ -45,7 +45,7 @@ class Jbig2Stream extends DecodeStream {
}
readBlock() {
this.decodeImage();
this.decodeImageFallback();
}
get isAsyncDecoder() {
@ -56,7 +56,7 @@ class Jbig2Stream extends DecodeStream {
return true;
}
async decodeImage(bytes, _decoderOptions) {
async decodeImage(bytes, length, _decoderOptions) {
if (this.eof) {
return this.buffer;
}
@ -69,7 +69,7 @@ class Jbig2Stream extends DecodeStream {
globals = globalsStream.getBytes();
}
}
this.buffer = await JBig2WasmImage.decode(
this.buffer = await JBig2CCITTFaxWasmImage.decode(
bytes,
this.dict.get("Width"),
this.dict.get("Height"),
@ -77,7 +77,7 @@ class Jbig2Stream extends DecodeStream {
);
} catch {
warn("Jbig2Stream: Falling back to JS JBIG2 decoder.");
return this.decodeImageFallback(bytes);
return this.decodeImageFallback(bytes, length);
}
this.bufferLength = this.buffer.length;
this.eof = true;
@ -85,7 +85,7 @@ class Jbig2Stream extends DecodeStream {
return this.buffer;
}
async decodeImageFallback(bytes) {
decodeImageFallback(bytes, _length) {
if (this.eof) {
return this.buffer;
}

View File

@ -49,7 +49,7 @@ class JpxStream extends DecodeStream {
return true;
}
async decodeImage(bytes, decoderOptions) {
async decodeImage(bytes, _length, decoderOptions) {
if (this.eof) {
return this.buffer;
}

View File

@ -22,7 +22,7 @@ import {
} from "../shared/util.js";
import { ChunkedStreamManager } from "./chunked_stream.js";
import { ImageResizer } from "./image_resizer.js";
import { JBig2WasmImage } from "./jbig2_wasm.js";
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
import { JpegStream } from "./jpeg_stream.js";
import { JpxImage } from "./jpx.js";
import { MissingDataException } from "./core_utils.js";
@ -82,7 +82,7 @@ class BasePdfManager {
JpxImage.setOptions(options);
IccColorSpace.setOptions(options);
CmykICCBasedCS.setOptions(options);
JBig2WasmImage.setOptions(options);
JBig2CCITTFaxWasmImage.setOptions(options);
}
get docId() {