Use DecompressionStream in async code

Content streams and fonts are usually compressed using FlateDecode,
so use the DecompressionStream API to decompress those streams
in the async code path.
This commit is contained in:
calixteman 2026-01-23 21:35:09 +01:00
parent 6a4a3b060d
commit 9f660be8a2
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F
6 changed files with 66 additions and 7 deletions

View File

@ -699,6 +699,12 @@ class CMapFactory {
if (encoding instanceof Name) {
return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
} else if (encoding instanceof BaseStream) {
if (encoding.isAsync) {
const bytes = await encoding.asyncGetBytes();
if (bytes) {
encoding = new Stream(bytes, 0, bytes.length, encoding.dict);
}
}
const parsedCMap = await parseCMap(
/* cMap = */ new CMap(),
/* lexer = */ new Lexer(encoding),

View File

@ -61,6 +61,7 @@ import {
RefSetCache,
} from "./primitives.js";
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./calculate_md5.js";
import { Catalog } from "./catalog.js";
@ -68,7 +69,6 @@ import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Intersector } from "./intersector.js";
import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js";
import { OperatorList } from "./operator_list.js";
import { PartialEvaluator } from "./evaluator.js";
@ -271,9 +271,31 @@ class Page {
const content = await this.pdfManager.ensure(this, "content");
if (content instanceof BaseStream && !content.isImageStream) {
if (content.isAsync) {
const bytes = await content.asyncGetBytes();
if (bytes) {
return new Stream(bytes, 0, bytes.length, content.dict);
}
}
return content;
}
if (Array.isArray(content)) {
const promises = [];
for (let i = 0, ii = content.length; i < ii; i++) {
const item = content[i];
if (item instanceof BaseStream && item.isAsync) {
promises.push(
item.asyncGetBytes().then(bytes => {
if (bytes) {
content[i] = new Stream(bytes, 0, bytes.length, item.dict);
}
})
);
}
}
if (promises.length > 0) {
await Promise.all(promises);
}
return new StreamsSequenceStream(
content,
this.#onSubStreamError.bind(this)

View File

@ -1706,7 +1706,7 @@ class PartialEvaluator {
return null;
}
getOperatorList({
async getOperatorList({
stream,
task,
resources,
@ -1715,6 +1715,13 @@ class PartialEvaluator {
fallbackFontDict = null,
prevRefs = null,
}) {
if (stream.isAsync) {
const bytes = await stream.asyncGetBytes();
if (bytes) {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
@ -2373,7 +2380,7 @@ class PartialEvaluator {
});
}
getTextContent({
async getTextContent({
stream,
task,
resources,
@ -2389,6 +2396,13 @@ class PartialEvaluator {
prevRefs = null,
intersector = null,
}) {
if (stream.isAsync) {
const bytes = await stream.asyncGetBytes();
if (bytes) {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
@ -4565,8 +4579,16 @@ class PartialEvaluator {
if (fontFile) {
if (!(fontFile instanceof BaseStream)) {
throw new FormatError("FontFile should be a stream");
} else if (fontFile.isEmpty) {
throw new FormatError("FontFile is empty");
} else {
if (fontFile.isAsync) {
const bytes = await fontFile.asyncGetBytes();
if (bytes) {
fontFile = new Stream(bytes, 0, bytes.length, fontFile.dict);
}
}
if (fontFile.isEmpty) {
throw new FormatError("FontFile is empty");
}
}
}
} catch (ex) {

View File

@ -122,6 +122,8 @@ const fixedDistCodeTab = [
];
class FlateStream extends DecodeStream {
#isAsync = true;
constructor(str, maybeLength) {
super(maybeLength);
@ -162,7 +164,9 @@ class FlateStream extends DecodeStream {
async asyncGetBytes() {
this.stream.reset();
const bytes = this.stream.getBytes();
const bytes = this.stream.isAsync
? await this.stream.asyncGetBytes()
: this.stream.getBytes();
try {
const { readable, writable } = new DecompressionStream("deflate");
@ -200,6 +204,7 @@ class FlateStream extends DecodeStream {
// decoder.
// We already got the bytes from the underlying stream, so we just reuse
// them to avoid getting them again.
this.#isAsync = false;
this.stream = new Stream(
bytes,
2 /* = header size (see ctor) */,
@ -212,7 +217,7 @@ class FlateStream extends DecodeStream {
}
get isAsync() {
return true;
return this.#isAsync;
}
getBits(bits) {

View File

@ -51,6 +51,7 @@ class Stream extends BaseStream {
const strEnd = this.end;
if (!length) {
this.pos = strEnd;
return bytes.subarray(pos, strEnd);
}
let end = pos + length;

View File

@ -68,6 +68,7 @@ describe("CFFParser", function () {
});
beforeEach(function () {
fontData.reset();
parser = new CFFParser(fontData, {}, SEAC_ANALYSIS_ENABLED);
cff = parser.parse();
});
@ -168,6 +169,7 @@ describe("CFFParser", function () {
});
it("parses a CharString endchar with 4 args w/seac enabled", function () {
fontData.reset();
const cffParser = new CFFParser(
fontData,
{},
@ -197,6 +199,7 @@ describe("CFFParser", function () {
});
it("parses a CharString endchar with 4 args w/seac disabled", function () {
fontData.reset();
const cffParser = new CFFParser(
fontData,
{},