Use DecompressionStream in async code

Content streams and fonts are usually compressed using FlateDecode,
so use the DecompressionStream API to decompress those streams
in the async code path.
This commit is contained in:
calixteman 2026-01-23 21:35:09 +01:00
parent 6a4a3b060d
commit 9f660be8a2
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F
6 changed files with 66 additions and 7 deletions

View File

@ -699,6 +699,12 @@ class CMapFactory {
if (encoding instanceof Name) { if (encoding instanceof Name) {
return createBuiltInCMap(encoding.name, fetchBuiltInCMap); return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
} else if (encoding instanceof BaseStream) { } else if (encoding instanceof BaseStream) {
if (encoding.isAsync) {
const bytes = await encoding.asyncGetBytes();
if (bytes) {
encoding = new Stream(bytes, 0, bytes.length, encoding.dict);
}
}
const parsedCMap = await parseCMap( const parsedCMap = await parseCMap(
/* cMap = */ new CMap(), /* cMap = */ new CMap(),
/* lexer = */ new Lexer(encoding), /* lexer = */ new Lexer(encoding),

View File

@ -61,6 +61,7 @@ import {
RefSetCache, RefSetCache,
} from "./primitives.js"; } from "./primitives.js";
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js"; import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js"; import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./calculate_md5.js"; import { calculateMD5 } from "./calculate_md5.js";
import { Catalog } from "./catalog.js"; import { Catalog } from "./catalog.js";
@ -68,7 +69,6 @@ import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js"; import { DatasetReader } from "./dataset_reader.js";
import { Intersector } from "./intersector.js"; import { Intersector } from "./intersector.js";
import { Linearization } from "./parser.js"; import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js"; import { ObjectLoader } from "./object_loader.js";
import { OperatorList } from "./operator_list.js"; import { OperatorList } from "./operator_list.js";
import { PartialEvaluator } from "./evaluator.js"; import { PartialEvaluator } from "./evaluator.js";
@ -271,9 +271,31 @@ class Page {
const content = await this.pdfManager.ensure(this, "content"); const content = await this.pdfManager.ensure(this, "content");
if (content instanceof BaseStream && !content.isImageStream) { if (content instanceof BaseStream && !content.isImageStream) {
if (content.isAsync) {
const bytes = await content.asyncGetBytes();
if (bytes) {
return new Stream(bytes, 0, bytes.length, content.dict);
}
}
return content; return content;
} }
if (Array.isArray(content)) { if (Array.isArray(content)) {
const promises = [];
for (let i = 0, ii = content.length; i < ii; i++) {
const item = content[i];
if (item instanceof BaseStream && item.isAsync) {
promises.push(
item.asyncGetBytes().then(bytes => {
if (bytes) {
content[i] = new Stream(bytes, 0, bytes.length, item.dict);
}
})
);
}
}
if (promises.length > 0) {
await Promise.all(promises);
}
return new StreamsSequenceStream( return new StreamsSequenceStream(
content, content,
this.#onSubStreamError.bind(this) this.#onSubStreamError.bind(this)

View File

@ -1706,7 +1706,7 @@ class PartialEvaluator {
return null; return null;
} }
getOperatorList({ async getOperatorList({
stream, stream,
task, task,
resources, resources,
@ -1715,6 +1715,13 @@ class PartialEvaluator {
fallbackFontDict = null, fallbackFontDict = null,
prevRefs = null, prevRefs = null,
}) { }) {
if (stream.isAsync) {
const bytes = await stream.asyncGetBytes();
if (bytes) {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
const objId = stream.dict?.objId; const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs); const seenRefs = new RefSet(prevRefs);
@ -2373,7 +2380,7 @@ class PartialEvaluator {
}); });
} }
getTextContent({ async getTextContent({
stream, stream,
task, task,
resources, resources,
@ -2389,6 +2396,13 @@ class PartialEvaluator {
prevRefs = null, prevRefs = null,
intersector = null, intersector = null,
}) { }) {
if (stream.isAsync) {
const bytes = await stream.asyncGetBytes();
if (bytes) {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
const objId = stream.dict?.objId; const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs); const seenRefs = new RefSet(prevRefs);
@ -4565,8 +4579,16 @@ class PartialEvaluator {
if (fontFile) { if (fontFile) {
if (!(fontFile instanceof BaseStream)) { if (!(fontFile instanceof BaseStream)) {
throw new FormatError("FontFile should be a stream"); throw new FormatError("FontFile should be a stream");
} else if (fontFile.isEmpty) { } else {
throw new FormatError("FontFile is empty"); if (fontFile.isAsync) {
const bytes = await fontFile.asyncGetBytes();
if (bytes) {
fontFile = new Stream(bytes, 0, bytes.length, fontFile.dict);
}
}
if (fontFile.isEmpty) {
throw new FormatError("FontFile is empty");
}
} }
} }
} catch (ex) { } catch (ex) {

View File

@ -122,6 +122,8 @@ const fixedDistCodeTab = [
]; ];
class FlateStream extends DecodeStream { class FlateStream extends DecodeStream {
#isAsync = true;
constructor(str, maybeLength) { constructor(str, maybeLength) {
super(maybeLength); super(maybeLength);
@ -162,7 +164,9 @@ class FlateStream extends DecodeStream {
async asyncGetBytes() { async asyncGetBytes() {
this.stream.reset(); this.stream.reset();
const bytes = this.stream.getBytes(); const bytes = this.stream.isAsync
? await this.stream.asyncGetBytes()
: this.stream.getBytes();
try { try {
const { readable, writable } = new DecompressionStream("deflate"); const { readable, writable } = new DecompressionStream("deflate");
@ -200,6 +204,7 @@ class FlateStream extends DecodeStream {
// decoder. // decoder.
// We already get the bytes from the underlying stream, so we just reuse // We already get the bytes from the underlying stream, so we just reuse
// them to avoid get them again. // them to avoid get them again.
this.#isAsync = false;
this.stream = new Stream( this.stream = new Stream(
bytes, bytes,
2 /* = header size (see ctor) */, 2 /* = header size (see ctor) */,
@ -212,7 +217,7 @@ class FlateStream extends DecodeStream {
} }
get isAsync() { get isAsync() {
return true; return this.#isAsync;
} }
getBits(bits) { getBits(bits) {

View File

@ -51,6 +51,7 @@ class Stream extends BaseStream {
const strEnd = this.end; const strEnd = this.end;
if (!length) { if (!length) {
this.pos = strEnd;
return bytes.subarray(pos, strEnd); return bytes.subarray(pos, strEnd);
} }
let end = pos + length; let end = pos + length;

View File

@ -68,6 +68,7 @@ describe("CFFParser", function () {
}); });
beforeEach(function () { beforeEach(function () {
fontData.reset();
parser = new CFFParser(fontData, {}, SEAC_ANALYSIS_ENABLED); parser = new CFFParser(fontData, {}, SEAC_ANALYSIS_ENABLED);
cff = parser.parse(); cff = parser.parse();
}); });
@ -168,6 +169,7 @@ describe("CFFParser", function () {
}); });
it("parses a CharString endchar with 4 args w/seac enabled", function () { it("parses a CharString endchar with 4 args w/seac enabled", function () {
fontData.reset();
const cffParser = new CFFParser( const cffParser = new CFFParser(
fontData, fontData,
{}, {},
@ -197,6 +199,7 @@ describe("CFFParser", function () {
}); });
it("parses a CharString endchar with 4 args w/seac disabled", function () { it("parses a CharString endchar with 4 args w/seac disabled", function () {
fontData.reset();
const cffParser = new CFFParser( const cffParser = new CFFParser(
fontData, fontData,
{}, {},