From 9f660be8a2c6d11076f400d458cc20218f6ab199 Mon Sep 17 00:00:00 2001 From: calixteman Date: Fri, 23 Jan 2026 21:35:09 +0100 Subject: [PATCH] Use DecompressionStream in async code Usually, content stream or fonts are compressed using FlateDecode. So use the DecompressionStream API to decompress those streams in the async code path. --- src/core/cmap.js | 6 ++++++ src/core/document.js | 24 +++++++++++++++++++++++- src/core/evaluator.js | 30 ++++++++++++++++++++++++++---- src/core/flate_stream.js | 9 +++++++-- src/core/stream.js | 1 + test/unit/cff_parser_spec.js | 3 +++ 6 files changed, 66 insertions(+), 7 deletions(-) diff --git a/src/core/cmap.js b/src/core/cmap.js index 6c0cab60c..5ef35ff8f 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -699,6 +699,12 @@ class CMapFactory { if (encoding instanceof Name) { return createBuiltInCMap(encoding.name, fetchBuiltInCMap); } else if (encoding instanceof BaseStream) { + if (encoding.isAsync) { + const bytes = await encoding.asyncGetBytes(); + if (bytes) { + encoding = new Stream(bytes, 0, bytes.length, encoding.dict); + } + } const parsedCMap = await parseCMap( /* cMap = */ new CMap(), /* lexer = */ new Lexer(encoding), diff --git a/src/core/document.js b/src/core/document.js index a33d20385..d5865736f 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -61,6 +61,7 @@ import { RefSetCache, } from "./primitives.js"; import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js"; +import { NullStream, Stream } from "./stream.js"; import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./calculate_md5.js"; import { Catalog } from "./catalog.js"; @@ -68,7 +69,6 @@ import { clearGlobalCaches } from "./cleanup_helper.js"; import { DatasetReader } from "./dataset_reader.js"; import { Intersector } from "./intersector.js"; import { Linearization } from "./parser.js"; -import { NullStream } from "./stream.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { PartialEvaluator } from "./evaluator.js"; @@ -271,9 +271,31 @@ class Page { const content = await this.pdfManager.ensure(this, "content"); if (content instanceof BaseStream && !content.isImageStream) { + if (content.isAsync) { + const bytes = await content.asyncGetBytes(); + if (bytes) { + return new Stream(bytes, 0, bytes.length, content.dict); + } + } return content; } if (Array.isArray(content)) { + const promises = []; + for (let i = 0, ii = content.length; i < ii; i++) { + const item = content[i]; + if (item instanceof BaseStream && item.isAsync) { + promises.push( + item.asyncGetBytes().then(bytes => { + if (bytes) { + content[i] = new Stream(bytes, 0, bytes.length, item.dict); + } + }) + ); + } + } + if (promises.length > 0) { + await Promise.all(promises); + } return new StreamsSequenceStream( content, this.#onSubStreamError.bind(this) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index e75fdff9c..6a2ba2986 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1706,7 +1706,7 @@ class PartialEvaluator { return null; } - getOperatorList({ + async getOperatorList({ stream, task, resources, @@ -1715,6 +1715,13 @@ class PartialEvaluator { fallbackFontDict = null, prevRefs = null, }) { + if (stream.isAsync) { + const bytes = await stream.asyncGetBytes(); + if (bytes) { + stream = new Stream(bytes, 0, bytes.length, stream.dict); + } + } + const objId = stream.dict?.objId; const seenRefs = new RefSet(prevRefs); @@ -2373,7 +2380,7 @@ class PartialEvaluator { }); } - getTextContent({ + async getTextContent({ stream, task, resources, @@ -2389,6 +2396,13 @@ class PartialEvaluator { prevRefs = null, intersector = null, }) { + if (stream.isAsync) { + const bytes = await stream.asyncGetBytes(); + if (bytes) { + stream = new Stream(bytes, 0, bytes.length, stream.dict); + } + } + const objId = stream.dict?.objId; const seenRefs = new RefSet(prevRefs); @@ -4565,8 +4579,16 @@ class PartialEvaluator { if (fontFile) { if (!(fontFile instanceof BaseStream)) { throw new FormatError("FontFile should be a stream"); - } else if (fontFile.isEmpty) { - throw new FormatError("FontFile is empty"); + } else { + if (fontFile.isAsync) { + const bytes = await fontFile.asyncGetBytes(); + if (bytes) { + fontFile = new Stream(bytes, 0, bytes.length, fontFile.dict); + } + } + if (fontFile.isEmpty) { + throw new FormatError("FontFile is empty"); + } } } } catch (ex) { diff --git a/src/core/flate_stream.js b/src/core/flate_stream.js index bdb9d223d..064b46ee8 100644 --- a/src/core/flate_stream.js +++ b/src/core/flate_stream.js @@ -122,6 +122,8 @@ const fixedDistCodeTab = [ ]; class FlateStream extends DecodeStream { + #isAsync = true; + constructor(str, maybeLength) { super(maybeLength); @@ -162,7 +164,9 @@ class FlateStream extends DecodeStream { async asyncGetBytes() { this.stream.reset(); - const bytes = this.stream.getBytes(); + const bytes = this.stream.isAsync + ? await this.stream.asyncGetBytes() + : this.stream.getBytes(); try { const { readable, writable } = new DecompressionStream("deflate"); @@ -200,6 +204,7 @@ class FlateStream extends DecodeStream { // decoder. // We already get the bytes from the underlying stream, so we just reuse // them to avoid get them again. + this.#isAsync = false; this.stream = new Stream( bytes, 2 /* = header size (see ctor) */, @@ -212,7 +217,7 @@ class FlateStream extends DecodeStream { } get isAsync() { - return true; + return this.#isAsync; } getBits(bits) { diff --git a/src/core/stream.js b/src/core/stream.js index 710b92f8c..e47be2ccd 100644 --- a/src/core/stream.js +++ b/src/core/stream.js @@ -51,6 +51,7 @@ class Stream extends BaseStream { const strEnd = this.end; if (!length) { + this.pos = strEnd; return bytes.subarray(pos, strEnd); } let end = pos + length; diff --git a/test/unit/cff_parser_spec.js b/test/unit/cff_parser_spec.js index 98a9418c8..82b9ba4cd 100644 --- a/test/unit/cff_parser_spec.js +++ b/test/unit/cff_parser_spec.js @@ -68,6 +68,7 @@ describe("CFFParser", function () { }); beforeEach(function () { + fontData.reset(); parser = new CFFParser(fontData, {}, SEAC_ANALYSIS_ENABLED); cff = parser.parse(); }); @@ -168,6 +169,7 @@ describe("CFFParser", function () { }); it("parses a CharString endchar with 4 args w/seac enabled", function () { + fontData.reset(); const cffParser = new CFFParser( fontData, {}, @@ -197,6 +199,7 @@ describe("CFFParser", function () { }); it("parses a CharString endchar with 4 args w/seac disabled", function () { + fontData.reset(); const cffParser = new CFFParser( fontData, {},