Merge pull request #20593 from calixteman/decompress_content_stream

Use DecompressionStream in async code
This commit is contained in:
calixteman 2026-01-25 21:27:16 +01:00 committed by GitHub
commit 001058abb2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 66 additions and 7 deletions

View File

@ -699,6 +699,12 @@ class CMapFactory {
if (encoding instanceof Name) {
return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
} else if (encoding instanceof BaseStream) {
if (encoding.isAsync) {
const bytes = await encoding.asyncGetBytes();
if (bytes) {
encoding = new Stream(bytes, 0, bytes.length, encoding.dict);
}
}
const parsedCMap = await parseCMap(
/* cMap = */ new CMap(),
/* lexer = */ new Lexer(encoding),

View File

@ -61,6 +61,7 @@ import {
RefSetCache,
} from "./primitives.js";
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./calculate_md5.js";
import { Catalog } from "./catalog.js";
@ -68,7 +69,6 @@ import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Intersector } from "./intersector.js";
import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js";
import { OperatorList } from "./operator_list.js";
import { PartialEvaluator } from "./evaluator.js";
@ -271,9 +271,31 @@ class Page {
const content = await this.pdfManager.ensure(this, "content");
if (content instanceof BaseStream && !content.isImageStream) {
if (content.isAsync) {
const bytes = await content.asyncGetBytes();
if (bytes) {
return new Stream(bytes, 0, bytes.length, content.dict);
}
}
return content;
}
if (Array.isArray(content)) {
const promises = [];
for (let i = 0, ii = content.length; i < ii; i++) {
const item = content[i];
if (item instanceof BaseStream && item.isAsync) {
promises.push(
item.asyncGetBytes().then(bytes => {
if (bytes) {
content[i] = new Stream(bytes, 0, bytes.length, item.dict);
}
})
);
}
}
if (promises.length > 0) {
await Promise.all(promises);
}
return new StreamsSequenceStream(
content,
this.#onSubStreamError.bind(this)

View File

@ -1706,7 +1706,7 @@ class PartialEvaluator {
return null;
}
getOperatorList({
async getOperatorList({
stream,
task,
resources,
@ -1715,6 +1715,13 @@ class PartialEvaluator {
fallbackFontDict = null,
prevRefs = null,
}) {
if (stream.isAsync) {
const bytes = await stream.asyncGetBytes();
if (bytes) {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
@ -2373,7 +2380,7 @@ class PartialEvaluator {
});
}
getTextContent({
async getTextContent({
stream,
task,
resources,
@ -2389,6 +2396,13 @@ class PartialEvaluator {
prevRefs = null,
intersector = null,
}) {
if (stream.isAsync) {
const bytes = await stream.asyncGetBytes();
if (bytes) {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
@ -4565,8 +4579,16 @@ class PartialEvaluator {
if (fontFile) {
if (!(fontFile instanceof BaseStream)) {
throw new FormatError("FontFile should be a stream");
} else if (fontFile.isEmpty) {
throw new FormatError("FontFile is empty");
} else {
if (fontFile.isAsync) {
const bytes = await fontFile.asyncGetBytes();
if (bytes) {
fontFile = new Stream(bytes, 0, bytes.length, fontFile.dict);
}
}
if (fontFile.isEmpty) {
throw new FormatError("FontFile is empty");
}
}
}
} catch (ex) {

View File

@ -122,6 +122,8 @@ const fixedDistCodeTab = [
];
class FlateStream extends DecodeStream {
#isAsync = true;
constructor(str, maybeLength) {
super(maybeLength);
@ -162,7 +164,9 @@ class FlateStream extends DecodeStream {
async asyncGetBytes() {
this.stream.reset();
const bytes = this.stream.getBytes();
const bytes = this.stream.isAsync
? await this.stream.asyncGetBytes()
: this.stream.getBytes();
try {
const { readable, writable } = new DecompressionStream("deflate");
@ -200,6 +204,7 @@ class FlateStream extends DecodeStream {
// decoder.
// We have already read the bytes from the underlying stream, so we just
// reuse them to avoid reading them again.
this.#isAsync = false;
this.stream = new Stream(
bytes,
2 /* = header size (see ctor) */,
@ -212,7 +217,7 @@ class FlateStream extends DecodeStream {
}
get isAsync() {
return true;
return this.#isAsync;
}
getBits(bits) {

View File

@ -51,6 +51,7 @@ class Stream extends BaseStream {
const strEnd = this.end;
if (!length) {
this.pos = strEnd;
return bytes.subarray(pos, strEnd);
}
let end = pos + length;

View File

@ -68,6 +68,7 @@ describe("CFFParser", function () {
});
beforeEach(function () {
fontData.reset();
parser = new CFFParser(fontData, {}, SEAC_ANALYSIS_ENABLED);
cff = parser.parse();
});
@ -168,6 +169,7 @@ describe("CFFParser", function () {
});
it("parses a CharString endchar with 4 args w/seac enabled", function () {
fontData.reset();
const cffParser = new CFFParser(
fontData,
{},
@ -197,6 +199,7 @@ describe("CFFParser", function () {
});
it("parses a CharString endchar with 4 args w/seac disabled", function () {
fontData.reset();
const cffParser = new CFFParser(
fontData,
{},