Merge pull request #21486 from Snuffleupagus/getTextContent-sink-fixes

Improve the `sink` handling in `getTextContent` for Highlight annotations (PR 20019 follow-up)
This commit is contained in:
Tim van der Meij 2026-06-23 20:02:34 +02:00 committed by GitHub
commit 4117b75a10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 30 additions and 24 deletions

View File

@ -789,8 +789,6 @@ class Page {
includeMarkedContent: false,
disableNormalization: false,
sink: null,
viewBox: this.view,
lang: null,
intersector,
}).then(() => {
intersector.setText();

View File

@ -78,6 +78,7 @@ import {
LocalTilingPatternCache,
RegionalImageCache,
} from "./image_utils.js";
import { parseMarkedContentProps, textSinkWrapper } from "./evaluator_utils.js";
import { BaseStream } from "./base_stream.js";
import { bidi } from "./bidi.js";
import { ColorSpace } from "./colorspace.js";
@ -87,7 +88,6 @@ import { getGlyphsUnicode } from "./glyphlist.js";
import { getMetrics } from "./metrics.js";
import { getUnicodeForGlyph } from "./unicode.js";
import { MurmurHash3_64 } from "../shared/murmurhash3.js";
import { parseMarkedContentProps } from "./evaluator_utils.js";
import { PDFImage } from "./image.js";
import { Stream } from "./stream.js";
import { stringToPDFString } from "./string_utils.js";
@ -2380,6 +2380,7 @@ class PartialEvaluator {
stream = new Stream(bytes, 0, bytes.length, stream.dict);
}
}
sink ??= textSinkWrapper(null);
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
@ -3153,7 +3154,7 @@ class PartialEvaluator {
if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
return;
}
sink?.enqueue(textContent, length);
sink.enqueue(textContent, length);
textContent.items = [];
textContent.styles = Object.create(null);
}
@ -3163,7 +3164,7 @@ class PartialEvaluator {
return new Promise(function promiseBody(resolve, reject) {
const next = function (promise) {
enqueueChunk(/* batch = */ true);
Promise.all([promise, sink?.ready]).then(function () {
Promise.all([promise, sink.ready]).then(function () {
try {
promiseBody(resolve, reject);
} catch (ex) {
@ -3409,22 +3410,7 @@ class PartialEvaluator {
// Enqueue the `textContent` chunk before parsing the /Form
// XObject.
enqueueChunk();
const sinkWrapper = {
enqueueInvoked: false,
enqueue(chunk, size) {
this.enqueueInvoked = true;
sink.enqueue(chunk, size);
},
get desiredSize() {
return sink.desiredSize ?? 0;
},
get ready() {
return sink.ready;
},
};
const sinkWrapper = textSinkWrapper(sink);
self
.getTextContent({
@ -3436,7 +3422,7 @@ class PartialEvaluator {
: resources,
stateManager: xObjStateManager,
includeMarkedContent,
sink: sink && sinkWrapper,
sink: sinkWrapper,
seenStyles,
viewBox,
lang,
@ -3563,7 +3549,7 @@ class PartialEvaluator {
}
break;
} // switch
if (textContent.items.length >= (sink?.desiredSize ?? 1)) {
if (textContent.items.length >= sink.desiredSize) {
// Wait for ready, if we reach highWaterMark.
stop = true;
break;

View File

@ -16,6 +16,28 @@
import { Dict, Name, Ref } from "./primitives.js";
import { FormatError, warn } from "../shared/util.js";
/**
 * Wrap a text-content sink in an object that records whether `enqueue` was
 * ever called, and that stays usable when no sink was supplied.
 *
 * @param {Object|null} sink - The underlying sink, or a nullish value. When
 *   present, its `enqueue`, `desiredSize` and `ready` members are read.
 * @returns {Object} A wrapper exposing `enqueueInvoked`, `enqueue(chunk, size)`
 *   and the `desiredSize`/`ready` getters.
 */
function textSinkWrapper(sink) {
  // Same as in `src/display/api.js`.
  const FALLBACK_DESIRED_SIZE = 100;
  // Created once up-front, so that every `ready` lookup without a usable
  // sink value hands back the very same promise.
  const fallbackReady = !sink ? Promise.resolve() : null;

  const wrapper = {
    enqueueInvoked: false,

    enqueue(chunk, size) {
      // Record the call first; forwarding is skipped for a nullish sink.
      this.enqueueInvoked = true;
      if (sink !== null && sink !== undefined) {
        sink.enqueue(chunk, size);
      }
    },

    get desiredSize() {
      const size = sink?.desiredSize;
      return size ?? FALLBACK_DESIRED_SIZE;
    },

    get ready() {
      const pending = sink?.ready;
      return pending ?? fallbackReady;
    },
  };
  return wrapper;
}
function _parseVisibilityExpression(
xref,
array,
@ -120,4 +142,4 @@ function parseMarkedContentProps(xref, contentProperties, resources) {
return null;
}
export { parseMarkedContentProps };
export { parseMarkedContentProps, textSinkWrapper };