mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-24 17:05:47 +02:00
Merge pull request #21486 from Snuffleupagus/getTextContent-sink-fixes
Improve the `sink` handling in `getTextContent` for Highlight annotations (PR 20019 follow-up)
This commit is contained in:
commit
4117b75a10
@ -789,8 +789,6 @@ class Page {
|
||||
includeMarkedContent: false,
|
||||
disableNormalization: false,
|
||||
sink: null,
|
||||
viewBox: this.view,
|
||||
lang: null,
|
||||
intersector,
|
||||
}).then(() => {
|
||||
intersector.setText();
|
||||
|
||||
@ -78,6 +78,7 @@ import {
|
||||
LocalTilingPatternCache,
|
||||
RegionalImageCache,
|
||||
} from "./image_utils.js";
|
||||
import { parseMarkedContentProps, textSinkWrapper } from "./evaluator_utils.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { bidi } from "./bidi.js";
|
||||
import { ColorSpace } from "./colorspace.js";
|
||||
@ -87,7 +88,6 @@ import { getGlyphsUnicode } from "./glyphlist.js";
|
||||
import { getMetrics } from "./metrics.js";
|
||||
import { getUnicodeForGlyph } from "./unicode.js";
|
||||
import { MurmurHash3_64 } from "../shared/murmurhash3.js";
|
||||
import { parseMarkedContentProps } from "./evaluator_utils.js";
|
||||
import { PDFImage } from "./image.js";
|
||||
import { Stream } from "./stream.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
@ -2380,6 +2380,7 @@ class PartialEvaluator {
|
||||
stream = new Stream(bytes, 0, bytes.length, stream.dict);
|
||||
}
|
||||
}
|
||||
sink ??= textSinkWrapper(null);
|
||||
|
||||
const objId = stream.dict?.objId;
|
||||
const seenRefs = new RefSet(prevRefs);
|
||||
@ -3153,7 +3154,7 @@ class PartialEvaluator {
|
||||
if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
|
||||
return;
|
||||
}
|
||||
sink?.enqueue(textContent, length);
|
||||
sink.enqueue(textContent, length);
|
||||
textContent.items = [];
|
||||
textContent.styles = Object.create(null);
|
||||
}
|
||||
@ -3163,7 +3164,7 @@ class PartialEvaluator {
|
||||
return new Promise(function promiseBody(resolve, reject) {
|
||||
const next = function (promise) {
|
||||
enqueueChunk(/* batch = */ true);
|
||||
Promise.all([promise, sink?.ready]).then(function () {
|
||||
Promise.all([promise, sink.ready]).then(function () {
|
||||
try {
|
||||
promiseBody(resolve, reject);
|
||||
} catch (ex) {
|
||||
@ -3409,22 +3410,7 @@ class PartialEvaluator {
|
||||
// Enqueue the `textContent` chunk before parsing the /Form
|
||||
// XObject.
|
||||
enqueueChunk();
|
||||
const sinkWrapper = {
|
||||
enqueueInvoked: false,
|
||||
|
||||
enqueue(chunk, size) {
|
||||
this.enqueueInvoked = true;
|
||||
sink.enqueue(chunk, size);
|
||||
},
|
||||
|
||||
get desiredSize() {
|
||||
return sink.desiredSize ?? 0;
|
||||
},
|
||||
|
||||
get ready() {
|
||||
return sink.ready;
|
||||
},
|
||||
};
|
||||
const sinkWrapper = textSinkWrapper(sink);
|
||||
|
||||
self
|
||||
.getTextContent({
|
||||
@ -3436,7 +3422,7 @@ class PartialEvaluator {
|
||||
: resources,
|
||||
stateManager: xObjStateManager,
|
||||
includeMarkedContent,
|
||||
sink: sink && sinkWrapper,
|
||||
sink: sinkWrapper,
|
||||
seenStyles,
|
||||
viewBox,
|
||||
lang,
|
||||
@ -3563,7 +3549,7 @@ class PartialEvaluator {
|
||||
}
|
||||
break;
|
||||
} // switch
|
||||
if (textContent.items.length >= (sink?.desiredSize ?? 1)) {
|
||||
if (textContent.items.length >= sink.desiredSize) {
|
||||
// Wait for ready, if we reach highWaterMark.
|
||||
stop = true;
|
||||
break;
|
||||
|
||||
@ -16,6 +16,28 @@
|
||||
import { Dict, Name, Ref } from "./primitives.js";
|
||||
import { FormatError, warn } from "../shared/util.js";
|
||||
|
||||
/**
 * Wrap a text-content `sink` so that callers can always use the same
 * interface, regardless of whether a real sink was provided.
 *
 * The returned object:
 *  - forwards `enqueue(chunk, size)` to the wrapped sink when there is one,
 *    and silently drops the chunk otherwise;
 *  - records in `enqueueInvoked` whether `enqueue` has been called at least
 *    once on this wrapper;
 *  - exposes `desiredSize`, falling back to a fixed chunk size when the sink
 *    is missing or reports a nullish value;
 *  - exposes `ready`, which is an already-resolved promise when no sink was
 *    provided.
 *
 * @param {Object|null|undefined} sink - The sink to wrap; expected to expose
 *   `enqueue`, `desiredSize` and `ready` when provided.
 * @returns {Object} The wrapper described above.
 */
function textSinkWrapper(sink) {
  const TEXT_CONTENT_CHUNK_SIZE = 100; // Same as in `src/display/api.js`.
  // Created once, so that every `ready` access without a sink returns the
  // same promise rather than allocating a new one each time.
  const resolved = sink ? null : Promise.resolve();

  const wrapper = {
    enqueueInvoked: false,

    enqueue(chunk, size) {
      // Reference the wrapper through the closure rather than `this`, so that
      // the flag is still updated when `enqueue` is called detached from the
      // wrapper (e.g. destructured or passed along as a callback).
      wrapper.enqueueInvoked = true;
      sink?.enqueue(chunk, size);
    },

    get desiredSize() {
      return sink?.desiredSize ?? TEXT_CONTENT_CHUNK_SIZE;
    },

    get ready() {
      return sink?.ready ?? resolved;
    },
  };
  return wrapper;
}
|
||||
|
||||
function _parseVisibilityExpression(
|
||||
xref,
|
||||
array,
|
||||
@ -120,4 +142,4 @@ function parseMarkedContentProps(xref, contentProperties, resources) {
|
||||
return null;
|
||||
}
|
||||
|
||||
export { parseMarkedContentProps };
|
||||
export { parseMarkedContentProps, textSinkWrapper };
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user