diff --git a/src/core/document.js b/src/core/document.js index 7ab48a6bd..a2eaa0737 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -789,8 +789,6 @@ class Page { includeMarkedContent: false, disableNormalization: false, sink: null, - viewBox: this.view, - lang: null, intersector, }).then(() => { intersector.setText(); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index be6031937..97736b6c0 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -78,6 +78,7 @@ import { LocalTilingPatternCache, RegionalImageCache, } from "./image_utils.js"; +import { parseMarkedContentProps, textSinkWrapper } from "./evaluator_utils.js"; import { BaseStream } from "./base_stream.js"; import { bidi } from "./bidi.js"; import { ColorSpace } from "./colorspace.js"; @@ -87,7 +88,6 @@ import { getGlyphsUnicode } from "./glyphlist.js"; import { getMetrics } from "./metrics.js"; import { getUnicodeForGlyph } from "./unicode.js"; import { MurmurHash3_64 } from "../shared/murmurhash3.js"; -import { parseMarkedContentProps } from "./evaluator_utils.js"; import { PDFImage } from "./image.js"; import { Stream } from "./stream.js"; import { stringToPDFString } from "./string_utils.js"; @@ -2380,6 +2380,7 @@ class PartialEvaluator { stream = new Stream(bytes, 0, bytes.length, stream.dict); } } + sink ??= textSinkWrapper(null); const objId = stream.dict?.objId; const seenRefs = new RefSet(prevRefs); @@ -3153,7 +3154,7 @@ class PartialEvaluator { if (batch && length < TEXT_CHUNK_BATCH_SIZE) { return; } - sink?.enqueue(textContent, length); + sink.enqueue(textContent, length); textContent.items = []; textContent.styles = Object.create(null); } @@ -3163,7 +3164,7 @@ class PartialEvaluator { return new Promise(function promiseBody(resolve, reject) { const next = function (promise) { enqueueChunk(/* batch = */ true); - Promise.all([promise, sink?.ready]).then(function () { + Promise.all([promise, sink.ready]).then(function () { try { promiseBody(resolve, reject); } catch (ex) { @@ -3409,22 +3410,7 @@ class PartialEvaluator { // Enqueue the `textContent` chunk before parsing the /Form // XObject. enqueueChunk(); - const sinkWrapper = { - enqueueInvoked: false, - - enqueue(chunk, size) { - this.enqueueInvoked = true; - sink.enqueue(chunk, size); - }, - - get desiredSize() { - return sink.desiredSize ?? 0; - }, - - get ready() { - return sink.ready; - }, - }; + const sinkWrapper = textSinkWrapper(sink); self .getTextContent({ @@ -3436,7 +3422,7 @@ class PartialEvaluator { : resources, stateManager: xObjStateManager, includeMarkedContent, - sink: sink && sinkWrapper, + sink: sinkWrapper, seenStyles, viewBox, lang, @@ -3563,7 +3549,7 @@ class PartialEvaluator { } break; } // switch - if (textContent.items.length >= (sink?.desiredSize ?? 1)) { + if (textContent.items.length >= sink.desiredSize) { // Wait for ready, if we reach highWaterMark. stop = true; break; diff --git a/src/core/evaluator_utils.js b/src/core/evaluator_utils.js index b871bfa93..b2d4b6fe6 100644 --- a/src/core/evaluator_utils.js +++ b/src/core/evaluator_utils.js @@ -16,6 +16,28 @@ import { Dict, Name, Ref } from "./primitives.js"; import { FormatError, warn } from "../shared/util.js"; +function textSinkWrapper(sink) { + const TEXT_CONTENT_CHUNK_SIZE = 100; // Same as in `src/display/api.js`. + const resolved = sink ? null : Promise.resolve(); + + return { + enqueueInvoked: false, + + enqueue(chunk, size) { + this.enqueueInvoked = true; + sink?.enqueue(chunk, size); + }, + + get desiredSize() { + return sink?.desiredSize ?? TEXT_CONTENT_CHUNK_SIZE; + }, + + get ready() { + return sink?.ready ?? resolved; + }, + }; +} + function _parseVisibilityExpression( xref, array, @@ -120,4 +142,4 @@ function parseMarkedContentProps(xref, contentProperties, resources) { return null; } -export { parseMarkedContentProps }; +export { parseMarkedContentProps, textSinkWrapper };