Merge pull request #20648 from Snuffleupagus/api-async-getTextContent

Convert `PDFPageProxy.prototype.getTextContent` to an asynchronous method
This commit is contained in:
calixteman 2026-02-12 13:47:04 +01:00 committed by GitHub
commit f24768d7b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1739,7 +1739,7 @@ class PDFPageProxy {
* @returns {Promise<TextContent>} A promise that is resolved with a
* {@link TextContent} object that represents the page's text content.
*/
- getTextContent(params = {}) {
+ async getTextContent(params = {}) {
if (this._transport._htmlForXfa) {
// TODO: We need to revisit this once the XFA foreground patch lands and
// only do this for non-foreground XFA.
@@ -1747,28 +1747,18 @@ class PDFPageProxy {
}
const readableStream = this.streamTextContent(params);
- return new Promise(function (resolve, reject) {
- function pump() {
- reader.read().then(function ({ value, done }) {
- if (done) {
- resolve(textContent);
- return;
- }
- textContent.lang ??= value.lang;
- Object.assign(textContent.styles, value.styles);
- textContent.items.push(...value.items);
- pump();
- }, reject);
- }
+ const textContent = {
+ items: [],
+ styles: Object.create(null),
+ lang: null,
+ };
- const reader = readableStream.getReader();
- const textContent = {
- items: [],
- styles: Object.create(null),
- lang: null,
- };
- pump();
- });
+ for await (const value of readableStream) {
+ textContent.lang ??= value.lang;
+ Object.assign(textContent.styles, value.styles);
+ textContent.items.push(...value.items);
+ }
+ return textContent;
}
/**