Merge pull request #21304 from Snuffleupagus/PdfTextExtractor-tests

Add basic integration-tests for the `PdfTextExtractor` class
This commit is contained in:
Tim van der Meij 2026-05-21 20:27:53 +02:00 committed by GitHub
commit 9b5cd3db64
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 133 additions and 0 deletions

View File

@@ -44,6 +44,7 @@ async function runTests(results) {
"signature_editor_spec.mjs",
"simple_viewer_spec.mjs",
"stamp_editor_spec.mjs",
"text_extractor_spec.mjs",
"text_field_spec.mjs",
"text_layer_spec.mjs",
"text_layer_images_spec.mjs",

View File

@@ -0,0 +1,119 @@
/* Copyright 2026 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { closePages, loadAndWait } from "./test_utils.mjs";
/**
 * Fire a "requestTextContent" event on the page's `window`.
 *
 * @param {Object} page - Page handle exposing an `evaluate(fn, ...args)`
 *   method; the callback is run through it with `id` as its argument.
 * @param {number} id - Identifier placed in the event's `detail.requestId`.
 * @returns {Promise} Whatever `page.evaluate` returns for the callback (the
 *   callback itself returns nothing).
 */
async function dispatchRequestTextContent(page, id) {
  // The callback only touches its own argument and page globals, since it is
  // handed over to `page.evaluate` rather than being invoked directly here.
  return page.evaluate(requestId => {
    window.dispatchEvent(
      new CustomEvent("requestTextContent", {
        detail: { requestId },
        cancelable: true,
        bubbles: true,
      })
    );
  }, id);
}
/**
 * Wait until `window._reportTextData` is defined in the page, then take it:
 * the value is returned and the property is removed from `window`, so a
 * later call has to wait for a fresh value.
 *
 * @param {Object} page - Page handle exposing `waitForFunction(fn)` and
 *   `evaluate(fn)`.
 * @returns {Promise} Resolves with the value that was stored in
 *   `window._reportTextData`.
 */
async function getReportTextData(page) {
  await page.waitForFunction(
    () => typeof window._reportTextData !== "undefined"
  );

  return page.evaluate(() => {
    const { _reportTextData: reported } = window;
    // Clear the slot so stale data is never picked up by a subsequent read.
    delete window._reportTextData;
    return reported;
  });
}
// Integration tests: each case dispatches a "requestTextContent" event in
// every opened page and checks the data that ends up in
// `window._reportTextData` (read back through `getReportTextData`).
describe("PdfTextExtractor", () => {
  describe("Simple multi-page document", () => {
    const REQUEST_ID = 1;
    // The complete expected text, with items separated by newlines.
    const EXPECTED_TEXT = [
      "Table Of Content",
      "Chapter 1 .......................................................... 2",
      "Paragraph 1.1 ...................................................... 3",
      "page 1 / 3",
      "Chapter 1",
      "page 2 / 3",
      "Paragraph 1.1",
      "Powered by TCPDF (www.tcpdf.org)",
      "page 3 / 3",
    ].join("\n");

    let openedPages;

    beforeEach(async () => {
      openedPages = await loadAndWait(
        "basicapi.pdf",
        ".textLayer .endOfContent"
      );
    });

    afterEach(async () => {
      await closePages(openedPages);
    });

    it("check that all text is extracted", async () => {
      const checks = openedPages.map(async ([, page]) => {
        await dispatchRequestTextContent(page, REQUEST_ID);
        const report = await getReportTextData(page);

        expect(report.text).toEqual(EXPECTED_TEXT);
        expect(report.requestId).toEqual(REQUEST_ID);
      });
      await Promise.all(checks);
    });
  });

  describe("Multi-page document, with disableAutoFetch=true set", () => {
    const REQUEST_ID = 2;
    // The document is long, so only the start, the end and the overall
    // length of the extracted text are checked.
    const EXPECTED_START =
      "Trace-based Just-in-Time Type Specialization for Dynamic\nLanguages";
    // NOTE(review): "8393" may be a page range that lost its dash somewhere
    // in transit -- confirm against the text actually extracted from the PDF.
    const EXPECTED_END =
      "Conference on Virtual Execution Environments, pages 8393. ACM\nPress, 2007.";
    const EXPECTED_LENGTH = 82804;

    let openedPages;

    beforeEach(async () => {
      const viewerOptions = {
        disableAutoFetch: true,
        disableStream: true,
      };
      // The two `null` arguments are passed through as-is; presumably they
      // are optional `loadAndWait` parameters -- see test_utils.mjs.
      openedPages = await loadAndWait(
        "tracemonkey.pdf",
        ".textLayer .endOfContent",
        null,
        null,
        viewerOptions
      );
    });

    afterEach(async () => {
      await closePages(openedPages);
    });

    it("check that all text is extracted", async () => {
      const checks = openedPages.map(async ([, page]) => {
        await dispatchRequestTextContent(page, REQUEST_ID);
        const report = await getReportTextData(page);
        const extracted = report.text;

        expect(extracted.startsWith(EXPECTED_START)).toBeTrue();
        expect(extracted.endsWith(EXPECTED_END)).toBeTrue();
        expect(extracted.length).toEqual(EXPECTED_LENGTH);
        expect(report.requestId).toEqual(REQUEST_ID);
      });
      await Promise.all(checks);
    });
  });
});

View File

@@ -39,6 +39,19 @@ class Preferences extends BasePreferences {
}
class ExternalServices extends BaseExternalServices {
constructor() {
super();
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "reportText", {
value: data => {
window._reportTextData = data;
},
});
}
}
async createL10n() {
return new GenericL10n(AppOptions.get("localeProperties")?.lang);
}