From 429b469ecbfad83027f6a9a816ea3a4fc03869f5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 20 May 2026 14:45:29 +0200 Subject: [PATCH] Add basic integration-tests for the `PdfTextExtractor` class --- test/integration/jasmine-boot.js | 1 + test/integration/text_extractor_spec.mjs | 119 +++++++++++++++++++++++ web/genericcom.js | 13 +++ 3 files changed, 133 insertions(+) create mode 100644 test/integration/text_extractor_spec.mjs diff --git a/test/integration/jasmine-boot.js b/test/integration/jasmine-boot.js index 4ed75f561..f61cab215 100644 --- a/test/integration/jasmine-boot.js +++ b/test/integration/jasmine-boot.js @@ -44,6 +44,7 @@ async function runTests(results) { "signature_editor_spec.mjs", "simple_viewer_spec.mjs", "stamp_editor_spec.mjs", + "text_extractor_spec.mjs", "text_field_spec.mjs", "text_layer_spec.mjs", "text_layer_images_spec.mjs", diff --git a/test/integration/text_extractor_spec.mjs b/test/integration/text_extractor_spec.mjs new file mode 100644 index 000000000..c665217b8 --- /dev/null +++ b/test/integration/text_extractor_spec.mjs @@ -0,0 +1,119 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { closePages, loadAndWait } from "./test_utils.mjs"; + +async function dispatchRequestTextContent(page, id) { + return page.evaluate(requestId => { + const event = new CustomEvent("requestTextContent", { + bubbles: true, + cancelable: true, + detail: { requestId }, + }); + window.dispatchEvent(event); + }, id); +} + +async function getReportTextData(page) { + await page.waitForFunction(() => window._reportTextData !== undefined); + return page.evaluate(() => { + const data = window._reportTextData; + delete window._reportTextData; + return data; + }); +} + +describe("PdfTextExtractor", () => { + describe("Simple multi-page document", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait("basicapi.pdf", ".textLayer .endOfContent"); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("check that all text is extracted", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + await dispatchRequestTextContent(page, 1); + + const { text, requestId } = await getReportTextData(page); + + expect(text).toEqual( + [ + "Table Of Content", + "Chapter 1 .......................................................... 2", + "Paragraph 1.1 ...................................................... 3", + "page 1 / 3", + "Chapter 1", + "page 2 / 3", + "Paragraph 1.1", + "Powered by TCPDF (www.tcpdf.org)", + "page 3 / 3", + ].join("\n") + ); + expect(requestId).toEqual(1); + }) + ); + }); + }); + + describe("Multi-page document, with disableAutoFetch=true set", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait( + "tracemonkey.pdf", + ".textLayer .endOfContent", + null, + null, + { + disableAutoFetch: true, + disableStream: true, + } + ); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("check that all text is extracted", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + await dispatchRequestTextContent(page, 2); + + const { text, requestId } = await getReportTextData(page); + + expect( + text.startsWith( + "Trace-based Just-in-Time Type Specialization for Dynamic\nLanguages" + ) + ).toBeTrue(); + expect( + text.endsWith( + "Conference on Virtual Execution Environments, pages 83–93. ACM\nPress, 2007." + ) + ).toBeTrue(); + expect(text.length).toEqual(82804); + expect(requestId).toEqual(2); + }) + ); + }); + }); +}); diff --git a/web/genericcom.js b/web/genericcom.js index 77909b487..c0f7e8877 100644 --- a/web/genericcom.js +++ b/web/genericcom.js @@ -39,6 +39,19 @@ class Preferences extends BasePreferences { } class ExternalServices extends BaseExternalServices { + constructor() { + super(); + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + // For testing purposes. + Object.defineProperty(this, "reportText", { + value: data => { + window._reportTextData = data; + }, + }); + } + } + async createL10n() { return new GenericL10n(AppOptions.get("localeProperties")?.lang); }