Add an integration-test for merging a corrupt PDF

Currently, when opening a PDF document, the following code is used, where `checkFirstPage`/`checkLastPage` help detect XRef corruption; see 86a18bd5fe/src/core/worker.js (L167-L176). However, when merging a PDF into an existing document, the parsing is only "partial"; see 86a18bd5fe/src/core/worker.js (L632-L634). It seems a little strange not to support corrupt PDFs in a consistent manner across the code-base, hence this patch adds a new `BasePdfManager` helper that handles all the relevant parsing/checking, and re-uses it when merging PDFs.
2026-06-23 00:15:51 +02:00 · 2026-06-14 09:49:23 +02:00 · 2026-06-14 09:49:23 +02:00 · e1c930adfe
commit e1c930adfe
parent 86a18bd5fe
3 changed files with 71 additions and 15 deletions
--- a/src/core/pdf_manager.js
+++ b/src/core/pdf_manager.js
@ -116,6 +116,19 @@ class BasePdfManager {
    return this.ensure(this.pdfDocument.catalog, prop, args);
  }

+  async initDocument(recoveryMode) {
+    await this.ensureDoc("checkHeader");
+    await this.ensureDoc("parseStartXRef");
+    await this.ensureDoc("parse", [recoveryMode]);
+
+    // Check that at least the first page can be successfully loaded,
+    // since otherwise the XRef table is definitely not valid.
+    await this.ensureDoc("checkFirstPage", [recoveryMode]);
+    // Check that the last page can be successfully loaded, to ensure that
+    // `numPages` is correct, and fallback to walking the entire /Pages-tree.
+    await this.ensureDoc("checkLastPage", [recoveryMode]);
+  }
+
  getPage(pageIndex) {
    return this.pdfDocument.getPage(pageIndex);
  }
--- a/src/core/worker.js
+++ b/src/core/worker.js
@ -164,16 +164,7 @@ class WorkerMessageHandler {
    }

    async function loadDocument(recoveryMode) {
-      await pdfManager.ensureDoc("checkHeader");
-      await pdfManager.ensureDoc("parseStartXRef");
-      await pdfManager.ensureDoc("parse", [recoveryMode]);
-
-      // Check that at least the first page can be successfully loaded,
-      // since otherwise the XRef table is definitely not valid.
-      await pdfManager.ensureDoc("checkFirstPage", [recoveryMode]);
-      // Check that the last page can be successfully loaded, to ensure that
-      // `numPages` is correct, and fallback to walking the entire /Pages-tree.
-      await pdfManager.ensureDoc("checkLastPage", [recoveryMode]);
+      await pdfManager.initDocument(recoveryMode);

      const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
      if (isPureXfa) {
@ -629,9 +620,7 @@ class WorkerMessageHandler {
            while (true) {
              try {
                await manager.requestLoadedStream();
-                await manager.ensureDoc("checkHeader");
-                await manager.ensureDoc("parseStartXRef");
-                await manager.ensureDoc("parse", [recoveryMode]);
+                await manager.initDocument(recoveryMode);
                break;
              } catch (e) {
                if (e instanceof XRefParseException) {
--- a/test/integration/reorganize_pages_spec.mjs
+++ b/test/integration/reorganize_pages_spec.mjs
@ -127,9 +127,11 @@ async function waitForHavingContents(page, expected) {
  });
  return page.waitForFunction(
    ex => {
+      const textLayers = document.querySelectorAll(".textLayer");
      const buffer = [];
-      for (const textLayer of document.querySelectorAll(".textLayer")) {
-        buffer.push(parseInt(textLayer.textContent.trim(), 10));
+      for (const [i, textLayer] of textLayers.entries()) {
+        const text = textLayer.textContent.trim();
+        buffer.push(typeof ex[i] === "string" ? text : parseInt(text, 10));
      }
      return ex.length === buffer.length && ex.every((v, i) => v === buffer[i]);
    },
@ -3396,6 +3398,58 @@ describe("Reorganize Pages View", () => {
        })
      );
    });
+
+    it("should merge a corrupt PDF (with invalid pages /Count) after the current page", async () => {
+      await Promise.all(
+        pages.map(async ([browserName, page]) => {
+          await waitForThumbnailVisible(page, 1);
+
+          // Navigate to page 2 so the merged PDF is inserted after it.
+          await page.evaluate(() => {
+            window.PDFViewerApplication.page = 2;
+          });
+          await page.waitForFunction(
+            () => window.PDFViewerApplication.page === 2
+          );
+          await waitAndClick(page, getThumbnailSelector(2));
+
+          const handleMerged = await createPromise(page, resolve => {
+            window.PDFViewerApplication.eventBus.on(
+              "thumbnailsloaded",
+              resolve,
+              { once: true }
+            );
+          });
+
+          const picker = await page.$("#viewsManagerAddFilePicker");
+          await picker.uploadFile(
+            path.join(__dirname, "../pdfs/poppler-91414-0-53.pdf")
+          );
+          await awaitPromise(handleMerged);
+
+          // Original 3 pages + 1 merged page = 4 pages total.
+          await page.waitForFunction(
+            () => parseInt(document.getElementById("pageNumber").max, 10) === 4
+          );
+
+          // Focus must move to the first newly inserted page (page 3, since
+          // we merged after page 2).
+          await page.waitForFunction(
+            () => window.PDFViewerApplication.page === 3
+          );
+
+          // Pages 1–2 come from the original document, then the page of
+          // the merged PDF, then page 3 of the original shifted to the end.
+          await waitForHavingContents(page, [1, 2, "foobar", 3]);
+
+          await waitForTextToBe(
+            page,
+            "#viewsManagerStatusActionLabel",
+            `${FSI}1${PDI} selected`
+          );
+        })
+      );
+    });
  });

  describe("Drag-and-drop PDF merge", () => {