Handle corrupt PDFs that lack a /Kids array and instead inline the /Page dictionary (issue 21436)

This extends the fix from PR 9549 to the fallback `getAllPageDicts` method, which didn't exist when that PR landed, in order to support more cases of corrupt PDF documents.
2026-06-23 00:15:51 +02:00 · 2026-06-12 11:43:37 +02:00 · 2026-06-12 11:43:37 +02:00 · 131d6b7d38
commit 131d6b7d38
parent 63db4bb777
5 changed files with 88 additions and 0 deletions
--- a/src/core/catalog.js
+++ b/src/core/catalog.js
@ -1484,6 +1484,22 @@ class Catalog {
        }
      }
      if (!Array.isArray(kids)) {
+        // Prevent errors in corrupt PDF documents that violate the
+        // specification by *inlining* Page dicts (fixes issue21436.pdf).
+        let type = currentNode.getRaw("Type");
+        if (type instanceof Ref) {
+          try {
+            type = await xref.fetchAsync(type);
+          } catch (ex) {
+            addPageError(ex);
+            break;
+          }
+        }
+        if (isName(type, "Page") || !currentNode.has("Kids")) {
+          addPageDict(currentNode, null);
+          break;
+        }
+
        addPageError(
          new FormatError("Page dictionary kids object is not an array.")
        );
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@ -929,6 +929,7 @@
 !issue_cff_unsigned_bbox.pdf
 !90ms_rksj_h_sample.pdf
 !issue21346.pdf
+!issue21436.pdf
 !cidfont_cmap_overflow.pdf
 !jbig2_file_header.pdf
 !text_field_own_canvas_calc.pdf
--- a/test/pdfs/issue21436.pdf
+++ b/test/pdfs/issue21436.pdf
@ -0,0 +1,28 @@
+%PDF-1.7
+1 0 obj
+<< /Type /Catalog /Pages 3 0 R >>
+endobj
+2 0 obj
+<< /Type /Pages /Kids [3 0 R] /Count 1 >>
+endobj
+3 0 obj
+<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] /Contents 4 0 R /Resources << >> >>
+endobj
+4 0 obj
+<< /Length 18 >>
+stream
+10 10 180 180 re S
+endstream
+endobj
+xref
+0 5
+0000000000 65535 f 
+0000000009 00000 n 
+0000000058 00000 n 
+0000000115 00000 n 
+0000000219 00000 n 
+trailer
+<< /Size 5 /Root 1 0 R >>
+startxref
+287
+%%EOF
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@ -2802,6 +2802,13 @@
    "link": true,
    "type": "eq"
  },
+  {
+    "id": "issue21436",
+    "file": "pdfs/issue21436.pdf",
+    "md5": "93c4292a52db8a37eefd651cb677d2a9",
+    "rounds": 1,
+    "type": "eq"
+  },
  {
    "id": "txt2pdf",
    "file": "pdfs/txt2pdf.pdf",
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@ -827,6 +827,42 @@ describe("api", function () {
      ]);
    });

+    it("creates pdf doc from PDF files, with /Pages tree without /Kids array", async function () {
+      const loadingTask1 = getDocument(buildGetDocumentParams("issue9540.pdf"));
+      const loadingTask2 = getDocument(
+        buildGetDocumentParams("issue21436.pdf")
+      );
+
+      expect(loadingTask1).toBeInstanceOf(PDFDocumentLoadingTask);
+      expect(loadingTask2).toBeInstanceOf(PDFDocumentLoadingTask);
+
+      const pdfDocument1 = await loadingTask1.promise;
+      const pdfDocument2 = await loadingTask2.promise;
+
+      expect(pdfDocument1.numPages).toEqual(1);
+      expect(pdfDocument2.numPages).toEqual(1);
+
+      const pageA = await pdfDocument1.getPage(1);
+      expect(pageA).toBeInstanceOf(PDFPageProxy);
+
+      const opListA = await pageA.getOperatorList();
+      expect(opListA.fnArray.length).toEqual(19);
+      expect(opListA.argsArray.length).toEqual(19);
+      expect(opListA.lastChunk).toEqual(true);
+      expect(opListA.separateAnnots).toEqual(null);
+
+      const pageB = await pdfDocument2.getPage(1);
+      expect(pageB).toBeInstanceOf(PDFPageProxy);
+
+      const opListB = await pageB.getOperatorList();
+      expect(opListB.fnArray.length).toEqual(1);
+      expect(opListB.argsArray.length).toEqual(1);
+      expect(opListB.lastChunk).toEqual(true);
+      expect(opListB.separateAnnots).toEqual(null);
+
+      await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
+    });
+
    it("creates pdf doc from PDF files, with circular references", async function () {
      const loadingTask1 = getDocument(
        buildGetDocumentParams("poppler-91414-0-53.pdf")