mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-23 00:15:51 +02:00
Handle corrupt PDFs that lack a /Kids array and instead inline the /Page dictionary (issue 21436)
This basically extends PR 9549 to the fallback `getAllPageDicts` method, which didn't exist at the time, in order to support more cases of corrupt PDF documents.
This commit is contained in:
parent
63db4bb777
commit
131d6b7d38
@ -1484,6 +1484,22 @@ class Catalog {
|
||||
}
|
||||
}
|
||||
if (!Array.isArray(kids)) {
|
||||
// Prevent errors in corrupt PDF documents that violate the
|
||||
// specification by *inlining* Page dicts (fixes issue21436.pdf).
|
||||
let type = currentNode.getRaw("Type");
|
||||
if (type instanceof Ref) {
|
||||
try {
|
||||
type = await xref.fetchAsync(type);
|
||||
} catch (ex) {
|
||||
addPageError(ex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isName(type, "Page") || !currentNode.has("Kids")) {
|
||||
addPageDict(currentNode, null);
|
||||
break;
|
||||
}
|
||||
|
||||
addPageError(
|
||||
new FormatError("Page dictionary kids object is not an array.")
|
||||
);
|
||||
|
||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -929,6 +929,7 @@
|
||||
!issue_cff_unsigned_bbox.pdf
|
||||
!90ms_rksj_h_sample.pdf
|
||||
!issue21346.pdf
|
||||
!issue21436.pdf
|
||||
!cidfont_cmap_overflow.pdf
|
||||
!jbig2_file_header.pdf
|
||||
!text_field_own_canvas_calc.pdf
|
||||
|
||||
28
test/pdfs/issue21436.pdf
Normal file
28
test/pdfs/issue21436.pdf
Normal file
@ -0,0 +1,28 @@
|
||||
%PDF-1.7
|
||||
1 0 obj
|
||||
<< /Type /Catalog /Pages 3 0 R >>
|
||||
endobj
|
||||
2 0 obj
|
||||
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||
endobj
|
||||
3 0 obj
|
||||
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] /Contents 4 0 R /Resources << >> >>
|
||||
endobj
|
||||
4 0 obj
|
||||
<< /Length 18 >>
|
||||
stream
|
||||
10 10 180 180 re S
|
||||
endstream
|
||||
endobj
|
||||
xref
|
||||
0 5
|
||||
0000000000 65535 f
|
||||
0000000009 00000 n
|
||||
0000000058 00000 n
|
||||
0000000115 00000 n
|
||||
0000000219 00000 n
|
||||
trailer
|
||||
<< /Size 5 /Root 1 0 R >>
|
||||
startxref
|
||||
287
|
||||
%%EOF
|
||||
@ -2802,6 +2802,13 @@
|
||||
"link": true,
|
||||
"type": "eq"
|
||||
},
|
||||
{
|
||||
"id": "issue21436",
|
||||
"file": "pdfs/issue21436.pdf",
|
||||
"md5": "93c4292a52db8a37eefd651cb677d2a9",
|
||||
"rounds": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{
|
||||
"id": "txt2pdf",
|
||||
"file": "pdfs/txt2pdf.pdf",
|
||||
|
||||
@ -827,6 +827,42 @@ describe("api", function () {
|
||||
]);
|
||||
});
|
||||
|
||||
// Regression test: loads two fixture PDFs (issue9540.pdf and issue21436.pdf),
// and checks that each one reports exactly one page whose operator list can
// be retrieved.
it("creates pdf doc from PDF files, with /Pages tree without /Kids array", async function () {
|
||||
const loadingTask1 = getDocument(buildGetDocumentParams("issue9540.pdf"));
|
||||
const loadingTask2 = getDocument(
|
||||
buildGetDocumentParams("issue21436.pdf")
|
||||
);
|
||||

|
||||
expect(loadingTask1).toBeInstanceOf(PDFDocumentLoadingTask);
|
||||
expect(loadingTask2).toBeInstanceOf(PDFDocumentLoadingTask);
|
||||

|
||||
// Both loading tasks are awaited here; a rejection of either promise fails
// the test.
const pdfDocument1 = await loadingTask1.promise;
|
||||
const pdfDocument2 = await loadingTask2.promise;
|
||||

|
||||
expect(pdfDocument1.numPages).toEqual(1);
|
||||
expect(pdfDocument2.numPages).toEqual(1);
|
||||

|
||||
// First (and only) page of issue9540.pdf.
const pageA = await pdfDocument1.getPage(1);
|
||||
expect(pageA).toBeInstanceOf(PDFPageProxy);
|
||||

|
||||
// The operator list of this page is expected to hold 19 entries.
const opListA = await pageA.getOperatorList();
|
||||
expect(opListA.fnArray.length).toEqual(19);
|
||||
expect(opListA.argsArray.length).toEqual(19);
|
||||
expect(opListA.lastChunk).toEqual(true);
|
||||
expect(opListA.separateAnnots).toEqual(null);
|
||||

|
||||
// First (and only) page of issue21436.pdf.
const pageB = await pdfDocument2.getPage(1);
|
||||
expect(pageB).toBeInstanceOf(PDFPageProxy);
|
||||

|
||||
// The operator list of this page is expected to hold a single entry.
const opListB = await pageB.getOperatorList();
|
||||
expect(opListB.fnArray.length).toEqual(1);
|
||||
expect(opListB.argsArray.length).toEqual(1);
|
||||
expect(opListB.lastChunk).toEqual(true);
|
||||
expect(opListB.separateAnnots).toEqual(null);
|
||||

|
||||
// Destroy both loading tasks before the test finishes.
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
|
||||
});
|
||||
|
||||
it("creates pdf doc from PDF files, with circular references", async function () {
|
||||
const loadingTask1 = getDocument(
|
||||
buildGetDocumentParams("poppler-91414-0-53.pdf")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user