mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-23 00:15:51 +02:00
Add an integration-test for merging a corrupt PDF
Currently, when opening a PDF document the following code is used, where `checkFirstPage`/`checkLastPage` help detect XRef corruption; see 86a18bd5fe/src/core/worker.js (L167-L176). However, when merging a PDF into an existing document the parsing is only "partial"; see 86a18bd5fe/src/core/worker.js (L632-L634). It seems a little strange not to support corrupt PDFs in a consistent manner across the code-base, hence this patch adds a new `BasePdfManager` helper that handles all the relevant parsing/checking, and re-uses it when merging PDFs.
This commit is contained in:
parent
86a18bd5fe
commit
e1c930adfe
@ -116,6 +116,19 @@ class BasePdfManager {
|
||||
return this.ensure(this.pdfDocument.catalog, prop, args);
|
||||
}
|
||||
|
||||
async initDocument(recoveryMode) {
|
||||
await this.ensureDoc("checkHeader");
|
||||
await this.ensureDoc("parseStartXRef");
|
||||
await this.ensureDoc("parse", [recoveryMode]);
|
||||
|
||||
// Check that at least the first page can be successfully loaded,
|
||||
// since otherwise the XRef table is definitely not valid.
|
||||
await this.ensureDoc("checkFirstPage", [recoveryMode]);
|
||||
// Check that the last page can be successfully loaded, to ensure that
|
||||
// `numPages` is correct, and fallback to walking the entire /Pages-tree.
|
||||
await this.ensureDoc("checkLastPage", [recoveryMode]);
|
||||
}
|
||||
|
||||
getPage(pageIndex) {
|
||||
return this.pdfDocument.getPage(pageIndex);
|
||||
}
|
||||
|
||||
@ -164,16 +164,7 @@ class WorkerMessageHandler {
|
||||
}
|
||||
|
||||
async function loadDocument(recoveryMode) {
|
||||
await pdfManager.ensureDoc("checkHeader");
|
||||
await pdfManager.ensureDoc("parseStartXRef");
|
||||
await pdfManager.ensureDoc("parse", [recoveryMode]);
|
||||
|
||||
// Check that at least the first page can be successfully loaded,
|
||||
// since otherwise the XRef table is definitely not valid.
|
||||
await pdfManager.ensureDoc("checkFirstPage", [recoveryMode]);
|
||||
// Check that the last page can be successfully loaded, to ensure that
|
||||
// `numPages` is correct, and fallback to walking the entire /Pages-tree.
|
||||
await pdfManager.ensureDoc("checkLastPage", [recoveryMode]);
|
||||
await pdfManager.initDocument(recoveryMode);
|
||||
|
||||
const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
|
||||
if (isPureXfa) {
|
||||
@ -629,9 +620,7 @@ class WorkerMessageHandler {
|
||||
while (true) {
|
||||
try {
|
||||
await manager.requestLoadedStream();
|
||||
await manager.ensureDoc("checkHeader");
|
||||
await manager.ensureDoc("parseStartXRef");
|
||||
await manager.ensureDoc("parse", [recoveryMode]);
|
||||
await manager.initDocument(recoveryMode);
|
||||
break;
|
||||
} catch (e) {
|
||||
if (e instanceof XRefParseException) {
|
||||
|
||||
@ -127,9 +127,11 @@ async function waitForHavingContents(page, expected) {
|
||||
});
|
||||
return page.waitForFunction(
|
||||
ex => {
|
||||
const textLayers = document.querySelectorAll(".textLayer");
|
||||
const buffer = [];
|
||||
for (const textLayer of document.querySelectorAll(".textLayer")) {
|
||||
buffer.push(parseInt(textLayer.textContent.trim(), 10));
|
||||
for (const [i, textLayer] of textLayers.entries()) {
|
||||
const text = textLayer.textContent.trim();
|
||||
buffer.push(typeof ex[i] === "string" ? text : parseInt(text, 10));
|
||||
}
|
||||
return ex.length === buffer.length && ex.every((v, i) => v === buffer[i]);
|
||||
},
|
||||
@ -3396,6 +3398,58 @@ describe("Reorganize Pages View", () => {
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
// Merging a PDF whose /Pages /Count is wrong must still insert its page
// right after the currently selected page.
it("should merge a corrupt PDF (with invalid pages /Count) after the current page", async () => {
  const corruptPdfPath = path.join(__dirname, "../pdfs/poppler-91414-0-53.pdf");
  const expectedStatus = `${FSI}1${PDI} selected`;

  // Scenario executed once per browser under test.
  const mergeAndVerify = async ([browserName, page]) => {
    await waitForThumbnailVisible(page, 1);

    // Move to page 2 and select its thumbnail, so that the merged PDF is
    // inserted directly after it.
    await page.evaluate(() => {
      const { PDFViewerApplication } = window;
      PDFViewerApplication.page = 2;
    });
    await page.waitForFunction(() => {
      const { PDFViewerApplication } = window;
      return PDFViewerApplication.page === 2;
    });
    const secondThumbnail = getThumbnailSelector(2);
    await waitAndClick(page, secondThumbnail);

    // Start listening *before* uploading, so the event cannot be missed.
    const thumbnailsReloaded = await createPromise(page, resolve => {
      const { eventBus } = window.PDFViewerApplication;
      eventBus.on("thumbnailsloaded", resolve, { once: true });
    });

    const filePicker = await page.$("#viewsManagerAddFilePicker");
    await filePicker.uploadFile(corruptPdfPath);
    await awaitPromise(thumbnailsReloaded);

    // 3 original pages plus the single merged page gives 4 pages in total.
    await page.waitForFunction(() => {
      const pageNumberInput = document.getElementById("pageNumber");
      return parseInt(pageNumberInput.max, 10) === 4;
    });

    // The first inserted page gets the focus; merging happened after
    // page 2, hence that is page 3.
    await page.waitForFunction(() => {
      const { PDFViewerApplication } = window;
      return PDFViewerApplication.page === 3;
    });

    // Expected order: original pages 1 and 2, the merged PDF's page, and
    // finally the original page 3 pushed to the end.
    await waitForHavingContents(page, [1, 2, "foobar", 3]);

    await waitForTextToBe(
      page,
      "#viewsManagerStatusActionLabel",
      expectedStatus
    );
  };

  await Promise.all(pages.map(mergeAndVerify));
});
|
||||
});
|
||||
|
||||
describe("Drag-and-drop PDF merge", () => {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user