mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-04-13 00:34:04 +02:00
Add attachments when merging/reorganizing a pdf (bug 2026956)
This commit is contained in:
parent
a40b91f0bb
commit
5b8c04f383
@ -1084,6 +1084,15 @@ class Catalog {
|
||||
return shadow(this, "attachments", attachments);
|
||||
}
|
||||
|
||||
get rawEmbeddedFiles() {
|
||||
const obj = this.#catDict.get("Names");
|
||||
if (!(obj instanceof Dict) || !obj.has("EmbeddedFiles")) {
|
||||
return null;
|
||||
}
|
||||
const nameTree = new NameTree(obj.getRaw("EmbeddedFiles"), this.xref);
|
||||
return nameTree.getAll(/* isRaw = */ true);
|
||||
}
|
||||
|
||||
get xfaImages() {
|
||||
const obj = this.#catDict.get("Names");
|
||||
let xfaImages = null;
|
||||
|
||||
@ -76,6 +76,7 @@ class DocumentData {
|
||||
this.hasSignatureAnnotations = false;
|
||||
this.fieldToParent = new RefSetCache();
|
||||
this.outline = null;
|
||||
this.embeddedFiles = null;
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,6 +164,8 @@ class PDFEditor {
|
||||
|
||||
outlineItems = null;
|
||||
|
||||
embeddedFiles = new Map();
|
||||
|
||||
constructor({ useObjectStreams = true, title = "", author = "" } = {}) {
|
||||
[this.rootRef, this.rootDict] = this.newDict;
|
||||
[this.infoRef, this.infoDict] = this.newDict;
|
||||
@ -694,6 +697,7 @@ class PDFEditor {
|
||||
await this.#mergeStructTrees(allDocumentData);
|
||||
await this.#mergeAcroForms(allDocumentData);
|
||||
this.#buildOutline(allDocumentData);
|
||||
await this.#collectEmbeddedFiles(allDocumentData);
|
||||
|
||||
return this.writePDF();
|
||||
}
|
||||
@ -723,6 +727,9 @@ class PDFEditor {
|
||||
pdfManager
|
||||
.ensureCatalog("documentOutlineForEditor")
|
||||
.then(outline => (documentData.outline = outline)),
|
||||
pdfManager
|
||||
.ensureCatalog("rawEmbeddedFiles")
|
||||
.then(ef => (documentData.embeddedFiles = ef)),
|
||||
]);
|
||||
const structTreeRoot = documentData.structTreeRoot;
|
||||
if (structTreeRoot) {
|
||||
@ -2078,13 +2085,15 @@ class PDFEditor {
|
||||
const maxLeaves =
|
||||
MAX_IN_NAME_TREE_NODE <= 1 ? allEntries.length : MAX_IN_NAME_TREE_NODE;
|
||||
const [treeRef, treeDict] = this.newDict;
|
||||
const stack = [{ dict: treeDict, entries: allEntries }];
|
||||
const stack = [{ dict: treeDict, entries: allEntries, isRoot: true }];
|
||||
const valueType = areNames ? "Names" : "Nums";
|
||||
|
||||
while (stack.length > 0) {
|
||||
const { dict, entries } = stack.pop();
|
||||
const { dict, entries, isRoot } = stack.pop();
|
||||
if (entries.length <= maxLeaves) {
|
||||
dict.set("Limits", [entries[0][0], entries.at(-1)[0]]);
|
||||
if (!isRoot) {
|
||||
dict.set("Limits", [entries[0][0], entries.at(-1)[0]]);
|
||||
}
|
||||
dict.set(valueType, entries.flat());
|
||||
continue;
|
||||
}
|
||||
@ -2124,6 +2133,63 @@ class PDFEditor {
|
||||
rootDict.set("PageLabels", pageLabelsRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect and clone EmbeddedFiles from all source documents.
|
||||
* @param {Array<DocumentData>} allDocumentData
|
||||
*/
|
||||
async #collectEmbeddedFiles(allDocumentData) {
|
||||
const { embeddedFiles } = this;
|
||||
for (const documentData of allDocumentData) {
|
||||
const {
|
||||
embeddedFiles: docEmbeddedFiles,
|
||||
document: { xref },
|
||||
} = documentData;
|
||||
if (!docEmbeddedFiles?.size) {
|
||||
continue;
|
||||
}
|
||||
this.currentDocument = documentData;
|
||||
for (const [key, valueRef] of docEmbeddedFiles) {
|
||||
let name = key;
|
||||
if (embeddedFiles.has(name)) {
|
||||
const displayName = stringToPDFString(
|
||||
key,
|
||||
/* keepEscapeSequence = */ true
|
||||
);
|
||||
for (let i = 1; ; i++) {
|
||||
const deduped = `${displayName}_${i}`;
|
||||
if (!embeddedFiles.has(deduped)) {
|
||||
name = deduped;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
embeddedFiles.set(
|
||||
name,
|
||||
await this.#collectDependencies(valueRef, true, xref)
|
||||
);
|
||||
}
|
||||
this.currentDocument = null;
|
||||
}
|
||||
}
|
||||
|
||||
#makeEmbeddedFilesTree() {
|
||||
const { embeddedFiles } = this;
|
||||
if (embeddedFiles.size === 0) {
|
||||
return;
|
||||
}
|
||||
if (!this.namesDict) {
|
||||
[this.namesRef, this.namesDict] = this.newDict;
|
||||
this.rootDict.set("Names", this.namesRef);
|
||||
}
|
||||
this.namesDict.set(
|
||||
"EmbeddedFiles",
|
||||
this.#makeNameNumTree(
|
||||
Array.from(embeddedFiles.entries()),
|
||||
/* areNames = */ true
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#makeDestinationsTree() {
|
||||
const { namedDestinations } = this;
|
||||
if (namedDestinations.size === 0) {
|
||||
@ -2245,6 +2311,7 @@ class PDFEditor {
|
||||
this.#makeAcroForm();
|
||||
this.#makePageTree();
|
||||
this.#makePageLabelsTree();
|
||||
this.#makeEmbeddedFilesTree();
|
||||
this.#makeDestinationsTree();
|
||||
this.#makeStructTree();
|
||||
await this.#makeOutline();
|
||||
|
||||
@ -6429,6 +6429,71 @@ small scripts as well as for`);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("preserves EmbeddedFiles (attachments) when extracting pages", async function () {
  const sourceTask = getDocument(buildGetDocumentParams("attachment.pdf"));
  const sourceDoc = await sourceTask.promise;

  // Sanity check: the fixture itself must expose the attachment.
  const sourceAttachments = await sourceDoc.getAttachments();
  expect(sourceAttachments["foo.txt"]).toBeDefined();

  const extracted = await sourceDoc.extractPages([
    { document: null, includePages: [0] },
  ]);
  await sourceTask.destroy();

  // Re-open the generated PDF and make sure that the attachment survived.
  const resultTask = getDocument(extracted);
  const resultDoc = await resultTask.promise;
  const resultAttachments = await resultDoc.getAttachments();

  expect(resultAttachments).not.toBeNull();
  expect(resultAttachments["foo.txt"]).toEqual({
    rawFilename: "foo.txt",
    filename: "foo.txt",
    // The bytes of "bar baz \n".
    content: new Uint8Array([98, 97, 114, 32, 98, 97, 122, 32, 10]),
    description: "",
  });

  await resultTask.destroy();
});
|
||||
|
||||
it("preserves EmbeddedFiles (attachments) when merging two PDFs", async function () {
  const sourceTask = getDocument(buildGetDocumentParams("attachment.pdf"));
  const sourceDoc = await sourceTask.promise;

  // attachment.pdf is merged with itself, hence "foo.txt" shows up twice and
  // the second occurrence is expected to be renamed to "foo.txt_1".
  const merged = await sourceDoc.extractPages([
    { document: null },
    { document: null },
  ]);
  await sourceTask.destroy();

  const resultTask = getDocument(merged);
  const resultDoc = await resultTask.promise;
  const resultAttachments = await resultDoc.getAttachments();
  expect(resultAttachments).not.toBeNull();

  // Both entries must describe the very same file; only the key differs.
  const expectedAttachment = {
    rawFilename: "foo.txt",
    filename: "foo.txt",
    content: new Uint8Array([98, 97, 114, 32, 98, 97, 122, 32, 10]),
    description: "",
  };
  expect(resultAttachments["foo.txt"]).toEqual(expectedAttachment);
  expect(resultAttachments["foo.txt_1"]).toEqual(expectedAttachment);

  await resultTask.destroy();
});
|
||||
});
|
||||
|
||||
describe("AcroForm", function () {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user