From baf8647b1f87c1dff437d3eb2576bde08f04a2a5 Mon Sep 17 00:00:00 2001 From: calixteman Date: Sat, 7 Mar 2026 17:47:24 +0100 Subject: [PATCH] Add the possibility to merge/update acroforms when merging/extracting (bug 2015853) --- src/core/core_utils.js | 33 +++ src/core/editor/pdf_editor.js | 510 +++++++++++++++++++++++++++++++--- test/pdfs/.gitignore | 1 + test/pdfs/form_two_pages.pdf | Bin 0 -> 23136 bytes test/unit/api_spec.js | 107 +++++++ test/unit/core_utils_spec.js | 98 +++++++ 6 files changed, 713 insertions(+), 36 deletions(-) create mode 100644 test/pdfs/form_two_pages.pdf diff --git a/src/core/core_utils.js b/src/core/core_utils.js index d0d801fb5..287547616 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -208,6 +208,38 @@ function getParentToUpdate(dict, ref, xref) { return result; } +function deepCompare(a, b) { + if (a === b) { + return true; + } + if (a instanceof Dict && b instanceof Dict) { + if (a.size !== b.size) { + return false; + } + for (const [key, value1] of a.getRawEntries()) { + const value2 = b.getRaw(key); + if (value2 === undefined || !deepCompare(value1, value2)) { + return false; + } + } + return true; + } + + if (Array.isArray(a) && Array.isArray(b)) { + if (a.length !== b.length) { + return false; + } + for (let i = 0, ii = a.length; i < ii; i++) { + if (!deepCompare(a[i], b[i])) { + return false; + } + } + return true; + } + + return false; +} + // prettier-ignore const ROMAN_NUMBER_MAP = [ "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", @@ -745,6 +777,7 @@ export { arrayBuffersToBytes, codePointIter, collectActions, + deepCompare, encodeToXmlString, escapePDFName, escapeString, diff --git a/src/core/editor/pdf_editor.js b/src/core/editor/pdf_editor.js index aca8649b8..470913817 100644 --- a/src/core/editor/pdf_editor.js +++ b/src/core/editor/pdf_editor.js @@ -17,13 +17,17 @@ /** @typedef {import("../document.js").Page} Page */ /** @typedef {import("../xref.js").XRef} XRef */ +import { + deepCompare, + getInheritableProperty, + stringToAsciiOrUTF16BE, +} from "../core_utils.js"; import { Dict, isName, Name, Ref, RefSet, RefSetCache } from "../primitives.js"; import { getModificationDate, stringToPDFString } from "../../shared/util.js"; import { incrementalUpdate, writeValue } from "../writer.js"; import { NameTree, NumberTree } from "../name_number_tree.js"; import { BaseStream } from "../base_stream.js"; import { StringStream } from "../stream.js"; -import { stringToAsciiOrUTF16BE } from "../core_utils.js"; const MAX_LEAVES_PER_PAGES_NODE = 16; const MAX_IN_NAME_TREE_NODE = 64; @@ -60,6 +64,12 @@ class DocumentData { this.namespaces = null; this.structTreeAF = null; this.structTreePronunciationLexicon = []; + this.acroForm = null; + this.acroFormDefaultAppearance = ""; + this.acroFormDefaultResources = null; + this.acroFormQ = 0; + this.hasSignatureAnnotations = false; + this.fieldToParent = new RefSetCache(); } } @@ -124,6 +134,20 @@ class PDFEditor { structTreePronunciationLexicon = []; + fields = []; + + acroFormDefaultAppearance = ""; + + acroFormDefaultResources = null; + + acroFormNeedAppearances = false; + + acroFormSigFlags = 0; + + acroFormCalculationOrder = null; + + acroFormQ = 0; + constructor({ useObjectStreams = true, title = "", author = "" } = {}) { [this.rootRef, this.rootDict] = this.newDict; [this.infoRef, this.infoDict] = this.newDict; @@ -625,6 +649,7 @@ class PDFEditor { this.#fixPostponedRefCopies(allDocumentData); await this.#mergeStructTrees(allDocumentData); + await this.#mergeAcroForms(allDocumentData); return this.writePDF(); } @@ -648,6 +673,9 @@ class PDFEditor { pdfManager .ensureCatalog("structTreeRoot") .then(structTreeRoot => (documentData.structTreeRoot = structTreeRoot)), + pdfManager + .ensureCatalog("acroForm") + .then(acroForm => (documentData.acroForm = acroForm)), ]); const structTreeRoot = documentData.structTreeRoot; if (structTreeRoot) { @@ -683,7 +711,12 @@ class PDFEditor { async #postCollectPageData(pageData) { const { page: { xref, annotations }, - documentData: { pagesMap, destinations, usedNamedDestinations }, + documentData: { + pagesMap, + destinations, + usedNamedDestinations, + fieldToParent, + }, } = pageData; if (!annotations) { @@ -693,6 +726,7 @@ class PDFEditor { const promises = []; let newAnnotations = []; let newIndex = 0; + let { hasSignatureAnnotations } = pageData.documentData; // Filter out annotations that are linking to deleted pages. for (const annotationRef of annotations) { @@ -700,6 +734,20 @@ class PDFEditor { promises.push( xref.fetchIfRefAsync(annotationRef).then(async annotationDict => { if (!isName(annotationDict.get("Subtype"), "Link")) { + if (isName(annotationDict.get("Subtype"), "Widget")) { + hasSignatureAnnotations ||= isName( + annotationDict.get("FT"), + "Sig" + ); + const parentRef = annotationDict.get("Parent") || null; + // We remove the parent to avoid visiting it when cloning the + // annotation. + // It'll be fixed later in #mergeAcroForms when merging the + // AcroForms. + annotationDict.delete("Parent"); + fieldToParent.put(annotationRef, parentRef); + } + newAnnotations[newAnnotationIndex] = annotationRef; return; } @@ -735,6 +783,7 @@ class PDFEditor { await Promise.all(promises); newAnnotations = newAnnotations.filter(annot => !!annot); pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null; + pageData.documentData.hasSignatureAnnotations ||= hasSignatureAnnotations; } /** @@ -813,46 +862,52 @@ class PDFEditor { } const pageRef = this.newPages[i]; const pageDict = this.xref[pageRef.num]; + const visited = new RefSet(); + visited.put(pageRef); // Visit the new page in order to collect used StructParent entries. - this.#visitObject(pageDict, dict => { - const structParent = - dict.get("StructParent") ?? dict.get("StructParents"); - if (typeof structParent !== "number") { - return; - } - usedStructParents.add(structParent); - let parent = parentTree.get(structParent); - const parentRef = parent instanceof Ref ? parent : null; - if (parentRef) { - const array = xref.fetch(parentRef); - if (Array.isArray(array)) { - parent = array; + this.#visitObject( + pageDict, + dict => { + const structParent = + dict.get("StructParent") ?? dict.get("StructParents"); + if (typeof structParent !== "number") { + return; + } + usedStructParents.add(structParent); + let parent = parentTree.get(structParent); + const parentRef = parent instanceof Ref ? parent : null; + if (parentRef) { + const array = xref.fetch(parentRef); + if (Array.isArray(array)) { + parent = array; + } + } + if (Array.isArray(parent) && parent.every(ref => ref === null)) { + parent = null; + } + if (!parent) { + if (dict.has("StructParent")) { + dict.delete("StructParent"); + } else { + dict.delete("StructParents"); + } + return; + } + let newStructParent = oldStructParentMapping.get(structParent); + if (newStructParent === undefined) { + newStructParent = newStructParentId++; + oldStructParentMapping.set(structParent, newStructParent); + newParentTree.set(newStructParent, [oldRefMapping, parent]); } - } - if (Array.isArray(parent) && parent.every(ref => ref === null)) { - parent = null; - } - if (!parent) { if (dict.has("StructParent")) { - dict.delete("StructParent"); + dict.set("StructParent", newStructParent); } else { - dict.delete("StructParents"); + dict.set("StructParents", newStructParent); } - return; - } - let newStructParent = oldStructParentMapping.get(structParent); - if (newStructParent === undefined) { - newStructParent = newStructParentId++; - oldStructParentMapping.set(structParent, newStructParent); - newParentTree.set(newStructParent, [oldRefMapping, parent]); - } - if (dict.has("StructParent")) { - dict.set("StructParent", newStructParent); - } else { - dict.set("StructParents", newStructParent); - } - }); + }, + visited + ); } const { @@ -1159,6 +1214,361 @@ class PDFEditor { } } + async #mergeAcroForms(allDocumentData) { + this.#setAcroFormDefaultBasicValues(allDocumentData); + this.#setAcroFormDefaultAppearance(allDocumentData); + this.#setAcroFormQ(allDocumentData); + await this.#setAcroFormDefaultResources(allDocumentData); + const newFields = this.fields; + for (const documentData of allDocumentData) { + let fields = documentData.acroForm?.get("Fields") || null; + if (!fields && documentData.fieldToParent.size > 0) { + fields = this.#fixFields( + documentData.fieldToParent, + documentData.document.xref + ); + } + if (Array.isArray(fields) && fields.length > 0) { + this.currentDocument = documentData; + await this.#cloneFields(newFields, fields); + this.currentDocument = null; + } + } + } + + #setAcroFormQ(allDocumentData) { + let firstQ = 0; + let firstDocData = null; + for (const documentData of allDocumentData) { + const q = documentData.acroForm?.get("Q"); + if (typeof q !== "number" || q === 0) { + continue; + } + if (firstDocData?.acroFormQ > 0) { + documentData.acroFormQ = q; + continue; + } + if (firstQ === 0) { + firstQ = q; + firstDocData = documentData; + continue; + } + if (q === firstQ) { + continue; + } + firstDocData.acroFormQ ||= firstQ; + documentData.acroFormQ = q; + firstQ = 0; + } + + if (firstQ > 0) { + this.acroFormQ = firstQ; + } + } + + #setAcroFormDefaultBasicValues(allDocumentData) { + let sigFlags = 0; + let needAppearances = false; + const calculationOrder = []; + for (const documentData of allDocumentData) { + if (!documentData.acroForm) { + continue; + } + const sf = documentData.acroForm.get("SigFlags"); + if (typeof sf === "number" && documentData.hasSignatureAnnotations) { + sigFlags |= sf; + } + if (documentData.acroForm.get("NeedAppearances") === true) { + needAppearances = true; + } + const co = documentData.acroForm.get("CO") || null; + if (!Array.isArray(co)) { + continue; + } + const { oldRefMapping } = documentData; + for (const coRef of co) { + const newCoRef = oldRefMapping.get(coRef); + if (newCoRef) { + calculationOrder.push(newCoRef); + } + } + } + this.acroFormSigFlags = sigFlags; + this.acroFormNeedAppearances = needAppearances; + this.acroFormCalculationOrder = + calculationOrder.length > 0 ? calculationOrder : null; + } + + #setAcroFormDefaultAppearance(allDocumentData) { + // If all the DAs are the same we just use it in the AcroForm. Otherwise, we + // set the DA for each documentData and use for any annotations that don't + // have their own DA. + let firstDA = null; + let firstDocData = null; + for (const documentData of allDocumentData) { + const da = documentData.acroForm?.get("DA") || null; + if (!da || typeof da !== "string") { + continue; + } + if (firstDocData?.acroFormDefaultAppearance) { + documentData.acroFormDefaultAppearance = da; + continue; + } + if (!firstDA) { + firstDA = da; + firstDocData = documentData; + continue; + } + if (da === firstDA) { + continue; + } + firstDocData.acroFormDefaultAppearance ||= firstDA; + documentData.acroFormDefaultAppearance = da; + firstDA = null; + } + + if (firstDA) { + this.acroFormDefaultAppearance = firstDA; + } + } + + async #setAcroFormDefaultResources(allDocumentData) { + let firstDR = null; + let firstDRRef = null; + let firstDocData = null; + for (const documentData of allDocumentData) { + const dr = documentData.acroForm?.get("DR") || null; + if (!dr || !(dr instanceof Dict)) { + continue; + } + if (firstDocData?.acroFormDefaultResources) { + documentData.acroFormDefaultResources = dr; + continue; + } + if (!firstDR) { + firstDR = dr; + firstDRRef = documentData.acroForm.getRaw("DR"); + firstDocData = documentData; + continue; + } + if (deepCompare(firstDR, dr)) { + continue; + } + firstDocData.acroFormDefaultResources ||= firstDR; + documentData.acroFormDefaultResources = dr; + firstDR = null; + firstDRRef = null; + } + + if (firstDR) { + this.currentDocument = firstDocData; + this.acroFormDefaultResources = await this.#collectDependencies( + firstDRRef, + true, + firstDocData.document.xref + ); + this.currentDocument = null; + } + } + + /** + * If the document has some fields but no Fields entry in the AcroForm, we + * need to fix that by creating a Fields entry with the oldest parent field + * for each field. + * @param {Map} fieldToParent + * @param {XRef} xref + * @returns {Array} + */ + #fixFields(fieldToParent, xref) { + const newFields = []; + const processed = new RefSet(); + for (const [fieldRef, parentRef] of fieldToParent) { + if (!parentRef) { + newFields.push(fieldRef); + continue; + } + let parent = parentRef; + let lastNonNullParent = parentRef; + while (true) { + parent = xref.fetchIfRef(parent)?.get("Parent") || null; + if (!parent) { + break; + } + lastNonNullParent = parent; + } + if (!processed.has(lastNonNullParent)) { + newFields.push(lastNonNullParent); + processed.put(lastNonNullParent); + } + } + return newFields; + } + + async #cloneFields(newFields, fields) { + const processed = new RefSet(); + const stack = [ + { + kids: fields, + newKids: newFields, + pos: 0, + oldParentRef: null, + parentRef: null, + parent: null, + }, + ]; + const { + document: { xref }, + oldRefMapping, + fieldToParent, + acroFormDefaultAppearance, + acroFormDefaultResources, + acroFormQ, + } = this.currentDocument; + const daToFix = []; + const drToFix = []; + + while (stack.length > 0) { + const data = stack.at(-1); + const { kids, newKids, parent, pos } = data; + if (pos === kids.length) { + stack.pop(); + if (newKids.length === 0 || !parent) { + continue; + } + + const parentDict = (this.xref[data.parentRef.num] = + this.cloneDict(parent)); + parentDict.delete("Parent"); + parentDict.delete("Kids"); + await this.#collectDependencies(parentDict, false, xref); + parentDict.set("Kids", newKids); + + if (stack.length > 0) { + const lastData = stack.at(-1); + if (!lastData.parentRef && lastData.oldParentRef) { + const parentRef = (lastData.parentRef = this.newRef); + parentDict.set("Parent", parentRef); + oldRefMapping.put(lastData.oldParentRef, parentRef); + } + lastData.newKids.push(data.parentRef); + } + continue; + } + const oldKidRef = kids[data.pos++]; + if (!(oldKidRef instanceof Ref) || processed.has(oldKidRef)) { + continue; + } + processed.put(oldKidRef); + const kid = xref.fetchIfRef(oldKidRef); + if (kid.has("Kids")) { + const kidsArray = kid.get("Kids"); + if (!Array.isArray(kidsArray)) { + continue; + } + stack.push({ + kids: kidsArray, + newKids: [], + pos: 0, + oldParentRef: oldKidRef, + parentRef: null, + parent: kid, + }); + + continue; + } + + if (!fieldToParent.has(oldKidRef)) { + continue; + } + const newRef = oldRefMapping.get(oldKidRef); + if (!newRef) { + continue; + } + newKids.push(newRef); + if (!data.parentRef && data.oldParentRef) { + data.parentRef = this.newRef; + oldRefMapping.put(data.oldParentRef, data.parentRef); + } + const newKid = this.xref[newRef.num]; + if (data.parentRef) { + newKid.set("Parent", data.parentRef); + } + if ( + acroFormDefaultAppearance && + isName(newKid.get("FT"), "Tx") && + !newKid.has("DA") + ) { + // Fix the DA later since we need to have all the fields tree. + daToFix.push(newKid); + } + if ( + acroFormDefaultResources && + !newKid.has("Kids") && + newKid.get("AP") instanceof Dict + ) { + // Fix the DR later since we need to have all the fields tree. + drToFix.push(newKid); + } + if (acroFormQ && !newKid.has("Q")) { + newKid.set("Q", acroFormQ); + } + } + + for (const field of daToFix) { + const da = getInheritableProperty({ dict: field, key: "DA" }); + if (!da) { + // No DA in a parent field, we can set the default one. + field.set("DA", acroFormDefaultAppearance); + } + } + const resourcesValuesCache = new Map(); + for (const field of drToFix) { + const ap = field.get("AP"); + for (const value of ap.getValues()) { + if (!(value instanceof BaseStream)) { + continue; + } + let resources = value.dict.getRaw("Resources"); + if (!resources) { + const newResourcesRef = + await resourcesValuesCache.getOrInsertComputed( + acroFormDefaultResources, + () => this.#cloneObject(acroFormDefaultResources, xref) + ); + value.dict.set("Resources", newResourcesRef); + continue; + } + + resources = xref.fetchIfRef(resources); + for (const [ + resKey, + resValue, + ] of acroFormDefaultResources.getRawEntries()) { + if (!resources.has(resKey)) { + let newResValue = resValue; + if (resValue instanceof Ref) { + newResValue = await this.#collectDependencies( + resValue, + true, + xref + ); + } else if ( + resValue instanceof Dict || + resValue instanceof BaseStream || + Array.isArray(resValue) + ) { + newResValue = await resourcesValuesCache.getOrInsertComputed( + resValue, + () => this.#cloneObject(resValue, xref) + ); + } + resources.set(resKey, newResValue); + } + } + } + } + } + async #collectPageLabels() { // We can only preserve page labels when editing a single PDF file. // This is consistent with behavior in Adobe Acrobat. @@ -1484,6 +1894,33 @@ class PDFEditor { rootDict.set("StructTreeRoot", structTreeRef); } + #makeAcroForm() { + if (this.fields.length === 0) { + return; + } + const { rootDict } = this; + const acroFormRef = this.newRef; + const acroForm = (this.xref[acroFormRef.num] = new Dict()); + rootDict.set("AcroForm", acroFormRef); + acroForm.set("Fields", this.fields); + if (this.acroFormNeedAppearances) { + acroForm.set("NeedAppearances", true); + } + if (this.acroFormSigFlags > 0) { + acroForm.set("SigFlags", this.acroFormSigFlags); + } + acroForm.setIfArray("CO", this.acroFormCalculationOrder); + acroForm.setIfDict("DR", this.acroFormDefaultResources); + if (this.acroFormDefaultAppearance) { + acroForm.set("DA", this.acroFormDefaultAppearance); + } + if (this.acroFormQ > 0) { + acroForm.set("Q", this.acroFormQ); + } + // We don't merge XFA stuff because it'd require to parse, extract and merge + // all the data, which is a lot of work for a deprecated feature (i.e. XFA). + } + /** * Create the root dictionary. * @returns {Promise} @@ -1492,6 +1929,7 @@ class PDFEditor { const { rootDict } = this; rootDict.setIfName("Type", "Catalog"); rootDict.setIfName("Version", this.version); + this.#makeAcroForm(); this.#makePageTree(); this.#makePageLabelsTree(); this.#makeDestinationsTree(); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 9eea6f975..01472be49 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -878,3 +878,4 @@ !two_pages.pdf !sci-notation.pdf !nested_outline.pdf +!form_two_pages.pdf diff --git a/test/pdfs/form_two_pages.pdf b/test/pdfs/form_two_pages.pdf new file mode 100644 index 0000000000000000000000000000000000000000..079319f6deb0c3d38c93139ee764f8c96049cd3e GIT binary patch literal 23136 zcmeHP33L=yx&|f;CKA8}a6v5*5QL=mErviyCniEzk|0RiNH3M7p_kALNmN8YWphDM zQ5;1@a73LEH%6UN5Jg#)MMW7=K?Ri&L6$)j-uK_?bQ%dh-n^Oj&df>9Nq5z)yZzt4 z-uvIK?n50#Sz@-9*?s$tcQ<8bs46FLVb9pi++4oMABb5Iz9`_1Sq{q^_E{*3L}Q$& z@`J2URjis5C4nEz$$Uk9jl~b~j4h7^^YWsxh~*AuW>jyJh6_W5a$!k@FhWp-Vb3nv zP`jRA*L#?-v357;OTS+@@`aXZ z=d}3ta^c)eE96Ve0arwfs{PNX(zRcs>ghYyWlR-1+{etDr_I-CRtE{~A+O)LvH#5qH&bYPDisv5;w7C7lM-RojPU!V; z_0;!|yfyNLAqQ6$e_#E~=@-5*>KgO;$oT@aws}?Vw4w`NSI$4@FSpFtb*(V( z#+whHCbT&07}Kr^H1tzILsc4hEGxug<77O3dd{Z{0sN541bf+_cWR-FIvtX211d+3qw^XGTG<@Yze`AOE>t7p~Ed;Hn!&l^2& zKqqa+*PE)g*7rZ_{ii4HjjlfPncubS9o*h(%8nM3yLa|oa^VN>9zC+4apwf+y$?EQfKC+-S^$(d!K6g?8e&1M%}bw)C2jBVWXG4G%82Ed7Myw!^gs0>@9L-zf4Fnuoz)-q`licMM~+7LHr|)#?&$DI=tjt|F#D+91jme1;Fv5ma7+?R za7;ZJ$L_g(O~xDj+ZImhetoa6*KY1y+5nKS>sC8h@#wB3B$mSNxTdFZk!8B;>p z-Cj1YILM`abjuy@wmWTM|NZ$HEv2+CpIF}FEZ3%Yo?9{KqS_}vpZEvY?Wa%VUN4DW zGe26}_sq}@)35iG-?`(%PsQP#Ba7ZMzIfxlhg$`|eE+F7zc-JiXPoK0_tWYbKi{s+ zU%%+;59Xb(-8c8r^PjGs^oRZF<%>_>UiSRB>r_YkpUS7rxqHjGwV5l!k9E)|tXVnt zw8wTG-S^h-GAAj$_b$D9)sP=bZ)(}1?XE`_p9q)Oh*rKg5-tiyf}CWcu#7LYVs4*1 z=H?`JJ0uf3eCtFL$E{Jy?jN2sMjX-$Du?JvGAVwGPv zxWaqgIIrpJ+Al97w=SnHSQEsoxw>E=6wRsY+trPkSUI?+V!kWKs>H_i?V3*yxRIqp zxq@)S;>>I@%PR;Xr<>WDuKI*Zy$>e|lFSPlFN#^BnInlgqQ)iuy5?ozDmUV*%qepe zCC0;D->%iMSWOPk*Vfi%*UH)9NEI)drpXHuFG*Rbkrk~E#oTpSq39)cCk_2Ltf)8Q zuZj7?A&&ZWd&2Qp->zM0pyXeIuwYGcpinfM5t;1`2l+a84KHR3d{d2l-ejAacqG84 z=JWDazzSNSSQPEVCffni($ZwpCWi|Kn`~K=g2!>~@BqWfO*V4-s>9yeMj(%CNFwTS z$MUJph;U?FWguK@6U_Kw!<35|=ahzh{>pj>+;JY*sAUOqmY`RNI!Sd^8SebVK13}?P*VFysridO8ic!vUQV1zLg855U-ZL92uCWy;Xs}(y9q2f zNzE3sB~g-Oo1WB~DV_MdIh9aucP!6cQxoue-6Uu{IFZXYK1!+2*q-AEd*dV?C62s! z-0#b=46V}Z72R2;h!4Y1JXwZNsb;w~RjE`3-DBxGn?`AAYTFa~EeS&fQ`)BV3rcc^M#8?h*NXfKLdR80X`T?f z983!$Bj)nQ7c^0Gf)$AZ!aV3rW9^2*MEzWzG&x1HZL`5jVK-p2_@vQFk>I4d|MM>z z3Hj&uo$RrBkR%kRa?uResi2gs(+pPgta3Q*mDX1YCSMe?_<=+_(}GQ&xEnCoPc6 z`2V&|%07@Izb9X81twq*i>s(a^QvBaQ3YR7#}D-TqE3;m2wi+d&kCz9CiZF^n5`re zYrteDLZT0@7w;&;-dSM?`(^%66)v&EhKRGFfK%D>GB1xW3Ws8NTx<;pa}rIg%$|=# z+cRbSD7VAINsO*M{9)_O3edb*O-^PHIFZ%mMEg!+U2~E`9k?22ET_PmShea}jU>gd={eE+c*~7x8EC1;Vvp1>uw-nL@Uxnw)GRKsQk2%gDtV zQ_iaxn^9PiaUyT!20l4uHa!UOsk((Dv3nH)dS0 zwP5_+qYK8w+D3M1^ZwSdi*r@m+XH9soV|4Z_FLQacsBp&u^~U*_u318%sQO*!UF%SQ4rXb21YK-Z6p`1wmj!*ATjgBb;pSdo*lcpeglD zTOWeGkHLl-;S`*zAaD{yQ_?j~77dqzEfd8MIL)bJcU6ZrVzK5jF$#v%I8k&e0_vcK z1R<7DhkeVU3o;9Fr?%9c!OA7msj3pIPT#U_ayk@6GewjcoTjOqj=8Fu!Ku(c9ongD zn1~{BGVV29hF%$*f%>{4<6lQz`c^qj5;;XzI1S_KGNvlXoT-Z(7I&NmbTr({sE2(V z_N}5{n^=WYWE0m0r$Dn*OjVIoY$RdwG!L|u@GXHf1!WTEYlx^%V*>>hYN!Gh8Dq(s z&dCDi4`om=FUsY=`rwENvm^pxsxKN4P(^l$ z27qDGLEi)(z%pfALn1(<2tY)DND_g)EhnVHC6UNuY?1;=(42}Sq70k@cbO!R&||)g zZc%lD^DYVYSpwK30g@y-!FiVi+$2a2ai72^H6kpa5a<(iA%P@u#8?HYC4pfQq?xb* zgPf!VCfaF`Obu-rh9t>!53E&~e+{^50x^~tPbva&Qb3>t(Nob_B}Onwz(gQsGKoDv z(!fwi4MqSZ22jLym>(Q4vMA6nQYQQx^iBMM=7Dhv z7|F_@Jj}F^Mp7Lb3wW_P8+b+n4ilqeKClLKVNV!a&}axr1ye}171%b##61|G==g>W zCBm45fe;n;kX;ji1P};N*Z?%Br3%0X*UYC0=vRQ08X_K$ zbiq8b2c*uVP2_}NKFAs2+NClFgliD2N-%isYC}7+1^}6jt;iq-V*qF}*#=3hLVg{N64FfbA+OIEp{OqAsPT+^j0%R3Z->K1 zIodIXU>wkglU2X;3SL%>!d;!oKmtl8A405}x|3l`7=k&( zSZJ^_5m2}^2)#y@0?MVqmm;`uY2X!^TnaKYSU(MlK*0ur3%D5*&ruh6k@dhoxfv2H zmj?c6aHlK~fh*9+oFgD1bFPuKCi-Ln^Oar1D-FD~%g_$bSinLi9?HUk7VweqU=~M_ zoN&u7jphU%GNY{lcnuRH?a~y(2?0X|T$6>M$*8guX2r##ATW*mnWmCIQc#9^!;I?WqcLY4mPm(& zkx$j3VKg`Lzho|LO9@b+XgVLG%(+X4sn(f*p-h&bfCkDi1{8|b zlT|1m)x%h%P^hcRz!zBRn6p7v zj+K#7Lw`_O16UhS5OS*qgq@;e@)7hv6ZGC*=bVbXo=ApUDDv!1^a#A&Uml z03J}Eh9*P7b(F!dk}jaFp^=fKGSEh20!IVb8^FN?Rt7w(0pr8UfwuuXX}$&wkO^97 z&L%LRx+Xj*(oZZ3W92YV6!X#8CX9qh1_ARkA&Vx;;Qny~;WDA<>;bH{2_+;#O;}|j z6pdXbtf`4>ny?9FrVh!%8BjEnl!g=)!8{BC+BN_TA;yH6SAy`;q|qnt4VMX_GQlt% zhQu@^Cu>VygM^5fz>0M_`x=xhXrS0V~QF*POx z*a%Qt+)+fsnxGdzYmY!{N!%qyKrP&nw%QkHN9#W}7ZHY16oCkviwIP32NtuMFodEA z!C~}?fCP8IoyHTvD11Nw*idjoD0ZQQh9ZYSgo-4r#1Zxgu%QG?1j1iLG$7JykF7vO zm^fL*Qci@f;e!kUdyW;K2s5WZ3`Ef&+maAxk`2;r5^K^m_$(7$>hKQ+*=fv?944s< zIa%_RG*b$_WdP0=%*;oS3>sJuLs{_w$$>KA0u#qj6aZ2@rjlrpm{JTzE{2H0(Bzsl zTI}PW#Ykk1K$H%1NAVrvH|A<72qyQ%`oysG&87jk$c>2-0WZN2kQ2rQ3)`6Y)oI^> z!akY~9#R-bo|L>Vc`CBN&f+sS_L)q}3O+%rLQZm#BoIN!pshh3=$v zi$;qY(sWcK1I2n{)zMazNY6nTE(|ilHc5mK46;b;5PXmTQ57%)l8P=#N61@3Txoik zu#A8WABl|t8L}?Q7?66z1yz8PC%ABd$;m9gS;Ia&|}jH~GDU%CFKxiXCcJgIB%hIKcD(|LSR98E z7zwrqM?irCER@3qK&16F!;kh7VTho!#1nk5I1U%U$X-UVV(2--9F$N`uy71*taDj3 zhZ5kc0z36XG|KZL!cYh$1-LsmTxyH{h2_vO`j>lI+;) z9U4oAlOkx?q{z^!h%|^Orfc{hzsK&dX@y`8CDa?7NurpjhBIMON=qZ$K^90d5E~_k zrHv#FoM*)v8E_4LnBqEI00=CJZ1JHrk*JZcCPzf8BaxOm3=NGWpB6I=G7W|WcUXHf z3qf$w8iffc7Ad%cUA4y}KY;Z}Vg%I$GPc>Gc!F(5pa$DWBjF_dqhN_44X3WrHYSDt zaKChc*qFs$2$c}enb3UNCc=g_Ga3qLvUfvZfyhb%ZL*^91FqC83hksPcHxIad{zYM zot`B=93=9`>U$x1Qsu{~oQe*JWFBCUj7{p6v|)T~EA>NpJKxoEalgg^l4S{&C%IMFW$87l0yae?VXmkroDOLhEKQNcFU$6J&?&=@AJ^%Pd^Yurb>nZnL z;8_0N#*O{kIL@0=a*5z?)nncA$8Wh~%3YHux4d6zHK#|$nCH))(tFmxvgoXixj)?f z!>o*bv(6k?R=)r2F44%@%ZfVpK6_!OCpz6bIql-thP~70s^WEhRuyNJ6{lUfsH)Y0 z&I=dyyyxuhiyr@~M<=WI7gK$+|KzWGy6l~`@aU``u5gyTeP0l@ys8}_D^2) z#nfO49oX-+dQjE}n`%#Q_hg^q;m-~&E-78n*2#7F?HPk^yDh)>q*!dv&wDHROJ-Mm zwC1M;1-q-3&A;RImb?4)xpDQ3qc0uldwA(5hgLn^Z_Cl$om($?ZED@5{(s&xW?Q(! zo;K6ll%;>M_vfBpzqW6|;VwH44LtO~q;qZ<(c!?VXU4U^_vqi=J?Q%S)gIseu=K$@ z``-PdXX3|(ut?}VBKw`OZu8qk3!UvBp8MJ>eOLc=^V=Ig9EL-YMo;7B1z)u3d}RLx zm^FPTRTLFTeiO`uojsKPxYNTE6qyHCZ*|?%etL z?yo){y)HiMd~R8b*uJlhH6d1%Vm&VFi?l6_UP|hCKrF-;RYGU6TV*GaVf`o*izM=Z zcq|qUNhxV$#1hF4nI;R_NYmn9m+&FsiWE5#q+}c?HXQ9%CBD>+ylkBlM`BrZYLas8 zf+32A;}NeFrQCBQ>@BxqPD(<D|A!6$U?qwZx()e%>7@C8EDmf*z>YYs%98nigiT2iIl1LO zA^-m_<^NY6pa0)-5zGIVWX@vw|34=3|F6yed!LE^Yn}Vj_jhiy)jehT9PjT|ywT~= zw>QkWtar_}MSoed^r1l&qdp5gbk^Xk%>4J&T~AGqZtvKR&)#2G@#CBeo_peqcKzml zciJmX&rffq_sVZs`kQub&fOo{KJ9@Hv6X|LIqQ=r4s=^Pf5-K=pEmrvHCLQ-ewXzd zM*0daS^UcEv5(w1XYAQ`A9-R=MZYZ%e!k9i)%NU}(FZ;`a%APIL!X_yI(zNim+s-( zeAKn>jh*v6)49zPHoK7jf8?R?oD*#i1MZjk|I#Z@DgTcpIE54^$^RqzqNmOIe=G_S zoZ9IM+5;gIg}9vU2p|$fq)72~bN;_M|KFVdM|_Ws9_(^8=l`*1j%};v{6Fna&^{II zNyFhc=l`4Y|Jdtp&i^;({}Ev}=l_v9z~*Xm{-1Jc&H4Z4{6A8t&G~;MCs>A}Isf0B z|7Yo@=KOzi{=Yf@Pv;)!xC&+ToAdv`fb!eT`Tzg_{6E`C|KH~S*@k9Dbw{>*G#RuijhP?f0i-Q++ z>i4B%($=3J{%PxXM|>Y#6qs~y)|8Q-=Kr}Lx2;y#qfbBdn>T;+WZJ&J-*``qKeEKL z_h@A0o*A_dOqxDD%d6kNIp^?mo9}#i_r(iacHEZPrTgwzFTZ+e?ZKtD4E*^Ft2Q)F ztG=uEn4P~V%-Pw#{=yg4elMN={@NcuH?RMA?ViPQ<@!3{urN4@cC z{hvleKiU3UoEw>#{^6M?$_ zWu+Cd@ZPAXQ&MxCN z`5G${s}9Gb?vT%(2DUPwbZLuCY9&kPXI~$0BZ@6?8|&@s)neiyssX z&F0VP2s)q2(h3Z zHyT(F9%qF-?g$;J;{ zfS8hx54!P!w-E2Msv?%f*978G8>?7tI2s3F{&0kkRpVLm&h3rIEIt^=ua{+l)F1W{ z>sfDJ%jXXS+&uW6tdIEw-BE9xUS)_j20a`fcSq>$t+=dq2P*9m5`|Gp)bROCF!+3X zwEPrd%V(m*=O;O+uG%ifGDzWR7*mX)$>Xanw6vl9{&>n7x zU44jMeMn06A&CmfR>626=C29V^FwUr4!4^Qx0?=6X*xXF=4$(4MRhn5;vxS*$fzd} z<=u8$xBb|i(iZQ+fJWh^X$_*~W-z#+o((X7r%*25vKw0|jlrgbK>F=Ee!Gr8rH(&Y z#{vvEM@qwm?doB>dN`$eI8hA7t;Qt|XRae3??K2{iVVHTP#idGEff^wJ49oEP*~tFi?qC4{C?KqD8zw( z{5BFlOVaWUv~c93eo?+da1`dbc=pB(23lFefjzP=W#Mp)BL|F=Hva1@{HP*v_O1+; zVuTaFo#8~B&P%+r81GH?cv`gTQFPx^Z3nfzZA1SL!tH*$Xj0ot&$<8pbJkAwv~7Ev zFlBGQn-5(3&Yp{ATW>7ef9#?2N2cF1#`A32%=BREdHdF%|7hm=AKPm;wRZees2*-V z{lG;7e(HQQT?(J|PX3GBech%nY~8Kf$NjGv(P2b~H#5@CT>H7uX-D4< a.fieldName) + .map(a => a.fieldName) + .sort(); + + // Extract only page 2 (0-based index = 1). + const data = await pdfDoc.extractPages([ + { document: null, includePages: [1] }, + ]); + await loadingTask.destroy(); + + loadingTask = getDocument(data); + pdfDoc = await loadingTask.promise; + + expect(pdfDoc.numPages).toEqual(1); + + // The AcroForm Fields in the new PDF should correspond exactly to the + // annotations that were on page 2 of the original document, with the + // same T entries (encoded in fieldName). + const page = await pdfDoc.getPage(1); + const annotations = await page.getAnnotations(); + const fieldNames = annotations + .filter(a => a.fieldName) + .map(a => a.fieldName) + .sort(); + + expect(fieldNames).toEqual(origFieldNames); + + // Also verify the AcroForm Fields via getFieldObjects, which directly + // reflects the T entries of the fields in the AcroForm dictionary. + const fieldObjects = await pdfDoc.getFieldObjects(); + expect(fieldObjects).not.toBeNull(); + expect(Object.keys(fieldObjects).sort()).toEqual(origFieldNames); + + await loadingTask.destroy(); + }); + + it("merge pages 2 and 1 and check AcroForm Fields T entries", async function () { + let loadingTask = getDocument( + buildGetDocumentParams("form_two_pages.pdf") + ); + let pdfDoc = await loadingTask.promise; + + // Collect fieldNames from each page of the original document. + const origPage1 = await pdfDoc.getPage(1); + const origPage1FieldNames = (await origPage1.getAnnotations()) + .filter(a => a.fieldName) + .map(a => a.fieldName) + .sort(); + + const origPage2 = await pdfDoc.getPage(2); + const origPage2FieldNames = (await origPage2.getAnnotations()) + .filter(a => a.fieldName) + .map(a => a.fieldName) + .sort(); + + // Extract page 2 first, then page 1. + const data = await pdfDoc.extractPages([ + { document: null, includePages: [1] }, + { document: null, includePages: [0] }, + ]); + await loadingTask.destroy(); + + loadingTask = getDocument(data); + pdfDoc = await loadingTask.promise; + + expect(pdfDoc.numPages).toEqual(2); + + // Page 1 of the new PDF should have the fields from original page 2. + const page1 = await pdfDoc.getPage(1); + const page1FieldNames = (await page1.getAnnotations()) + .filter(a => a.fieldName) + .map(a => a.fieldName) + .sort(); + expect(page1FieldNames).toEqual(origPage2FieldNames); + + // Page 2 of the new PDF should have the fields from original page 1. + const page2 = await pdfDoc.getPage(2); + const page2FieldNames = (await page2.getAnnotations()) + .filter(a => a.fieldName) + .map(a => a.fieldName) + .sort(); + expect(page2FieldNames).toEqual(origPage1FieldNames); + + // The AcroForm Fields should contain all fields from both pages. + const fieldObjects = await pdfDoc.getFieldObjects(); + expect(fieldObjects).not.toBeNull(); + const allOrigFieldNames = [ + ...new Set([...origPage1FieldNames, ...origPage2FieldNames]), + ].sort(); + expect(Object.keys(fieldObjects).sort()).toEqual(allOrigFieldNames); + + await loadingTask.destroy(); + }); + }); }); }); diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js index 402796de5..2005d8ced 100644 --- a/test/unit/core_utils_spec.js +++ b/test/unit/core_utils_spec.js @@ -15,6 +15,7 @@ import { arrayBuffersToBytes, + deepCompare, encodeToXmlString, escapePDFName, escapeString, @@ -474,6 +475,103 @@ describe("core_utils", function () { }); }); + describe("deepCompare", function () { + it("should return true for the same reference", function () { + const dict = new Dict(); + expect(deepCompare(dict, dict)).toBeTrue(); + const arr = [1, 2, 3]; + expect(deepCompare(arr, arr)).toBeTrue(); + }); + + it("should return true for identical primitive values", function () { + expect(deepCompare(1, 1)).toBeTrue(); + expect(deepCompare("hello", "hello")).toBeTrue(); + expect(deepCompare(null, null)).toBeTrue(); + }); + + it("should return false for different primitive values", function () { + expect(deepCompare(1, 2)).toBeFalse(); + expect(deepCompare("hello", "world")).toBeFalse(); + }); + + it("should return true for two equal empty Dicts", function () { + expect(deepCompare(new Dict(), new Dict())).toBeTrue(); + }); + + it("should return false for Dicts with different sizes", function () { + const a = new Dict(); + a.set("key", 1); + expect(deepCompare(a, new Dict())).toBeFalse(); + }); + + it("should return true for Dicts with same Ref values", function () { + const ref = Ref.get(10, 0); + const a = new Dict(); + a.set("Foo", ref); + const b = new Dict(); + b.set("Foo", ref); + expect(deepCompare(a, b)).toBeTrue(); + }); + + it("should return false for Dicts with different Ref values", function () { + const a = new Dict(); + a.set("Foo", Ref.get(10, 0)); + const b = new Dict(); + b.set("Foo", Ref.get(20, 0)); + expect(deepCompare(a, b)).toBeFalse(); + }); + + it("should return false for Dicts with different numeric values", function () { + const a = new Dict(); + a.set("Foo", 1); + const b = new Dict(); + b.set("Foo", 2); + expect(deepCompare(a, b)).toBeFalse(); + }); + + it("should return true for equal nested Dicts", function () { + const inner1 = new Dict(); + inner1.set("Bar", Ref.get(5, 0)); + const outer1 = new Dict(); + outer1.set("Foo", inner1); + + const inner2 = new Dict(); + inner2.set("Bar", Ref.get(5, 0)); + const outer2 = new Dict(); + outer2.set("Foo", inner2); + + expect(deepCompare(outer1, outer2)).toBeTrue(); + }); + + it("should return false for Dicts with the same key but different nested Dicts", function () { + const inner1 = new Dict(); + inner1.set("Bar", Ref.get(5, 0)); + const outer1 = new Dict(); + outer1.set("Foo", inner1); + + const inner2 = new Dict(); + inner2.set("Bar", Ref.get(99, 0)); + const outer2 = new Dict(); + outer2.set("Foo", inner2); + + expect(deepCompare(outer1, outer2)).toBeFalse(); + }); + + it("should return true for equal arrays", function () { + const ref = Ref.get(1, 0); + expect(deepCompare([ref, ref], [ref, ref])).toBeTrue(); + }); + + it("should return false for arrays with different lengths", function () { + const ref = Ref.get(1, 0); + expect(deepCompare([ref, ref], [ref])).toBeFalse(); + }); + + it("should return false for arrays with different values", function () { + expect(deepCompare([Ref.get(1, 0)], [Ref.get(2, 0)])).toBeFalse(); + }); + }); + describe("getSizeInBytes", function () { it("should get the size in bytes to use to represent a positive integer", function () { expect(getSizeInBytes(0)).toEqual(0);