mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-23 00:15:51 +02:00
Merge pull request #21465 from calixteman/fix_attachments
Re-derive annotation attachment content from the xref after cleanup
This commit is contained in:
commit
3956ac1b39
@ -5408,33 +5408,28 @@ class FileAttachmentAnnotation extends MarkupAnnotation {
|
||||
super(params);
|
||||
|
||||
const { annotationGlobals, dict } = params;
|
||||
const fileSpecRef = dict.getRaw("FS");
|
||||
const fsDict = dict.get("FS");
|
||||
const file = new FileSpec(fsDict);
|
||||
/** @type {{catalog?: Catalog}} */
|
||||
const { catalog } = annotationGlobals.pdfManager.pdfDocument;
|
||||
|
||||
// When this annotation references an embedded file that’s already in the
|
||||
// catalog `NameTree` (such as `EFOpen`), reuse that `NameTree` id so the
|
||||
// sidebar and annotation paths resolve the same attachment identity.
|
||||
let fileId =
|
||||
fileSpecRef instanceof Ref
|
||||
? catalog?.attachmentIdByRef.get(fileSpecRef)
|
||||
: undefined;
|
||||
|
||||
// Fallback ids are namespaced to keep annotation-local ids distinct from
|
||||
// `NameTree` ids (which are filename-based).
|
||||
if (catalog && fsDict instanceof Dict && typeof fileId !== "string") {
|
||||
const baseFileId = `annotation:${this.data.id}`;
|
||||
fileId = baseFileId;
|
||||
|
||||
let i = 1;
|
||||
while (catalog.attachmentDictById.has(fileId)) {
|
||||
fileId = `${baseFileId}-${i++}`;
|
||||
// Encode the embedded content's reference in the id so it can be
|
||||
// re-fetched from the xref on demand (see `Catalog.attachmentContent`)
|
||||
// instead of being cached where `cleanup` would wipe it. The file-spec is
|
||||
// usually indirect; when it's inline its embedded-file stream still isn't
|
||||
// (streams are always indirect), so fall back to that ref.
|
||||
let fileId;
|
||||
if (fsDict instanceof Dict) {
|
||||
let contentRef = dict.getRaw("FS");
|
||||
if (!(contentRef instanceof Ref)) {
|
||||
contentRef = FileSpec.pickPlatformItem(
|
||||
fsDict.get("EF"),
|
||||
/* raw = */ true
|
||||
);
|
||||
}
|
||||
if (contentRef instanceof Ref) {
|
||||
fileId = catalog?.getAttachmentIdForAnnotation(contentRef);
|
||||
}
|
||||
|
||||
// Cache only fallbacks.
|
||||
catalog.attachmentDictById.set(fileId, fsDict);
|
||||
}
|
||||
|
||||
this.data.hasOwnCanvas = this.data.noRotate;
|
||||
|
||||
@ -119,18 +119,12 @@ function fetchRemoteDest(action) {
|
||||
class Catalog {
|
||||
#actualNumPages = null;
|
||||
|
||||
/** @type {RefSetCache | null} */
|
||||
#attachmentIdByRef = null;
|
||||
#annotationAttachmentIdByRef = new RefSetCache();
|
||||
|
||||
#annotationAttachmentRefById = new Map();
|
||||
|
||||
#catDict = null;
|
||||
|
||||
/**
|
||||
* Attachment dictionaries keyed by attachment id.
|
||||
*
|
||||
* @type {Map<string, Dict>}
|
||||
*/
|
||||
attachmentDictById = new Map();
|
||||
|
||||
builtInCMapCache = new Map();
|
||||
|
||||
fontCache = new RefSetCache();
|
||||
@ -164,33 +158,44 @@ class Catalog {
|
||||
this.toplevelPagesDict; // eslint-disable-line no-unused-expressions
|
||||
}
|
||||
|
||||
/**
|
||||
* Attachment ids keyed by embedded-file reference.
|
||||
*
|
||||
* @type {RefSetCache}
|
||||
*/
|
||||
get attachmentIdByRef() {
|
||||
if (this.#attachmentIdByRef) {
|
||||
return this.#attachmentIdByRef;
|
||||
}
|
||||
|
||||
const attachmentIdByRef = new RefSetCache();
|
||||
for (const [name, ref] of this.rawEmbeddedFiles || []) {
|
||||
if (!(ref instanceof Ref)) {
|
||||
continue;
|
||||
}
|
||||
attachmentIdByRef.put(
|
||||
ref,
|
||||
stringToPDFString(name, /* keepEscapeSequence = */ true)
|
||||
);
|
||||
}
|
||||
return (this.#attachmentIdByRef = attachmentIdByRef);
|
||||
}
|
||||
|
||||
cloneDict() {
|
||||
return this.#catDict.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an id for an attachment from a FileAttachment annotation.
|
||||
*
|
||||
* The id is registered here rather than parsed from a public string prefix in
|
||||
* `attachmentContent`, since catalog attachment names can be arbitrary PDF
|
||||
* strings and may otherwise collide with annotation-local ids.
|
||||
*
|
||||
* @param {Ref} ref
|
||||
* File-spec or embedded-file stream reference.
|
||||
* @returns {string}
|
||||
* Attachment id.
|
||||
*/
|
||||
getAttachmentIdForAnnotation(ref) {
|
||||
let id = this.#annotationAttachmentIdByRef.get(ref);
|
||||
if (id) {
|
||||
return id;
|
||||
}
|
||||
|
||||
const baseId = `attachmentRef:${ref.toString()}`;
|
||||
id = baseId;
|
||||
|
||||
let i = 1;
|
||||
while (
|
||||
this.#annotationAttachmentRefById.has(id) ||
|
||||
this.attachments?.has(id)
|
||||
) {
|
||||
id = `${baseId}-${i++}`;
|
||||
}
|
||||
|
||||
this.#annotationAttachmentIdByRef.put(ref, id);
|
||||
this.#annotationAttachmentRefById.set(id, ref);
|
||||
return id;
|
||||
}
|
||||
|
||||
get version() {
|
||||
const version = this.#catDict.get("Version");
|
||||
if (version instanceof Name) {
|
||||
@ -1157,19 +1162,12 @@ class Catalog {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get content for an attachment.
|
||||
*
|
||||
* @param {string} id
|
||||
* Unique attachment identifier (required).
|
||||
* @returns {CatalogAttachmentContent}
|
||||
* Content.
|
||||
* Unique attachment identifier.
|
||||
* @returns {CatalogAttachmentContent | undefined}
|
||||
* Content, or `undefined` when no named attachment exists for the id.
|
||||
*/
|
||||
attachmentContent(id) {
|
||||
const dict = this.attachmentDictById.get(id);
|
||||
if (dict) {
|
||||
return FileSpec.readContent(dict);
|
||||
}
|
||||
|
||||
#attachmentContentByName(id) {
|
||||
const obj = this.#catDict.get("Names");
|
||||
if (obj instanceof Dict && obj.has("EmbeddedFiles")) {
|
||||
const nameTree = new NameTree(obj.getRaw("EmbeddedFiles"), this.xref);
|
||||
@ -1179,6 +1177,36 @@ class Catalog {
|
||||
}
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get content for an attachment.
|
||||
*
|
||||
* @param {string} id
|
||||
* Unique attachment identifier (required).
|
||||
* @returns {CatalogAttachmentContent}
|
||||
* Content.
|
||||
*/
|
||||
attachmentContent(id) {
|
||||
const namedContent = this.#attachmentContentByName(id);
|
||||
if (namedContent !== undefined) {
|
||||
return namedContent;
|
||||
}
|
||||
|
||||
// Annotation-local attachments register the reference of their embedded
|
||||
// content in the catalog, so it's re-fetched from the xref on demand
|
||||
// instead of being cached (which would then need to survive `cleanup`).
|
||||
// The reference points either at the file-spec dictionary or, for an inline
|
||||
// file-spec, straight at the embedded-file stream.
|
||||
const ref = this.#annotationAttachmentRefById.get(id);
|
||||
if (ref) {
|
||||
const target = this.xref.fetch(ref);
|
||||
if (target instanceof BaseStream) {
|
||||
return FileSpec.readStreamContent(target);
|
||||
}
|
||||
return target instanceof Dict ? FileSpec.readContent(target) : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -1280,9 +1308,6 @@ class Catalog {
|
||||
|
||||
async cleanup(manuallyTriggered = false) {
|
||||
clearGlobalCaches();
|
||||
this.#attachmentIdByRef?.clear();
|
||||
this.#attachmentIdByRef = null;
|
||||
this.attachmentDictById.clear();
|
||||
this.globalColorSpaceCache.clear();
|
||||
this.globalImageCache.clear(/* onlyData = */ manuallyTriggered);
|
||||
this.pageKidsCountCache.clear();
|
||||
|
||||
@ -27,29 +27,6 @@ import { stringToPDFString } from "./string_utils.js";
|
||||
* @import { CatalogAttachmentContent } from "./catalog.js";
|
||||
*/
|
||||
|
||||
/**
|
||||
* Get a platform-specific item from a file-spec dictionary.
|
||||
*
|
||||
* Search order follows the PDF platform keys: `UF`, `F`, `Unix`, `Mac`,
|
||||
* `DOS`.
|
||||
*
|
||||
* @param {Dict | null | undefined} dict
|
||||
* Dictionary.
|
||||
* @returns {unknown}
|
||||
* Matching dictionary value or `null` when no key is found.
|
||||
*/
|
||||
function pickPlatformItem(dict) {
|
||||
if (dict instanceof Dict) {
|
||||
// Look for the filename in this order: UF, F, Unix, Mac, DOS
|
||||
for (const key of ["UF", "F", "Unix", "Mac", "DOS"]) {
|
||||
if (dict.has(key)) {
|
||||
return dict.get(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* "A PDF file can refer to the contents of another file by using a File
|
||||
* Specification (PDF 1.1)", see the spec (7.11) for more details.
|
||||
@ -76,7 +53,7 @@ class FileSpec {
|
||||
}
|
||||
|
||||
get filename() {
|
||||
const item = pickPlatformItem(this.root);
|
||||
const item = FileSpec.pickPlatformItem(this.root);
|
||||
if (item && typeof item === "string") {
|
||||
// NOTE: The following replacement order is INTENTIONAL, regardless of
|
||||
// what some static code analysers (e.g. CodeQL) may claim.
|
||||
@ -105,6 +82,31 @@ class FileSpec {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a platform-specific item from a file-spec dictionary.
|
||||
*
|
||||
* Search order follows the PDF platform keys: `UF`, `F`, `Unix`, `Mac`,
|
||||
* `DOS`.
|
||||
*
|
||||
* @param {Dict | null | undefined} dict
|
||||
* Dictionary.
|
||||
* @param {boolean} [raw]
|
||||
* Return the raw (possibly indirect) value rather than the resolved one.
|
||||
* @returns {unknown}
|
||||
* Matching dictionary value or `null` when no key is found.
|
||||
*/
|
||||
static pickPlatformItem(dict, raw = false) {
|
||||
if (dict instanceof Dict) {
|
||||
// Look for the filename in this order: UF, F, Unix, Mac, DOS
|
||||
for (const key of ["UF", "F", "Unix", "Mac", "DOS"]) {
|
||||
if (dict.has(key)) {
|
||||
return raw ? dict.getRaw(key) : dict.get(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read attachment bytes from a file-spec dictionary.
|
||||
*
|
||||
@ -119,24 +121,36 @@ class FileSpec {
|
||||
if (!(dict instanceof Dict)) {
|
||||
return null;
|
||||
}
|
||||
const ef = pickPlatformItem(dict.get("EF"));
|
||||
const ef = this.pickPlatformItem(dict.get("EF"));
|
||||
if (!(ef instanceof BaseStream)) {
|
||||
warn(
|
||||
"Embedded file specification points to non-existing/invalid content"
|
||||
);
|
||||
return null;
|
||||
}
|
||||
return this.readStreamContent(ef);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the bytes of an embedded-file stream.
|
||||
*
|
||||
* @param {BaseStream} stream
|
||||
* Embedded-file stream.
|
||||
* @returns {CatalogAttachmentContent}
|
||||
* Attachment bytes.
|
||||
* @throws {PasswordException}
|
||||
* When the bytes are encrypted and no key is available.
|
||||
*/
|
||||
static readStreamContent(stream) {
|
||||
// Throw if we need a password but don’t have one.
|
||||
const encrypt = dict.xref?.encrypt;
|
||||
const encrypt = stream.dict?.xref?.encrypt;
|
||||
if (encrypt?.encryptionKey === null) {
|
||||
throw new PasswordException(
|
||||
"No password given",
|
||||
PasswordResponses.NEED_PASSWORD
|
||||
);
|
||||
}
|
||||
|
||||
return ef.getBytes();
|
||||
return stream.getBytes();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -26,6 +26,7 @@ import {
|
||||
AnnotationFieldFlag,
|
||||
AnnotationFlag,
|
||||
AnnotationType,
|
||||
bytesToString,
|
||||
DrawOPS,
|
||||
OPS,
|
||||
RenderingIntentFlag,
|
||||
@ -41,6 +42,7 @@ import {
|
||||
} from "./test_utils.js";
|
||||
import { Dict, Name, Ref, RefSetCache } from "../../src/core/primitives.js";
|
||||
import { Lexer, Parser } from "../../src/core/parser.js";
|
||||
import { Catalog } from "../../src/core/catalog.js";
|
||||
import { FlateStream } from "../../src/core/flate_stream.js";
|
||||
import { PartialEvaluator } from "../../src/core/evaluator.js";
|
||||
import { StringStream } from "../../src/core/stream.js";
|
||||
@ -52,9 +54,10 @@ describe("annotation", function () {
|
||||
constructor(params) {
|
||||
this.pdfDocument = {
|
||||
catalog: {
|
||||
attachmentDictById: new Map(),
|
||||
attachmentIdByRef: new RefSetCache(),
|
||||
baseUrl: params.docBaseUrl || null,
|
||||
getAttachmentIdForAnnotation(ref) {
|
||||
return `attachmentRef:${ref.toString()}`;
|
||||
},
|
||||
},
|
||||
};
|
||||
this.evaluatorOptions = {
|
||||
@ -4403,20 +4406,17 @@ describe("annotation", function () {
|
||||
idFactoryMock
|
||||
);
|
||||
expect(data.annotationType).toEqual(AnnotationType.FILEATTACHMENT);
|
||||
expect(data.fileId.startsWith("annotation:")).toEqual(true);
|
||||
// The file-spec is an indirect object, so its reference is encoded in the
|
||||
// id and re-fetched on demand.
|
||||
expect(data.fileId).toEqual("attachmentRef:19R");
|
||||
expect(data.file).toEqual({
|
||||
rawFilename: "Test.txt",
|
||||
filename: "Test.txt",
|
||||
description: "abc",
|
||||
});
|
||||
|
||||
// Content lookup and reading requires a bigger mock than used here.
|
||||
expect(
|
||||
pdfManagerMock.pdfDocument.catalog.attachmentDictById.has(data.fileId)
|
||||
).toEqual(true);
|
||||
});
|
||||
|
||||
it("should reuse the attachment NameTree id for referenced files", async function () {
|
||||
it("should re-derive an inline file attachment from its embedded stream", async function () {
|
||||
const fileStream = new StringStream(
|
||||
"<<\n" +
|
||||
"/Type /EmbeddedFile\n" +
|
||||
@ -4432,41 +4432,36 @@ describe("annotation", function () {
|
||||
allowStreams: true,
|
||||
});
|
||||
|
||||
const fileStreamRef = Ref.get(28, 0);
|
||||
const fileStreamRef = Ref.get(18, 0);
|
||||
const fileStreamDict = parser.getObj();
|
||||
|
||||
const embeddedFileDict = new Dict();
|
||||
embeddedFileDict.set("F", fileStreamRef);
|
||||
|
||||
const fileSpecRef = Ref.get(29, 0);
|
||||
// The file-spec is inline (not an indirect object), so the embedded-file
|
||||
// stream's reference is encoded in the id instead.
|
||||
const fileSpecDict = new Dict();
|
||||
fileSpecDict.set("Type", Name.get("Filespec"));
|
||||
fileSpecDict.set("Desc", "abc");
|
||||
fileSpecDict.set("EF", embeddedFileDict);
|
||||
fileSpecDict.set("UF", "Test.txt");
|
||||
|
||||
const fileAttachmentRef = Ref.get(30, 0);
|
||||
const fileAttachmentRef = Ref.get(20, 0);
|
||||
const fileAttachmentDict = new Dict();
|
||||
fileAttachmentDict.set("Type", Name.get("Annot"));
|
||||
fileAttachmentDict.set("Subtype", Name.get("FileAttachment"));
|
||||
fileAttachmentDict.set("FS", fileSpecRef);
|
||||
fileAttachmentDict.set("FS", fileSpecDict);
|
||||
fileAttachmentDict.set("T", "Topic");
|
||||
fileAttachmentDict.set("Contents", "Test.txt");
|
||||
|
||||
const xref = new XRefMock([
|
||||
{ ref: fileStreamRef, data: fileStreamDict },
|
||||
{ ref: fileSpecRef, data: fileSpecDict },
|
||||
{ ref: fileAttachmentRef, data: fileAttachmentDict },
|
||||
]);
|
||||
embeddedFileDict.assignXref(xref);
|
||||
fileSpecDict.assignXref(xref);
|
||||
fileAttachmentDict.assignXref(xref);
|
||||
|
||||
pdfManagerMock.pdfDocument.catalog.attachmentIdByRef.put(
|
||||
fileSpecRef,
|
||||
"Test.txt"
|
||||
);
|
||||
|
||||
const { data } = await AnnotationFactory.create(
|
||||
xref,
|
||||
fileAttachmentRef,
|
||||
@ -4474,17 +4469,81 @@ describe("annotation", function () {
|
||||
idFactoryMock
|
||||
);
|
||||
expect(data.annotationType).toEqual(AnnotationType.FILEATTACHMENT);
|
||||
expect(data.fileId).toEqual("Test.txt");
|
||||
expect(data.fileId).toEqual("attachmentRef:18R");
|
||||
expect(data.file).toEqual({
|
||||
rawFilename: "Test.txt",
|
||||
filename: "Test.txt",
|
||||
description: "abc",
|
||||
});
|
||||
});
|
||||
|
||||
// File should not be added as it’s already referenced in the `NameTree`.
|
||||
it("should keep named attachment ids distinct from annotation attachment ids", function () {
|
||||
const annotationStreamRef = Ref.get(18, 0);
|
||||
const annotationStreamDict = new Dict();
|
||||
annotationStreamDict.set("Type", Name.get("EmbeddedFile"));
|
||||
const annotationStream = new StringStream(
|
||||
"Annotation attachment",
|
||||
annotationStreamDict
|
||||
);
|
||||
|
||||
const namedStreamRef = Ref.get(21, 0);
|
||||
const namedStreamDict = new Dict();
|
||||
namedStreamDict.set("Type", Name.get("EmbeddedFile"));
|
||||
const namedStream = new StringStream("Named attachment", namedStreamDict);
|
||||
|
||||
const namedEmbeddedFileDict = new Dict();
|
||||
namedEmbeddedFileDict.set("F", namedStreamRef);
|
||||
|
||||
const namedFileSpecRef = Ref.get(22, 0);
|
||||
const namedFileSpecDict = new Dict();
|
||||
namedFileSpecDict.set("Type", Name.get("Filespec"));
|
||||
namedFileSpecDict.set("EF", namedEmbeddedFileDict);
|
||||
namedFileSpecDict.set("F", "Named.txt");
|
||||
|
||||
const pagesDict = new Dict();
|
||||
const embeddedFilesDict = new Dict();
|
||||
embeddedFilesDict.set("Names", ["attachmentRef:18R", namedFileSpecRef]);
|
||||
|
||||
const namesDict = new Dict();
|
||||
namesDict.set("EmbeddedFiles", embeddedFilesDict);
|
||||
|
||||
const catalogDict = new Dict();
|
||||
catalogDict.set("Pages", pagesDict);
|
||||
catalogDict.set("Names", namesDict);
|
||||
|
||||
const xref = new XRefMock([
|
||||
{ ref: annotationStreamRef, data: annotationStream },
|
||||
{ ref: namedStreamRef, data: namedStream },
|
||||
{ ref: namedFileSpecRef, data: namedFileSpecDict },
|
||||
]);
|
||||
xref.getCatalogObj = () => catalogDict;
|
||||
|
||||
for (const dict of [
|
||||
annotationStreamDict,
|
||||
namedStreamDict,
|
||||
namedEmbeddedFileDict,
|
||||
namedFileSpecDict,
|
||||
pagesDict,
|
||||
embeddedFilesDict,
|
||||
namesDict,
|
||||
catalogDict,
|
||||
]) {
|
||||
dict.assignXref(xref);
|
||||
}
|
||||
|
||||
const catalog = new Catalog(pdfManagerMock, xref);
|
||||
const annotationId =
|
||||
catalog.getAttachmentIdForAnnotation(annotationStreamRef);
|
||||
|
||||
expect(annotationId).toEqual("attachmentRef:18R-1");
|
||||
expect(
|
||||
pdfManagerMock.pdfDocument.catalog.attachmentDictById.has(data.fileId)
|
||||
).toEqual(false);
|
||||
bytesToString(catalog.attachmentContent("attachmentRef:18R"))
|
||||
).toEqual("Named attachment");
|
||||
expect(bytesToString(catalog.attachmentContent(annotationId))).toEqual(
|
||||
"Annotation attachment"
|
||||
);
|
||||
// An unknown id resolves to no content.
|
||||
expect(catalog.attachmentContent("nonexistent")).toEqual(null);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@ -3915,6 +3915,34 @@ describe("api", function () {
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets FileAttachment annotation content that stays readable after cleanup", async function () {
|
||||
// The embedded files are reachable only via the annotations (no catalog
|
||||
// `/Names` tree), so their content must survive `cleanup` by being
|
||||
// re-derivable from the xref.
|
||||
const loadingTask = getDocument(buildGetDocumentParams("bug1230933.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const annotations = await pdfPage.getAnnotations();
|
||||
|
||||
const fileAnnotation = annotations.find(
|
||||
a => a.annotationType === AnnotationType.FILEATTACHMENT
|
||||
);
|
||||
const { fileId } = fileAnnotation;
|
||||
expect(fileId.startsWith("attachmentRef:")).toEqual(true);
|
||||
|
||||
const before = await pdfDoc.getAttachmentContent(fileId);
|
||||
expect(before).toBeInstanceOf(Uint8Array);
|
||||
expect(before.length).toEqual(234414);
|
||||
|
||||
await pdfDoc.cleanup();
|
||||
|
||||
const after = await pdfDoc.getAttachmentContent(fileId);
|
||||
expect(after).toBeInstanceOf(Uint8Array);
|
||||
expect(after.length).toEqual(234414);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets annotations containing /Launch action with /FileSpec dictionary (issue 17846)", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue17846.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user