Add support for encrypted attachments

This PR is related to GH-20732, which is about `AuthEvent` (to delay
prompting for a password); this PR instead adds the actual support for
encrypted attachments.
“Encrypted attachments” means that only the attachments (embedded files) are
encrypted, while the main contents of the document are plain text.
Note that some PDF viewers, like Preview/QuickLook/Safari or Chrome,
do not support attachments at all.
Note that the file checked into the tests is the same as
`output-no-auth-event.pdf` referenced in
<https://github.com/mozilla/pdf.js/issues/20139#issuecomment-3952462166>.

Closes GH-20139.
This commit is contained in:
Titus Wormer 2026-05-07 13:30:41 +02:00
parent b849567c10
commit 45cdb5d3e8
No known key found for this signature in database
GPG Key ID: E6E581152ED04E2E
5 changed files with 245 additions and 81 deletions

View File

@ -31,6 +31,20 @@ import { calculateMD5 } from "./calculate_md5.js";
import { calculateSHA256 } from "./calculate_sha256.js";
import { DecryptStream } from "./decrypt_stream.js";
/**
* @typedef {typeof AES128Cipher | typeof AES256Cipher | typeof ARCFourCipher
* | typeof NullCipher} CipherConstructors
*/
/**
* @callback ResolveCipher
* Find the appropriate cipher class based on the filter name.
* @param {Name | null} [filterName]
* Crypt filter name.
* @returns {CipherConstructors}
* Cipher constructor.
*/
class ARCFourCipher {
a = 0;
@ -737,13 +751,46 @@ class PDF20 extends PDFBase {
}
class CipherTransform {
constructor(stringCipherConstructor, streamCipherConstructor) {
this.StringCipherConstructor = stringCipherConstructor;
this.StreamCipherConstructor = streamCipherConstructor;
/** @type {Map<string, CipherConstructors>} */
#cipherCache = new Map();
/**
* @param {ResolveCipher} resolveCipher
* Resolve a cipher constructor from a crypt filter name.
* @param {Name | null} [stringFilterName]
* Default crypt filter for strings.
* @param {Name | null} [streamFilterName]
* Default crypt filter for streams.
*/
constructor(resolveCipher, stringFilterName = null, streamFilterName = null) {
this.resolveCipher = resolveCipher;
this.streamFilterName = streamFilterName;
this.stringFilterName = stringFilterName;
}
createStream(stream, length) {
const cipher = new this.StreamCipherConstructor();
/**
* @param {Name | null} [filterName]
* Crypt filter name.
* @returns {CipherConstructors}
* Cipher constructor.
*/
#getCipher(filterName = null) {
const key = filterName instanceof Name ? filterName.name : "__default__";
return this.#cipherCache.getOrInsertComputed(key, () =>
this.resolveCipher(filterName)
);
}
/**
* @param {BaseStream} stream
* @param {number | null} length
* @param {Name | null} [cryptFilterName]
* @returns {DecryptStream}
*/
createStream(stream, length, cryptFilterName = null) {
const Cipher = this.#getCipher(cryptFilterName || this.streamFilterName);
const cipher = new Cipher();
return new DecryptStream(
stream,
length,
@ -754,14 +801,16 @@ class CipherTransform {
}
decryptString(s) {
const cipher = new this.StringCipherConstructor();
const Cipher = this.#getCipher(this.stringFilterName);
const cipher = new Cipher();
let data = stringToBytes(s);
data = cipher.decryptBlock(data, true);
return bytesToString(data);
}
encryptString(s) {
const cipher = new this.StringCipherConstructor();
const Cipher = this.#getCipher(this.stringFilterName);
const cipher = new Cipher();
if (cipher instanceof AESBaseCipher) {
// Append some chars equal to "16 - (M mod 16)"
// where M is the string length (see section 7.6.2 in PDF specification)
@ -986,41 +1035,6 @@ class CipherTransformFactory {
return hash.subarray(0, Math.min(n + 5, 16));
}
#buildCipherConstructor(cf, name, num, gen, key) {
if (!(name instanceof Name)) {
throw new FormatError("Invalid crypt filter name.");
}
const self = this;
const cryptFilter = cf.get(name.name);
const cfm = cryptFilter?.get("CFM");
if (!cfm || cfm.name === "None") {
return function () {
return new NullCipher();
};
}
if (cfm.name === "V2") {
return function () {
return new ARCFourCipher(
self.#buildObjectKey(num, gen, key, /* isAes = */ false)
);
};
}
if (cfm.name === "AESV2") {
return function () {
return new AES128Cipher(
self.#buildObjectKey(num, gen, key, /* isAes = */ true)
);
};
}
if (cfm.name === "AESV3") {
return function () {
return new AES256Cipher(key);
};
}
throw new FormatError("Unknown crypto method");
}
constructor(dict, fileId, password) {
const filter = dict.get("Filter");
if (!isName(filter, "Standard")) {
@ -1185,36 +1199,66 @@ class CipherTransformFactory {
}
}
/**
* @param {number} num
* Object number.
* @param {number} gen
* Generation number.
* @returns {CipherTransform}
* Cipher transform.
*/
createCipherTransform(num, gen) {
if (this.algorithm === 4 || this.algorithm === 5) {
return new CipherTransform(
this.#buildCipherConstructor(
this.cf,
this.strf,
num,
gen,
this.encryptionKey
),
this.#buildCipherConstructor(
this.cf,
this.stmf,
num,
gen,
this.encryptionKey
)
);
/** @type {ResolveCipher} */
const resolveCipher = filterName => {
if (!(filterName instanceof Name)) {
throw new FormatError("Invalid crypt filter name.");
}
const cryptFilter = this.cf.get(filterName.name);
const cfm = cryptFilter?.get("CFM");
if (!cfm || cfm.name === "None") {
return NullCipher;
}
if (cfm.name === "V2") {
return ARCFourCipher.bind(
null,
this.#buildObjectKey(
num,
gen,
this.encryptionKey,
/* isAes = */ false
)
);
}
if (cfm.name === "AESV2") {
return AES128Cipher.bind(
null,
this.#buildObjectKey(
num,
gen,
this.encryptionKey,
/* isAes = */ true
)
);
}
if (cfm.name === "AESV3") {
return AES256Cipher.bind(null, this.encryptionKey);
}
throw new FormatError("Unknown crypto method");
};
return new CipherTransform(resolveCipher, this.strf, this.stmf);
}
// algorithms 1 and 2
const key = this.#buildObjectKey(
num,
gen,
this.encryptionKey,
/* isAes = */ false
);
const cipherConstructor = function () {
return new ARCFourCipher(key);
};
return new CipherTransform(cipherConstructor, cipherConstructor);
/** @type {ResolveCipher} */
const resolveCipher = () =>
ARCFourCipher.bind(
null,
this.#buildObjectKey(num, gen, this.encryptionKey, /* isAes = */ false)
);
return new CipherTransform(resolveCipher);
}
}
@ -1222,6 +1266,7 @@ export {
AES128Cipher,
AES256Cipher,
ARCFourCipher,
CipherTransform,
CipherTransformFactory,
PDF17,
PDF20,

View File

@ -20,7 +20,7 @@ import {
info,
warn,
} from "../shared/util.js";
import { Cmd, Dict, EOF, isCmd, Name, Ref } from "./primitives.js";
import { Cmd, Dict, EOF, isCmd, isName, Name, Ref } from "./primitives.js";
import {
isWhiteSpace,
MissingDataException,
@ -39,6 +39,17 @@ import { LZWStream } from "./lzw_stream.js";
import { PredictorStream } from "./predictor_stream.js";
import { RunLengthStream } from "./run_length_stream.js";
/**
* @import { BaseStream } from "./base_stream.js"
* @import { CipherTransform } from "./crypto.js"
*/
/**
* @typedef {Ascii85Stream | AsciiHexStream | BaseStream | BrotliStream
* | CCITTFaxStream | FlateStream | Jbig2Stream | JpegStream | JpxStream
* | LZWStream | NullStream | PredictorStream | RunLengthStream} Streams
*/
const MAX_LENGTH_TO_CACHE = 1000;
function getInlineImageCacheKey(bytes) {
@ -100,6 +111,11 @@ class Parser {
}
}
/**
* @param {CipherTransform | null} cipherTransform
* Cipher transform for decryption.
* @returns {unknown}
*/
getObj(cipherTransform = null) {
const buf1 = this.buf1;
this.shift();
@ -515,6 +531,10 @@ class Parser {
}
}
/**
* @param {CipherTransform | null} cipherTransform
* @returns {Streams}
*/
makeInlineImage(cipherTransform) {
const lexer = this.lexer;
const stream = lexer.stream;
@ -539,12 +559,12 @@ class Parser {
}
// Extract the name of the first (i.e. the current) image filter.
const filter = this.xref.fetchIfRef(dictMap.F || dictMap.Filter);
const filter = this.#fetchIfRef(dictMap.F || dictMap.Filter);
let filterName;
if (filter instanceof Name) {
filterName = filter.name;
} else if (Array.isArray(filter)) {
const filterZero = this.xref.fetchIfRef(filter[0]);
const filterZero = this.#fetchIfRef(filter[0]);
if (filterZero instanceof Name) {
filterName = filterZero.name;
}
@ -597,11 +617,11 @@ class Parser {
dict.set(key, dictMap[key]);
}
let imageStream = stream.makeSubStream(startPos, length, dict);
if (cipherTransform) {
if (cipherTransform && !this.#hasCryptFilter(filter)) {
imageStream = cipherTransform.createStream(imageStream, length);
}
imageStream = this.filter(imageStream, dict, length);
imageStream = this.filter(imageStream, dict, length, cipherTransform);
imageStream.dict = dict;
if (cacheKey !== undefined) {
imageStream.cacheKey = `inline_img_${++this._imageId}`;
@ -614,6 +634,38 @@ class Parser {
return imageStream;
}
/**
* Resolve indirect objects when `xref` is available.
*
* @param {unknown} obj
* @returns {unknown}
*/
#fetchIfRef(obj) {
return this.xref ? this.xref.fetchIfRef(obj) : obj;
}
/**
* Check if a stream filter chain contains `/Crypt`.
*
* @param {unknown} [filter]
* Object, probably a name or an array of names.
* @returns {boolean}
* Whether the filter chain contains `/Crypt`.
*/
#hasCryptFilter(filter) {
if (!Array.isArray(filter)) {
return isName(filter, "Crypt");
}
for (const f of filter) {
if (isName(this.#fetchIfRef(f), "Crypt")) {
return true;
}
}
return false;
}
#findStreamLength(startPos) {
const { stream } = this.lexer;
stream.pos = startPos;
@ -727,15 +779,25 @@ class Parser {
this.shift(); // 'endstream'
stream = stream.makeSubStream(startPos, length, dict);
if (cipherTransform) {
const filter = dict.get("F", "Filter");
// Streams that explicitly use `/Crypt` are decrypted in the filter chain,
// so avoid applying the default stream cipher up-front.
if (cipherTransform && !this.#hasCryptFilter(filter)) {
stream = cipherTransform.createStream(stream, length);
}
stream = this.filter(stream, dict, length);
stream = this.filter(stream, dict, length, cipherTransform);
stream.dict = dict;
return stream;
}
filter(stream, dict, length) {
/**
* @param {Streams} stream
* @param {Dict} dict
* @param {number | null} length
* @param {CipherTransform | null} cipherTransform
* @returns {Streams}
*/
filter(stream, dict, length, cipherTransform = null) {
let filter = dict.get("F", "Filter");
let params = dict.get("DP", "DecodeParms");
@ -743,7 +805,13 @@ class Parser {
if (Array.isArray(params)) {
warn("/DecodeParms should not be an Array, when /Filter is a Name.");
}
return this.makeFilter(stream, filter.name, length, params);
return this.makeFilter(
stream,
filter.name,
length,
params,
cipherTransform
);
}
let maybeLength = length;
@ -751,16 +819,22 @@ class Parser {
const filterArray = filter;
const paramsArray = params;
for (let i = 0, ii = filterArray.length; i < ii; ++i) {
filter = this.xref.fetchIfRef(filterArray[i]);
filter = this.#fetchIfRef(filterArray[i]);
if (!(filter instanceof Name)) {
throw new FormatError(`Bad filter name "${filter}"`);
}
params = null;
if (Array.isArray(paramsArray) && i in paramsArray) {
params = this.xref.fetchIfRef(paramsArray[i]);
params = this.#fetchIfRef(paramsArray[i]);
}
stream = this.makeFilter(stream, filter.name, maybeLength, params);
stream = this.makeFilter(
stream,
filter.name,
maybeLength,
params,
cipherTransform
);
// After the first stream the `length` variable is invalid.
maybeLength = null;
}
@ -768,7 +842,15 @@ class Parser {
return stream;
}
makeFilter(stream, name, maybeLength, params) {
/**
* @param {Streams} stream
* @param {string} name
* @param {number | null} maybeLength
* @param {Dict | null} params
* @param {CipherTransform | null | undefined} [cipherTransform]
* @returns {Streams}
*/
makeFilter(stream, name, maybeLength, params, cipherTransform = null) {
// Since the 'Length' entry in the stream dictionary can be completely
// wrong, e.g. zero for non-empty streams, only skip parsing the stream
// when we can be absolutely certain that it actually is empty.
@ -825,6 +907,17 @@ class Parser {
return new Jbig2Stream(stream, maybeLength, params);
case "BrotliDecode":
return new BrotliStream(stream, maybeLength);
case "Crypt": {
if (!cipherTransform) {
warn('Filter "Crypt" is missing a cipher transform.');
return stream;
}
const param = params instanceof Dict ? params.get("Name") : null;
// Default to "Identity" (PDF 7.4.10).
const cryptName =
param instanceof Name ? param : Name.get("Identity");
return cipherTransform.createStream(stream, maybeLength, cryptName);
}
}
warn(`Filter "${name}" is not supported.`);
return stream;

View File

@ -921,6 +921,7 @@
!operator_list_cycle.pdf
!knockout_groups_test.pdf
!issue18032.pdf
!encrypted-attachment.pdf
!Embedded_font.pdf
!issue18548_reduced.pdf
!issue_cff_unsigned_bbox.pdf

Binary file not shown.

View File

@ -1689,6 +1689,31 @@ describe("api", function () {
await loadingTask.destroy();
});
it("gets encrypted attachments in password-protected documents", async function () {
  const outerTask = getDocument(
    buildGetDocumentParams("encrypted-attachment.pdf", {
      password: "000000",
    })
  );
  // Stays `null` unless the attachment is actually opened below, so that the
  // `finally` clause can clean up unconditionally.
  let innerTask = null;

  try {
    const outerDoc = await outerTask.promise;
    const files = await outerDoc.getAttachments();
    const embedded = files?.["attachment.pdf"];

    expect(embedded).toBeDefined();
    expect(embedded.filename).toEqual("attachment.pdf");

    // Load the attachment bytes as a document of their own, to check the
    // content and not only the metadata.
    innerTask = getDocument({ data: embedded.content });
    const { numPages } = await innerTask.promise;
    expect(numPages).toBe(1);
  } finally {
    await innerTask?.destroy();
    await outerTask.destroy();
  }
});
it("gets javascript with printing instructions (JS action)", async function () {
// PDF document with "JavaScript" action in the OpenAction dictionary.
const loadingTask = getDocument(buildGetDocumentParams("issue6106.pdf"));