diff --git a/src/core/crypto.js b/src/core/crypto.js
index 7d9ffa3ae..ebd579649 100644
--- a/src/core/crypto.js
+++ b/src/core/crypto.js
@@ -31,6 +31,20 @@ import { calculateMD5 } from "./calculate_md5.js";
 import { calculateSHA256 } from "./calculate_sha256.js";
 import { DecryptStream } from "./decrypt_stream.js";
 
+/**
+ * @typedef {typeof AES128Cipher | typeof AES256Cipher | typeof ARCFourCipher
+ *   | typeof NullCipher} CipherConstructors
+ */
+
+/**
+ * @callback ResolveCipher
+ * Find the appropriate cipher class based on the filter name.
+ * @param {Name | null} [filterName]
+ *   Name.
+ * @returns {CipherConstructors}
+ *   Cipher constructor.
+ */
+
 class ARCFourCipher {
   a = 0;
 
@@ -737,13 +751,46 @@ class PDF20 extends PDFBase {
 }
 
 class CipherTransform {
-  constructor(stringCipherConstructor, streamCipherConstructor) {
-    this.StringCipherConstructor = stringCipherConstructor;
-    this.StreamCipherConstructor = streamCipherConstructor;
+  /** @type {Map} */
+  #cipherCache = new Map();
+
+  /**
+   * @param {ResolveCipher} resolveCipher
+   *   Resolve a cipher constructor from a crypt filter name.
+   * @param {Name | null} [stringFilterName]
+   *   Default crypt filter for strings.
+   * @param {Name | null} [streamFilterName]
+   *   Default crypt filter for streams.
+   */
+  constructor(resolveCipher, stringFilterName = null, streamFilterName = null) {
+    this.resolveCipher = resolveCipher;
+    this.streamFilterName = streamFilterName;
+    this.stringFilterName = stringFilterName;
   }
 
-  createStream(stream, length) {
-    const cipher = new this.StreamCipherConstructor();
+  /**
+   * @param {Name | null} [filterName]
+   *   Crypt filter name.
+   * @returns {CipherConstructors}
+   *   Cipher constructor.
+   */
+  #getCipher(filterName = null) {
+    const key = filterName instanceof Name ? filterName.name : "__default__";
+
+    return this.#cipherCache.getOrInsertComputed(key, () =>
+      this.resolveCipher(filterName)
+    );
+  }
+
+  /**
+   * @param {BaseStream} stream
+   * @param {number | null} length
+   * @param {Name | null} [cryptFilterName]
+   * @returns {DecryptStream}
+   */
+  createStream(stream, length, cryptFilterName = null) {
+    const Cipher = this.#getCipher(cryptFilterName || this.streamFilterName);
+    const cipher = new Cipher();
     return new DecryptStream(
       stream,
       length,
@@ -754,14 +801,16 @@ class CipherTransform {
   }
 
   decryptString(s) {
-    const cipher = new this.StringCipherConstructor();
+    const Cipher = this.#getCipher(this.stringFilterName);
+    const cipher = new Cipher();
     let data = stringToBytes(s);
     data = cipher.decryptBlock(data, true);
     return bytesToString(data);
   }
 
   encryptString(s) {
-    const cipher = new this.StringCipherConstructor();
+    const Cipher = this.#getCipher(this.stringFilterName);
+    const cipher = new Cipher();
     if (cipher instanceof AESBaseCipher) {
       // Append some chars equal to "16 - (M mod 16)"
       // where M is the string length (see section 7.6.2 in PDF specification)
@@ -986,41 +1035,6 @@ class CipherTransformFactory {
     return hash.subarray(0, Math.min(n + 5, 16));
   }
 
-  #buildCipherConstructor(cf, name, num, gen, key) {
-    if (!(name instanceof Name)) {
-      throw new FormatError("Invalid crypt filter name.");
-    }
-    const self = this;
-    const cryptFilter = cf.get(name.name);
-    const cfm = cryptFilter?.get("CFM");
-
-    if (!cfm || cfm.name === "None") {
-      return function () {
-        return new NullCipher();
-      };
-    }
-    if (cfm.name === "V2") {
-      return function () {
-        return new ARCFourCipher(
-          self.#buildObjectKey(num, gen, key, /* isAes = */ false)
-        );
-      };
-    }
-    if (cfm.name === "AESV2") {
-      return function () {
-        return new AES128Cipher(
-          self.#buildObjectKey(num, gen, key, /* isAes = */ true)
-        );
-      };
-    }
-    if (cfm.name === "AESV3") {
-      return function () {
-        return new AES256Cipher(key);
-      };
-    }
-    throw new FormatError("Unknown crypto method");
-  }
-
   constructor(dict, fileId, password) {
     const filter = dict.get("Filter");
     if (!isName(filter, "Standard")) {
@@ -1185,36 +1199,66 @@ class CipherTransformFactory {
     }
   }
 
+  /**
+   * @param {number} num
+   *   Object number.
+   * @param {number} gen
+   *   Generation number.
+   * @returns {CipherTransform}
+   *   Cipher transform.
+   */
   createCipherTransform(num, gen) {
     if (this.algorithm === 4 || this.algorithm === 5) {
-      return new CipherTransform(
-        this.#buildCipherConstructor(
-          this.cf,
-          this.strf,
-          num,
-          gen,
-          this.encryptionKey
-        ),
-        this.#buildCipherConstructor(
-          this.cf,
-          this.stmf,
-          num,
-          gen,
-          this.encryptionKey
-        )
-      );
+      /** @type {ResolveCipher} */
+      const resolveCipher = filterName => {
+        if (!(filterName instanceof Name)) {
+          throw new FormatError("Invalid crypt filter name.");
+        }
+        const cryptFilter = this.cf.get(filterName.name);
+        const cfm = cryptFilter?.get("CFM");
+
+        if (!cfm || cfm.name === "None") {
+          return NullCipher;
+        }
+        if (cfm.name === "V2") {
+          return ARCFourCipher.bind(
+            null,
+            this.#buildObjectKey(
+              num,
+              gen,
+              this.encryptionKey,
+              /* isAes = */ false
+            )
+          );
+        }
+        if (cfm.name === "AESV2") {
+          return AES128Cipher.bind(
+            null,
+            this.#buildObjectKey(
+              num,
+              gen,
+              this.encryptionKey,
+              /* isAes = */ true
+            )
+          );
+        }
+        if (cfm.name === "AESV3") {
+          return AES256Cipher.bind(null, this.encryptionKey);
+        }
+        throw new FormatError("Unknown crypto method");
+      };
+
+      return new CipherTransform(resolveCipher, this.strf, this.stmf);
     }
+
     // algorithms 1 and 2
-    const key = this.#buildObjectKey(
-      num,
-      gen,
-      this.encryptionKey,
-      /* isAes = */ false
-    );
-    const cipherConstructor = function () {
-      return new ARCFourCipher(key);
-    };
-    return new CipherTransform(cipherConstructor, cipherConstructor);
+    /** @type {ResolveCipher} */
+    const resolveCipher = () =>
+      ARCFourCipher.bind(
+        null,
+        this.#buildObjectKey(num, gen, this.encryptionKey, /* isAes = */ false)
+      );
+    return new CipherTransform(resolveCipher);
   }
 }
 
@@ -1222,6 +1266,7 @@ export {
   AES128Cipher,
   AES256Cipher,
   ARCFourCipher,
+  CipherTransform,
   CipherTransformFactory,
   PDF17,
   PDF20,
diff --git a/src/core/parser.js b/src/core/parser.js
index 78c660088..753266a03 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -20,7 +20,7 @@ import {
   info,
   warn,
 } from "../shared/util.js";
-import { Cmd, Dict, EOF, isCmd, Name, Ref } from "./primitives.js";
+import { Cmd, Dict, EOF, isCmd, isName, Name, Ref } from "./primitives.js";
 import {
   isWhiteSpace,
   MissingDataException,
@@ -39,6 +39,17 @@ import { LZWStream } from "./lzw_stream.js";
 import { PredictorStream } from "./predictor_stream.js";
 import { RunLengthStream } from "./run_length_stream.js";
 
+/**
+ * @import { BaseStream } from "./base_stream.js"
+ * @import { CipherTransform } from "./crypto.js"
+ */
+
+/**
+ * @typedef {Ascii85Stream | AsciiHexStream | BaseStream | BrotliStream
+ *   | CCITTFaxStream | FlateStream | Jbig2Stream | JpegStream | JpxStream
+ *   | LZWStream | NullStream | PredictorStream | RunLengthStream} Streams
+ */
+
 const MAX_LENGTH_TO_CACHE = 1000;
 
 function getInlineImageCacheKey(bytes) {
@@ -100,6 +111,11 @@ class Parser {
     }
   }
 
+  /**
+   * @param {CipherTransform | null} cipherTransform
+   *   Cipher transform for decryption.
+   * @returns {unknown}
+   */
   getObj(cipherTransform = null) {
     const buf1 = this.buf1;
     this.shift();
@@ -515,6 +531,10 @@ class Parser {
     }
   }
 
+  /**
+   * @param {CipherTransform | null} cipherTransform
+   * @returns {Streams}
+   */
   makeInlineImage(cipherTransform) {
     const lexer = this.lexer;
     const stream = lexer.stream;
@@ -539,12 +559,12 @@ class Parser {
     }
 
     // Extract the name of the first (i.e. the current) image filter.
-    const filter = this.xref.fetchIfRef(dictMap.F || dictMap.Filter);
+    const filter = this.#fetchIfRef(dictMap.F || dictMap.Filter);
     let filterName;
     if (filter instanceof Name) {
       filterName = filter.name;
     } else if (Array.isArray(filter)) {
-      const filterZero = this.xref.fetchIfRef(filter[0]);
+      const filterZero = this.#fetchIfRef(filter[0]);
       if (filterZero instanceof Name) {
         filterName = filterZero.name;
       }
@@ -597,11 +617,11 @@ class Parser {
       dict.set(key, dictMap[key]);
     }
     let imageStream = stream.makeSubStream(startPos, length, dict);
-    if (cipherTransform) {
+    if (cipherTransform && !this.#hasCryptFilter(filter)) {
       imageStream = cipherTransform.createStream(imageStream, length);
     }
 
-    imageStream = this.filter(imageStream, dict, length);
+    imageStream = this.filter(imageStream, dict, length, cipherTransform);
     imageStream.dict = dict;
     if (cacheKey !== undefined) {
       imageStream.cacheKey = `inline_img_${++this._imageId}`;
@@ -614,6 +634,38 @@ class Parser {
     return imageStream;
   }
 
+  /**
+   * Resolve indirect objects when `xref` is available.
+   *
+   * @param {unknown} obj
+   * @returns {unknown}
+   */
+  #fetchIfRef(obj) {
+    return this.xref ? this.xref.fetchIfRef(obj) : obj;
+  }
+
+  /**
+   * Check if a stream filter chain contains `/Crypt`.
+   *
+   * @param {unknown} [filter]
+   *   Object, probably a name or an array of names.
+   * @returns {boolean}
+   *   Whether the filter chain contains `/Crypt`.
+   */
+  #hasCryptFilter(filter) {
+    if (!Array.isArray(filter)) {
+      return isName(filter, "Crypt");
+    }
+
+    for (const f of filter) {
+      if (isName(this.#fetchIfRef(f), "Crypt")) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
   #findStreamLength(startPos) {
     const { stream } = this.lexer;
     stream.pos = startPos;
@@ -727,15 +779,25 @@ class Parser {
     this.shift(); // 'endstream'
 
     stream = stream.makeSubStream(startPos, length, dict);
-    if (cipherTransform) {
+    const filter = dict.get("F", "Filter");
+    // Streams that explicitly use `/Crypt` are decrypted in the filter chain,
+    // so avoid applying the default stream cipher up-front.
+    if (cipherTransform && !this.#hasCryptFilter(filter)) {
       stream = cipherTransform.createStream(stream, length);
     }
-    stream = this.filter(stream, dict, length);
+    stream = this.filter(stream, dict, length, cipherTransform);
     stream.dict = dict;
     return stream;
   }
 
-  filter(stream, dict, length) {
+  /**
+   * @param {Streams} stream
+   * @param {Dict} dict
+   * @param {number | null} length
+   * @param {CipherTransform | null} cipherTransform
+   * @returns {Streams}
+   */
+  filter(stream, dict, length, cipherTransform = null) {
     let filter = dict.get("F", "Filter");
     let params = dict.get("DP", "DecodeParms");
 
@@ -743,7 +805,13 @@ class Parser {
       if (Array.isArray(params)) {
         warn("/DecodeParms should not be an Array, when /Filter is a Name.");
       }
-      return this.makeFilter(stream, filter.name, length, params);
+      return this.makeFilter(
+        stream,
+        filter.name,
+        length,
+        params,
+        cipherTransform
+      );
     }
 
     let maybeLength = length;
@@ -751,16 +819,22 @@ class Parser {
       const filterArray = filter;
       const paramsArray = params;
       for (let i = 0, ii = filterArray.length; i < ii; ++i) {
-        filter = this.xref.fetchIfRef(filterArray[i]);
+        filter = this.#fetchIfRef(filterArray[i]);
         if (!(filter instanceof Name)) {
           throw new FormatError(`Bad filter name "${filter}"`);
         }
 
         params = null;
         if (Array.isArray(paramsArray) && i in paramsArray) {
-          params = this.xref.fetchIfRef(paramsArray[i]);
+          params = this.#fetchIfRef(paramsArray[i]);
         }
-        stream = this.makeFilter(stream, filter.name, maybeLength, params);
+        stream = this.makeFilter(
+          stream,
+          filter.name,
+          maybeLength,
+          params,
+          cipherTransform
+        );
         // After the first stream the `length` variable is invalid.
         maybeLength = null;
       }
@@ -768,7 +842,15 @@ class Parser {
     return stream;
   }
 
-  makeFilter(stream, name, maybeLength, params) {
+  /**
+   * @param {Streams} stream
+   * @param {string} name
+   * @param {number | null} maybeLength
+   * @param {Dict | null} params
+   * @param {CipherTransform | null | undefined} [cipherTransform]
+   * @returns {Streams}
+   */
+  makeFilter(stream, name, maybeLength, params, cipherTransform = null) {
     // Since the 'Length' entry in the stream dictionary can be completely
     // wrong, e.g. zero for non-empty streams, only skip parsing the stream
     // when we can be absolutely certain that it actually is empty.
@@ -825,6 +907,17 @@ class Parser {
         return new Jbig2Stream(stream, maybeLength, params);
       case "BrotliDecode":
         return new BrotliStream(stream, maybeLength);
+      case "Crypt": {
+        if (!cipherTransform) {
+          warn('Filter "Crypt" is missing a cipher transform.');
+          return stream;
+        }
+        const param = params instanceof Dict ? params.get("Name") : null;
+        // Default to "Identity" (PDF 7.4.10).
+        const cryptName =
+          param instanceof Name ? param : Name.get("Identity");
+        return cipherTransform.createStream(stream, maybeLength, cryptName);
+      }
     }
     warn(`Filter "${name}" is not supported.`);
     return stream;
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 3da556ba1..8d4aaa9fc 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -921,6 +921,7 @@
 !operator_list_cycle.pdf
 !knockout_groups_test.pdf
 !issue18032.pdf
+!encrypted-attachment.pdf
 !Embedded_font.pdf
 !issue18548_reduced.pdf
 !issue_cff_unsigned_bbox.pdf
diff --git a/test/pdfs/encrypted-attachment.pdf b/test/pdfs/encrypted-attachment.pdf
new file mode 100644
index 000000000..8ab8fcb97
Binary files /dev/null and b/test/pdfs/encrypted-attachment.pdf differ
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index bec3cd7f6..7152d96d5 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -1689,6 +1689,31 @@ describe("api", function () {
       await loadingTask.destroy();
     });
 
+    it("gets encrypted attachments in password-protected documents", async function () {
+      const loadingTask = getDocument(
+        buildGetDocumentParams("encrypted-attachment.pdf", {
+          password: "000000",
+        })
+      );
+      let embeddedLoadingTask = null;
+
+      try {
+        const pdfDoc = await loadingTask.promise;
+        const attachments = await pdfDoc.getAttachments();
+        const attachment = attachments?.["attachment.pdf"];
+
+        expect(attachment).toBeDefined();
+        expect(attachment.filename).toEqual("attachment.pdf");
+
+        embeddedLoadingTask = getDocument({ data: attachment.content });
+        const embeddedPdfDoc = await embeddedLoadingTask.promise;
+        expect(embeddedPdfDoc.numPages).toBe(1);
+      } finally {
+        await embeddedLoadingTask?.destroy();
+        await loadingTask.destroy();
+      }
+    });
+
     it("gets javascript with printing instructions (JS action)", async function () {
       // PDF document with "JavaScript" action in the OpenAction dictionary.
       const loadingTask = getDocument(buildGetDocumentParams("issue6106.pdf"));