From 339f755a527e04a7b748db96fd797d5ec469b534 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 27 Apr 2026 11:46:17 +0200 Subject: [PATCH 1/2] Add more validation in the `Catalog.prototype.getPageIndex` method - Ensure that the /Kids-entries are Arrays, before trying to iterate through them. - Ensure that the /Count-entries are non-negative integers. --- src/core/catalog.js | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index 2cab5f961..9199ce177 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1495,6 +1495,9 @@ class Catalog { if (!kids) { return null; } + if (!Array.isArray(kids)) { + throw new FormatError("Kids must be an array."); + } const kidPromises = []; let found = false; @@ -1512,11 +1515,15 @@ class Catalog { throw new FormatError("Kid node must be a dictionary."); } if (obj.has("Count")) { - total += obj.get("Count"); - } else { - // Page leaf node. - total++; + const count = obj.get("Count"); + if (Number.isInteger(count) && count >= 0) { + total += count; + return; + } + throw new FormatError("Count must be a (positive) integer."); } + // Page leaf node. + total++; }) ); } From 3475806311c987743d72be9972d53aa81147f27d Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 27 Apr 2026 12:25:01 +0200 Subject: [PATCH 2/2] Convert `Catalog.prototype.getPageIndex` to an asynchronous method This simplifies/shortens a piece of old code, which shouldn't hurt. 
--- src/core/catalog.js | 156 ++++++++++++++++++++------------------------ 1 file changed, 70 insertions(+), 86 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index 9199ce177..370995826 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1446,107 +1446,91 @@ class Catalog { return map; } - getPageIndex(pageRef) { + async getPageIndex(pageRef) { const cachedPageIndex = this.pageIndexCache.get(pageRef); if (cachedPageIndex !== undefined) { - return Promise.resolve(cachedPageIndex); + return cachedPageIndex; } // The page tree nodes have the count of all the leaves below them. To get // how many pages are before we just have to walk up the tree and keep // adding the count of siblings to the left of the node. const xref = this.xref; + let total = 0, + ref = pageRef; - function pagesBeforeRef(kidRef) { - let total = 0, - parentRef; + while (true) { + const node = await xref.fetchAsync(ref); + if ( + isRefsEqual(ref, pageRef) && + !isDict(node, "Page") && + !(node instanceof Dict && !node.has("Type") && node.has("Contents")) + ) { + throw new FormatError( + "The reference does not point to a /Page dictionary." + ); + } + if (!node) { + break; + } + if (!(node instanceof Dict)) { + throw new FormatError("Node must be a dictionary."); + } + const parentRef = node.getRaw("Parent"); - return xref - .fetchAsync(kidRef) - .then(function (node) { - if ( - isRefsEqual(kidRef, pageRef) && - !isDict(node, "Page") && - !(node instanceof Dict && !node.has("Type") && node.has("Contents")) - ) { - throw new FormatError( - "The reference does not point to a /Page dictionary." 
- ); - } - if (!node) { - return null; - } - if (!(node instanceof Dict)) { - throw new FormatError("Node must be a dictionary."); - } - parentRef = node.getRaw("Parent"); - return node.getAsync("Parent"); - }) - .then(function (parent) { - if (!parent) { - return null; - } - if (!(parent instanceof Dict)) { - throw new FormatError("Parent must be a dictionary."); - } - return parent.getAsync("Kids"); - }) - .then(function (kids) { - if (!kids) { - return null; - } - if (!Array.isArray(kids)) { - throw new FormatError("Kids must be an array."); - } + const parent = await node.getAsync("Parent"); + if (!parent) { + break; + } + if (!(parent instanceof Dict)) { + throw new FormatError("Parent must be a dictionary."); + } - const kidPromises = []; - let found = false; - for (const kid of kids) { - if (!(kid instanceof Ref)) { - throw new FormatError("Kid must be a reference."); + const kids = await parent.getAsync("Kids"); + if (!kids) { + break; + } + if (!Array.isArray(kids)) { + throw new FormatError("Kids must be an array."); + } + + const kidPromises = []; + let found = false; + for (const kid of kids) { + if (!(kid instanceof Ref)) { + throw new FormatError("Kid must be a reference."); + } + if (isRefsEqual(kid, ref)) { + found = true; + break; + } + kidPromises.push( + xref.fetchAsync(kid).then(obj => { + if (!(obj instanceof Dict)) { + throw new FormatError("Kid node must be a dictionary."); } - if (isRefsEqual(kid, kidRef)) { - found = true; - break; + if (obj.has("Count")) { + const count = obj.get("Count"); + if (Number.isInteger(count) && count >= 0) { + total += count; + return; + } + throw new FormatError("Count must be a (positive) integer."); } - kidPromises.push( - xref.fetchAsync(kid).then(function (obj) { - if (!(obj instanceof Dict)) { - throw new FormatError("Kid node must be a dictionary."); - } - if (obj.has("Count")) { - const count = obj.get("Count"); - if (Number.isInteger(count) && count >= 0) { - total += count; - return; - } - throw new 
FormatError("Count must be a (positive) integer."); - } - // Page leaf node. - total++; - }) - ); - } - if (!found) { - throw new FormatError("Kid reference not found in parent's kids."); - } - return Promise.all(kidPromises).then(() => [total, parentRef]); - }); + // Page leaf node. + total++; + }) + ); + } + if (!found) { + throw new FormatError("Kid reference not found in parent's kids."); + } + await Promise.all(kidPromises); + ref = parentRef; } - let total = 0; - const next = ref => - pagesBeforeRef(ref).then(args => { - if (!args) { - this.pageIndexCache.put(pageRef, total); - return total; - } - const [count, parentRef] = args; - total += count; - return next(parentRef); - }); - - return next(pageRef); + this.pageIndexCache.put(pageRef, total); + return total; } get baseUrl() {