Simplify '#getFilteredPageIndices' and '#resolveInsertAfterIndices'

This commit is contained in:
Calixte Denizet 2026-05-06 16:59:15 +02:00 committed by calixteman
parent a328294713
commit b39440b6e0
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F
2 changed files with 145 additions and 112 deletions

View File

@ -563,9 +563,10 @@ class PDFEditor {
* @property {Array<number>} [pageIndices]
* position of the pages in the final document.
* @property {number} [insertAfter]
* 0-based index in the base sequential document after which to insert the
* pages. Sequential pageInfos (those without pageIndices) have their indices
* shifted to accommodate the insertion. Cannot be combined with pageIndices.
* 0-based index in the base sequential sequence after which to insert the
* pages. When every contributing pageInfo has pageIndices, this is
* interpreted against that explicit layout. Cannot be combined with
* pageIndices on the same entry.
*/
/**
@ -578,81 +579,58 @@ class PDFEditor {
if (!document) {
return [];
}
let keptIndices, keptRanges, deletedIndices, deletedRanges;
for (const page of includePages || []) {
if (Array.isArray(page)) {
(keptRanges ||= []).push(page);
} else {
(keptIndices ||= new Set()).add(page);
const compile = list => {
if (!list?.length) {
return null;
}
}
for (const page of excludePages || []) {
if (Array.isArray(page)) {
(deletedRanges ||= []).push(page);
} else {
(deletedIndices ||= new Set()).add(page);
const indices = new Set();
const ranges = [];
for (const item of list) {
if (Array.isArray(item)) {
ranges.push(item);
} else {
indices.add(item);
}
}
}
const indices = [];
return { indices, ranges };
};
const matches = (index, { indices, ranges }) =>
indices.has(index) ||
ranges.some(([start, end]) => index >= start && index <= end);
const inc = compile(includePages);
const exc = compile(excludePages);
const result = [];
for (let i = 0, ii = document.numPages; i < ii; i++) {
if (deletedIndices?.has(i)) {
if (exc && matches(i, exc)) {
continue;
}
if (deletedRanges) {
let isDeleted = false;
for (const [start, end] of deletedRanges) {
if (i >= start && i <= end) {
isDeleted = true;
break;
}
}
if (isDeleted) {
continue;
}
}
let takePage = false;
if (keptIndices) {
takePage = keptIndices.has(i);
}
if (!takePage && keptRanges) {
for (const [start, end] of keptRanges) {
if (i >= start && i <= end) {
takePage = true;
break;
}
}
}
if (!takePage && !keptIndices && !keptRanges) {
takePage = true;
}
if (takePage) {
indices.push(i);
if (!inc || matches(i, inc)) {
result.push(i);
}
}
return indices;
return result;
}
/**
* Resolve insertAfter pageInfos by converting them (and sequential pageInfos)
* to explicit pageIndices, shifting indices to accommodate each insertion.
* insertAfter values are relative to the base sequential sequence (i.e. the
* concatenation of pages from pageInfos that have neither pageIndices nor
* insertAfter), so they are independent of each other.
* @param {Array<PageInfo>} pageInfos
* @returns {Array<PageInfo>}
*/
#resolveInsertAfterIndices(pageInfos) {
// Single pass: build the base sequential sequence and collect insertAfter
// entries, computing each pageInfo's filtered page count only once and only
// for pageInfos that actually contribute pages.
const sequence = []; // each element is the index into pageInfos
const counts = new Array(pageInfos.length);
const sequence = [];
const insertAfterList = [];
for (let i = 0; i < pageInfos.length; i++) {
const info = pageInfos[i];
if (!info.document || info.pageIndices) {
if (!info.document) {
counts[i] = 0;
continue;
}
const count = (counts[i] = this.#getFilteredPageIndices(info).length);
if (info.pageIndices) {
continue;
}
const count = this.#getFilteredPageIndices(info).length;
if (info.insertAfter === undefined) {
for (let j = 0; j < count; j++) {
sequence.push(i);
@ -661,50 +639,49 @@ class PDFEditor {
insertAfterList.push({ i, insertAfter: info.insertAfter, count });
}
}
if (insertAfterList.length === 0) {
return pageInfos;
}
insertAfterList.sort((a, b) => a.insertAfter - b.insertAfter);
// Partial pageIndices rely on auto-fill in extractPages, which races with
// the slots insertAfter assigns here.
for (let i = 0; i < pageInfos.length; i++) {
const info = pageInfos[i];
if (
info.document &&
info.pageIndices &&
info.pageIndices.length < counts[i]
) {
throw new Error(
"extractPages: partial pageIndices cannot be combined with insertAfter entries."
);
}
}
// Partial pageIndices would auto-fill into free slots at extraction time,
// which collides with the positions insertAfter shifts/assigns. Reject.
if (insertAfterList.length > 0) {
insertAfterList.sort((a, b) => a.insertAfter - b.insertAfter || a.i - b.i);
// If there is no base sequential sequence, resolve insertAfter against the
// explicit layout. Shift pageIndices values but keep their array order:
// extractPages maps each filtered source page to the corresponding
// pageIndices entry.
if (
sequence.length === 0 &&
pageInfos.some(info => info.document && info.pageIndices)
) {
const updatedPageInfos = pageInfos.slice();
let maxExistingPos = -1;
for (const info of pageInfos) {
if (!info.document || !info.pageIndices) {
continue;
}
const filteredCount = this.#getFilteredPageIndices(info).length;
if (info.pageIndices.length < filteredCount) {
throw new Error(
"extractPages: partial pageIndices cannot be combined with insertAfter entries."
);
}
}
}
// If the base sequential sequence is empty but some entries carry explicit
// pageIndices (e.g. after a deletion), resolve each insertAfter against
// that explicit layout: shift existing positions past the insertion point
// and fill the gap. Entries without a document are ignored. With no
// explicit entries we fall through to the sequential branch, whose
// Array.splice already clamps to position 0 on an empty sequence.
const hasExplicitLayout =
insertAfterList.length > 0 &&
pageInfos.some(info => info.document && info.pageIndices);
if (sequence.length === 0 && hasExplicitLayout) {
const updatedPageInfos = pageInfos.slice();
let maxExistingPos = -1;
for (const info of pageInfos) {
if (info.document && info.pageIndices) {
for (const idx of info.pageIndices) {
if (idx > maxExistingPos) {
maxExistingPos = idx;
}
for (const idx of info.pageIndices) {
if (idx > maxExistingPos) {
maxExistingPos = idx;
}
}
}
let offset = 0;
for (const { i, insertAfter, count } of insertAfterList) {
// Clamp to [-1, maxExistingPos] so out-of-range values don't produce
// negative or gap-creating positions.
const threshold = Math.min(
Math.max(insertAfter, -1) + offset,
maxExistingPos
@ -725,49 +702,39 @@ class PDFEditor {
),
};
}
const insertedIndices = [];
const pageIndices = [];
for (let k = 0; k < count; k++) {
insertedIndices.push(threshold + 1 + k);
pageIndices.push(threshold + 1 + k);
}
const newInfo = {
...updatedPageInfos[i],
pageIndices: insertedIndices,
};
delete newInfo.insertAfter;
updatedPageInfos[i] = newInfo;
const result = { ...updatedPageInfos[i], pageIndices };
delete result.insertAfter;
updatedPageInfos[i] = result;
offset += count;
maxExistingPos += count;
}
return updatedPageInfos;
}
// insertAfter values are relative to the base sequential sequence (stable
// across entries thanks to the sort above); offset converts them to
// current-sequence positions as we splice.
let offset = 0;
for (const { i, insertAfter, count } of insertAfterList) {
const insertPos = insertAfter + 1 + offset;
const insertPos = Math.max(insertAfter, -1) + 1 + offset;
sequence.splice(insertPos, 0, ...new Array(count).fill(i));
offset += count;
}
// Map each pageInfo index to its final positions in the sequence using a
// plain array (keys are dense integers so no need for a Map).
const pageIndicesArr = new Array(pageInfos.length);
for (let pos = 0; pos < sequence.length; pos++) {
const infoIdx = sequence[pos];
(pageIndicesArr[infoIdx] ||= []).push(pos);
}
// Return updated pageInfos: sequential and insertAfter pageInfos now have
// explicit pageIndices; already-indexed pageInfos are left unchanged.
return pageInfos.map((info, i) => {
if (!info.document || info.pageIndices) {
return info;
}
const newInfo = { ...info, pageIndices: pageIndicesArr[i] || [] };
delete newInfo.insertAfter;
return newInfo;
const result = { ...info, pageIndices: pageIndicesArr[i] || [] };
delete result.insertAfter;
return result;
});
}
@ -792,9 +759,7 @@ class PDFEditor {
task
) {
this.#primaryDocument = primaryDocument;
if (pageInfos.some(info => info.insertAfter !== undefined)) {
pageInfos = this.#resolveInsertAfterIndices(pageInfos);
}
pageInfos = this.#resolveInsertAfterIndices(pageInfos);
const promises = [];
let newIndex = 0;
this.isSingleFile =

View File

@ -5584,6 +5584,35 @@ small scripts as well as for`);
return refs;
};
async function checkPageNumberOrder(pageInfos, expected) {
let loadingTask = getDocument(
buildGetDocumentParams("page_with_number.pdf")
);
let pdfDoc = await loadingTask.promise;
const data = await pdfDoc.extractPages(pageInfos);
await loadingTask.destroy();
expect(data).not.toBeNull();
if (!data) {
return;
}
loadingTask = getDocument(data);
pdfDoc = await loadingTask.promise;
expect(pdfDoc.numPages).toEqual(expected.length);
for (let i = 0, ii = expected.length; i < ii; i++) {
const pageNum = i + 1;
const pdfPage = await pdfDoc.getPage(pageNum);
const { items } = await pdfPage.getTextContent();
expect(mergeText(items))
.withContext(`Page ${pageNum}`)
.toEqual(expected[i]);
}
await loadingTask.destroy();
}
describe("Merge pdfs", function () {
it("should merge three PDFs", async function () {
const loadingTask = getDocument(
@ -7423,6 +7452,45 @@ small scripts as well as for`);
await loadingTask.destroy();
});
it("insertAfter preserves non-monotonic explicit pageIndices", async function () {
// Explicit pageIndices place page 2 before page 1. The inserted page 3
// lands after position 0, so the final order is "2", "3", "1".
await checkPageNumberOrder(
[
{ document: null, includePages: [0, 1], pageIndices: [1, 0] },
{ document: null, includePages: [2], insertAfter: 0 },
],
["2", "3", "1"]
);
});
it("insertAfter shifts only the values past the threshold in non-monotonic pageIndices", async function () {
// pageIndices [2, 0, 1] place pages 1·2·3 as "2", "3", "1". Inserting
// page 4 after position 1 shifts only values > 1, so [2, 0, 1] becomes
// [3, 0, 1] and the final order is "2", "3", "4", "1".
await checkPageNumberOrder(
[
{ document: null, includePages: [0, 1, 2], pageIndices: [2, 0, 1] },
{ document: null, includePages: [3], insertAfter: 1 },
],
["2", "3", "4", "1"]
);
});
it("insertAfter preserves mixed sequential and explicit layouts", async function () {
// Sequential pages 1 and 2 form the base sequence; page 3 is inserted
// after base position 0. The explicit page 4 already accounts for the
// shifted tail at position 3.
await checkPageNumberOrder(
[
{ document: null, includePages: [0, 1] },
{ document: null, includePages: [3], pageIndices: [3] },
{ document: null, includePages: [2], insertAfter: 0 },
],
["1", "3", "2", "4"]
);
});
it("rejects partial pageIndices combined with insertAfter", async function () {
// Partial pageIndices + insertAfter could race over the same slots;
// the resolver throws and the worker surfaces it as a null result.