mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-02 08:10:59 +02:00
Simplify '#getFilteredPageIndices' and '#resolveInsertAfterIndices'
This commit is contained in:
parent
a328294713
commit
b39440b6e0
@ -563,9 +563,10 @@ class PDFEditor {
|
||||
* @property {Array<number>} [pageIndices]
|
||||
* position of the pages in the final document.
|
||||
* @property {number} [insertAfter]
|
||||
* 0-based index in the base sequential document after which to insert the
|
||||
* pages. Sequential pageInfos (those without pageIndices) have their indices
|
||||
* shifted to accommodate the insertion. Cannot be combined with pageIndices.
|
||||
* 0-based index in the base sequential sequence after which to insert the
|
||||
* pages. When every contributing pageInfo has pageIndices, this is
|
||||
* interpreted against that explicit layout. Cannot be combined with
|
||||
* pageIndices on the same entry.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -578,81 +579,58 @@ class PDFEditor {
|
||||
if (!document) {
|
||||
return [];
|
||||
}
|
||||
let keptIndices, keptRanges, deletedIndices, deletedRanges;
|
||||
for (const page of includePages || []) {
|
||||
if (Array.isArray(page)) {
|
||||
(keptRanges ||= []).push(page);
|
||||
} else {
|
||||
(keptIndices ||= new Set()).add(page);
|
||||
const compile = list => {
|
||||
if (!list?.length) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
for (const page of excludePages || []) {
|
||||
if (Array.isArray(page)) {
|
||||
(deletedRanges ||= []).push(page);
|
||||
} else {
|
||||
(deletedIndices ||= new Set()).add(page);
|
||||
const indices = new Set();
|
||||
const ranges = [];
|
||||
for (const item of list) {
|
||||
if (Array.isArray(item)) {
|
||||
ranges.push(item);
|
||||
} else {
|
||||
indices.add(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
const indices = [];
|
||||
return { indices, ranges };
|
||||
};
|
||||
const matches = (index, { indices, ranges }) =>
|
||||
indices.has(index) ||
|
||||
ranges.some(([start, end]) => index >= start && index <= end);
|
||||
const inc = compile(includePages);
|
||||
const exc = compile(excludePages);
|
||||
const result = [];
|
||||
for (let i = 0, ii = document.numPages; i < ii; i++) {
|
||||
if (deletedIndices?.has(i)) {
|
||||
if (exc && matches(i, exc)) {
|
||||
continue;
|
||||
}
|
||||
if (deletedRanges) {
|
||||
let isDeleted = false;
|
||||
for (const [start, end] of deletedRanges) {
|
||||
if (i >= start && i <= end) {
|
||||
isDeleted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isDeleted) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let takePage = false;
|
||||
if (keptIndices) {
|
||||
takePage = keptIndices.has(i);
|
||||
}
|
||||
if (!takePage && keptRanges) {
|
||||
for (const [start, end] of keptRanges) {
|
||||
if (i >= start && i <= end) {
|
||||
takePage = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!takePage && !keptIndices && !keptRanges) {
|
||||
takePage = true;
|
||||
}
|
||||
if (takePage) {
|
||||
indices.push(i);
|
||||
if (!inc || matches(i, inc)) {
|
||||
result.push(i);
|
||||
}
|
||||
}
|
||||
return indices;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve insertAfter pageInfos by converting them (and sequential pageInfos)
|
||||
* to explicit pageIndices, shifting indices to accommodate each insertion.
|
||||
* insertAfter values are relative to the base sequential sequence (i.e. the
|
||||
* concatenation of pages from pageInfos that have neither pageIndices nor
|
||||
* insertAfter), so they are independent of each other.
|
||||
* @param {Array<PageInfo>} pageInfos
|
||||
* @returns {Array<PageInfo>}
|
||||
*/
|
||||
#resolveInsertAfterIndices(pageInfos) {
|
||||
// Single pass: build the base sequential sequence and collect insertAfter
|
||||
// entries, computing each pageInfo's filtered page count only once and only
|
||||
// for pageInfos that actually contribute pages.
|
||||
const sequence = []; // each element is the index into pageInfos
|
||||
const counts = new Array(pageInfos.length);
|
||||
const sequence = [];
|
||||
const insertAfterList = [];
|
||||
for (let i = 0; i < pageInfos.length; i++) {
|
||||
const info = pageInfos[i];
|
||||
if (!info.document || info.pageIndices) {
|
||||
if (!info.document) {
|
||||
counts[i] = 0;
|
||||
continue;
|
||||
}
|
||||
const count = (counts[i] = this.#getFilteredPageIndices(info).length);
|
||||
if (info.pageIndices) {
|
||||
continue;
|
||||
}
|
||||
const count = this.#getFilteredPageIndices(info).length;
|
||||
if (info.insertAfter === undefined) {
|
||||
for (let j = 0; j < count; j++) {
|
||||
sequence.push(i);
|
||||
@ -661,50 +639,49 @@ class PDFEditor {
|
||||
insertAfterList.push({ i, insertAfter: info.insertAfter, count });
|
||||
}
|
||||
}
|
||||
if (insertAfterList.length === 0) {
|
||||
return pageInfos;
|
||||
}
|
||||
|
||||
insertAfterList.sort((a, b) => a.insertAfter - b.insertAfter);
|
||||
// Partial pageIndices rely on auto-fill in extractPages, which races with
|
||||
// the slots insertAfter assigns here.
|
||||
for (let i = 0; i < pageInfos.length; i++) {
|
||||
const info = pageInfos[i];
|
||||
if (
|
||||
info.document &&
|
||||
info.pageIndices &&
|
||||
info.pageIndices.length < counts[i]
|
||||
) {
|
||||
throw new Error(
|
||||
"extractPages: partial pageIndices cannot be combined with insertAfter entries."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Partial pageIndices would auto-fill into free slots at extraction time,
|
||||
// which collides with the positions insertAfter shifts/assigns. Reject.
|
||||
if (insertAfterList.length > 0) {
|
||||
insertAfterList.sort((a, b) => a.insertAfter - b.insertAfter || a.i - b.i);
|
||||
|
||||
// If there is no base sequential sequence, resolve insertAfter against the
|
||||
// explicit layout. Shift pageIndices values but keep their array order:
|
||||
// extractPages maps each filtered source page to the corresponding
|
||||
// pageIndices entry.
|
||||
if (
|
||||
sequence.length === 0 &&
|
||||
pageInfos.some(info => info.document && info.pageIndices)
|
||||
) {
|
||||
const updatedPageInfos = pageInfos.slice();
|
||||
let maxExistingPos = -1;
|
||||
for (const info of pageInfos) {
|
||||
if (!info.document || !info.pageIndices) {
|
||||
continue;
|
||||
}
|
||||
const filteredCount = this.#getFilteredPageIndices(info).length;
|
||||
if (info.pageIndices.length < filteredCount) {
|
||||
throw new Error(
|
||||
"extractPages: partial pageIndices cannot be combined with insertAfter entries."
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the base sequential sequence is empty but some entries carry explicit
|
||||
// pageIndices (e.g. after a deletion), resolve each insertAfter against
|
||||
// that explicit layout: shift existing positions past the insertion point
|
||||
// and fill the gap. Entries without a document are ignored. With no
|
||||
// explicit entries we fall through to the sequential branch, whose
|
||||
// Array.splice already clamps to position 0 on an empty sequence.
|
||||
const hasExplicitLayout =
|
||||
insertAfterList.length > 0 &&
|
||||
pageInfos.some(info => info.document && info.pageIndices);
|
||||
if (sequence.length === 0 && hasExplicitLayout) {
|
||||
const updatedPageInfos = pageInfos.slice();
|
||||
let maxExistingPos = -1;
|
||||
for (const info of pageInfos) {
|
||||
if (info.document && info.pageIndices) {
|
||||
for (const idx of info.pageIndices) {
|
||||
if (idx > maxExistingPos) {
|
||||
maxExistingPos = idx;
|
||||
}
|
||||
for (const idx of info.pageIndices) {
|
||||
if (idx > maxExistingPos) {
|
||||
maxExistingPos = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
let offset = 0;
|
||||
for (const { i, insertAfter, count } of insertAfterList) {
|
||||
// Clamp to [-1, maxExistingPos] so out-of-range values don't produce
|
||||
// negative or gap-creating positions.
|
||||
const threshold = Math.min(
|
||||
Math.max(insertAfter, -1) + offset,
|
||||
maxExistingPos
|
||||
@ -725,49 +702,39 @@ class PDFEditor {
|
||||
),
|
||||
};
|
||||
}
|
||||
const insertedIndices = [];
|
||||
const pageIndices = [];
|
||||
for (let k = 0; k < count; k++) {
|
||||
insertedIndices.push(threshold + 1 + k);
|
||||
pageIndices.push(threshold + 1 + k);
|
||||
}
|
||||
const newInfo = {
|
||||
...updatedPageInfos[i],
|
||||
pageIndices: insertedIndices,
|
||||
};
|
||||
delete newInfo.insertAfter;
|
||||
updatedPageInfos[i] = newInfo;
|
||||
const result = { ...updatedPageInfos[i], pageIndices };
|
||||
delete result.insertAfter;
|
||||
updatedPageInfos[i] = result;
|
||||
offset += count;
|
||||
maxExistingPos += count;
|
||||
}
|
||||
return updatedPageInfos;
|
||||
}
|
||||
|
||||
// insertAfter values are relative to the base sequential sequence (stable
|
||||
// across entries thanks to the sort above); offset converts them to
|
||||
// current-sequence positions as we splice.
|
||||
let offset = 0;
|
||||
for (const { i, insertAfter, count } of insertAfterList) {
|
||||
const insertPos = insertAfter + 1 + offset;
|
||||
const insertPos = Math.max(insertAfter, -1) + 1 + offset;
|
||||
sequence.splice(insertPos, 0, ...new Array(count).fill(i));
|
||||
offset += count;
|
||||
}
|
||||
|
||||
// Map each pageInfo index to its final positions in the sequence using a
|
||||
// plain array (keys are dense integers so no need for a Map).
|
||||
const pageIndicesArr = new Array(pageInfos.length);
|
||||
for (let pos = 0; pos < sequence.length; pos++) {
|
||||
const infoIdx = sequence[pos];
|
||||
(pageIndicesArr[infoIdx] ||= []).push(pos);
|
||||
}
|
||||
|
||||
// Return updated pageInfos: sequential and insertAfter pageInfos now have
|
||||
// explicit pageIndices; already-indexed pageInfos are left unchanged.
|
||||
return pageInfos.map((info, i) => {
|
||||
if (!info.document || info.pageIndices) {
|
||||
return info;
|
||||
}
|
||||
const newInfo = { ...info, pageIndices: pageIndicesArr[i] || [] };
|
||||
delete newInfo.insertAfter;
|
||||
return newInfo;
|
||||
const result = { ...info, pageIndices: pageIndicesArr[i] || [] };
|
||||
delete result.insertAfter;
|
||||
return result;
|
||||
});
|
||||
}
|
||||
|
||||
@ -792,9 +759,7 @@ class PDFEditor {
|
||||
task
|
||||
) {
|
||||
this.#primaryDocument = primaryDocument;
|
||||
if (pageInfos.some(info => info.insertAfter !== undefined)) {
|
||||
pageInfos = this.#resolveInsertAfterIndices(pageInfos);
|
||||
}
|
||||
pageInfos = this.#resolveInsertAfterIndices(pageInfos);
|
||||
const promises = [];
|
||||
let newIndex = 0;
|
||||
this.isSingleFile =
|
||||
|
||||
@ -5584,6 +5584,35 @@ small scripts as well as for`);
|
||||
return refs;
|
||||
};
|
||||
|
||||
async function checkPageNumberOrder(pageInfos, expected) {
|
||||
let loadingTask = getDocument(
|
||||
buildGetDocumentParams("page_with_number.pdf")
|
||||
);
|
||||
let pdfDoc = await loadingTask.promise;
|
||||
const data = await pdfDoc.extractPages(pageInfos);
|
||||
await loadingTask.destroy();
|
||||
|
||||
expect(data).not.toBeNull();
|
||||
if (!data) {
|
||||
return;
|
||||
}
|
||||
|
||||
loadingTask = getDocument(data);
|
||||
pdfDoc = await loadingTask.promise;
|
||||
expect(pdfDoc.numPages).toEqual(expected.length);
|
||||
|
||||
for (let i = 0, ii = expected.length; i < ii; i++) {
|
||||
const pageNum = i + 1;
|
||||
const pdfPage = await pdfDoc.getPage(pageNum);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
expect(mergeText(items))
|
||||
.withContext(`Page ${pageNum}`)
|
||||
.toEqual(expected[i]);
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
}
|
||||
|
||||
describe("Merge pdfs", function () {
|
||||
it("should merge three PDFs", async function () {
|
||||
const loadingTask = getDocument(
|
||||
@ -7423,6 +7452,45 @@ small scripts as well as for`);
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("insertAfter preserves non-monotonic explicit pageIndices", async function () {
|
||||
// Explicit pageIndices place page 2 before page 1. The inserted page 3
|
||||
// lands after position 0, so the final order is "2", "3", "1".
|
||||
await checkPageNumberOrder(
|
||||
[
|
||||
{ document: null, includePages: [0, 1], pageIndices: [1, 0] },
|
||||
{ document: null, includePages: [2], insertAfter: 0 },
|
||||
],
|
||||
["2", "3", "1"]
|
||||
);
|
||||
});
|
||||
|
||||
it("insertAfter shifts only the values past the threshold in non-monotonic pageIndices", async function () {
|
||||
// pageIndices [2, 0, 1] place pages 1·2·3 as "2", "3", "1". Inserting
|
||||
// page 4 after position 1 shifts only values > 1, so [2, 0, 1] becomes
|
||||
// [3, 0, 1] and the final order is "2", "3", "4", "1".
|
||||
await checkPageNumberOrder(
|
||||
[
|
||||
{ document: null, includePages: [0, 1, 2], pageIndices: [2, 0, 1] },
|
||||
{ document: null, includePages: [3], insertAfter: 1 },
|
||||
],
|
||||
["2", "3", "4", "1"]
|
||||
);
|
||||
});
|
||||
|
||||
it("insertAfter preserves mixed sequential and explicit layouts", async function () {
|
||||
// Sequential pages 1 and 2 form the base sequence; page 3 is inserted
|
||||
// after base position 0. The explicit page 4 already accounts for the
|
||||
// shifted tail at position 3.
|
||||
await checkPageNumberOrder(
|
||||
[
|
||||
{ document: null, includePages: [0, 1] },
|
||||
{ document: null, includePages: [3], pageIndices: [3] },
|
||||
{ document: null, includePages: [2], insertAfter: 0 },
|
||||
],
|
||||
["1", "3", "2", "4"]
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects partial pageIndices combined with insertAfter", async function () {
|
||||
// Partial pageIndices + insertAfter could race over the same slots;
|
||||
// the resolver throws and the worker surfaces it as a null result.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user