mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-01 15:51:00 +02:00
Merge pull request #21334 from calixteman/merge_images
Allow inserting an image as a new page when editing a PDF (bug 2032967)
This commit is contained in:
commit
3fe3321f7a
@ -25,7 +25,6 @@ import {
|
||||
BASELINE_FACTOR,
|
||||
BBOX_INIT,
|
||||
F32_BBOX_INIT,
|
||||
FeatureTest,
|
||||
info,
|
||||
isArrayEqual,
|
||||
LINE_DESCENT_FACTOR,
|
||||
@ -62,7 +61,6 @@ import {
|
||||
} from "./default_appearance.js";
|
||||
import { DateFormats, TimeFormats } from "../shared/scripting_utils.js";
|
||||
import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js";
|
||||
import { Stream, StringStream } from "./stream.js";
|
||||
import {
|
||||
stringToAsciiOrUTF16BE,
|
||||
stringToPDFString,
|
||||
@ -72,11 +70,13 @@ import { BaseStream } from "./base_stream.js";
|
||||
import { bidi } from "./bidi.js";
|
||||
import { Catalog } from "./catalog.js";
|
||||
import { ColorSpaceUtils } from "./colorspace_utils.js";
|
||||
import { createImage } from "./editor/pdf_images.js";
|
||||
import { FileSpec } from "./file_spec.js";
|
||||
import { JpegStream } from "./jpeg_stream.js";
|
||||
import { ObjectLoader } from "./object_loader.js";
|
||||
import { OperatorList } from "./operator_list.js";
|
||||
import { parseMarkedContentProps } from "./evaluator_utils.js";
|
||||
import { StringStream } from "./stream.js";
|
||||
import { XFAFactory } from "./xfa/factory.js";
|
||||
|
||||
class AnnotationFactory {
|
||||
@ -351,7 +351,7 @@ class AnnotationFactory {
|
||||
continue;
|
||||
}
|
||||
imagePromises ||= new Map();
|
||||
imagePromises.set(bitmapId, StampAnnotation.createImage(bitmap, xref));
|
||||
imagePromises.set(bitmapId, createImage(bitmap, xref));
|
||||
}
|
||||
|
||||
return imagePromises;
|
||||
@ -427,7 +427,10 @@ class AnnotationFactory {
|
||||
changes.put(imageRef, {
|
||||
data: imageStream,
|
||||
});
|
||||
image.imageStream = image.smaskStream = null;
|
||||
image.imageStream = null;
|
||||
image.imageRenderStream = null;
|
||||
image.smaskStream = null;
|
||||
image.smaskRenderStream = null;
|
||||
}
|
||||
promises.push(
|
||||
StampAnnotation.createNewAnnotation(xref, annotation, changes, {
|
||||
@ -522,12 +525,23 @@ class AnnotationFactory {
|
||||
? await imagePromises?.get(annotation.bitmapId)
|
||||
: null;
|
||||
if (image?.imageStream) {
|
||||
const { imageStream, smaskStream } = image;
|
||||
if (smaskStream) {
|
||||
imageStream.dict.set("SMask", smaskStream);
|
||||
const {
|
||||
imageStream,
|
||||
imageRenderStream,
|
||||
smaskStream,
|
||||
smaskRenderStream,
|
||||
} = image;
|
||||
const imageRef =
|
||||
imageRenderStream ||
|
||||
new JpegStream(imageStream, imageStream.length);
|
||||
if (smaskStream || smaskRenderStream) {
|
||||
imageRef.dict.set("SMask", smaskRenderStream || smaskStream);
|
||||
}
|
||||
image.imageRef = new JpegStream(imageStream, imageStream.length);
|
||||
image.imageStream = image.smaskStream = null;
|
||||
image.imageRef = imageRef;
|
||||
image.imageStream = null;
|
||||
image.imageRenderStream = null;
|
||||
image.smaskStream = null;
|
||||
image.smaskRenderStream = null;
|
||||
}
|
||||
promises.push(
|
||||
StampAnnotation.createNewPrintAnnotation(
|
||||
@ -5097,82 +5111,6 @@ class StampAnnotation extends MarkupAnnotation {
|
||||
return !modifiedIds?.has(this.data.id);
|
||||
}
|
||||
|
||||
static async createImage(bitmap, xref) {
|
||||
// TODO: when printing, we could have a specific internal colorspace
|
||||
// (e.g. something like DeviceRGBA) in order avoid any conversion (i.e. no
|
||||
// jpeg, no rgba to rgb conversion, etc...)
|
||||
|
||||
const { width, height } = bitmap;
|
||||
const canvas = new OffscreenCanvas(width, height);
|
||||
const ctx = canvas.getContext("2d", { alpha: true });
|
||||
|
||||
// Draw the image and get the data in order to extract the transparency.
|
||||
ctx.drawImage(bitmap, 0, 0);
|
||||
const data = ctx.getImageData(0, 0, width, height).data;
|
||||
const buf32 = new Uint32Array(data.buffer);
|
||||
const hasAlpha = buf32.some(
|
||||
FeatureTest.isLittleEndian
|
||||
? x => x >>> 24 !== 0xff
|
||||
: x => (x & 0xff) !== 0xff
|
||||
);
|
||||
|
||||
if (hasAlpha) {
|
||||
// Redraw the image on a white background in order to remove the thin gray
|
||||
// line which can appear when exporting to jpeg.
|
||||
ctx.fillStyle = "white";
|
||||
ctx.fillRect(0, 0, width, height);
|
||||
ctx.drawImage(bitmap, 0, 0);
|
||||
}
|
||||
|
||||
const jpegBytesPromise = canvas
|
||||
.convertToBlob({ type: "image/jpeg", quality: 1 })
|
||||
.then(blob => blob.bytes());
|
||||
|
||||
const xobjectName = Name.get("XObject");
|
||||
const imageName = Name.get("Image");
|
||||
const image = new Dict(xref);
|
||||
image.set("Type", xobjectName);
|
||||
image.set("Subtype", imageName);
|
||||
image.set("BitsPerComponent", 8);
|
||||
image.setIfName("ColorSpace", "DeviceRGB");
|
||||
image.setIfName("Filter", "DCTDecode");
|
||||
image.set("BBox", [0, 0, width, height]);
|
||||
image.set("Width", width);
|
||||
image.set("Height", height);
|
||||
|
||||
let smaskStream = null;
|
||||
if (hasAlpha) {
|
||||
const alphaBuffer = new Uint8Array(buf32.length);
|
||||
if (FeatureTest.isLittleEndian) {
|
||||
for (let i = 0, ii = buf32.length; i < ii; i++) {
|
||||
alphaBuffer[i] = buf32[i] >>> 24;
|
||||
}
|
||||
} else {
|
||||
for (let i = 0, ii = buf32.length; i < ii; i++) {
|
||||
alphaBuffer[i] = buf32[i] & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
const smask = new Dict(xref);
|
||||
smask.set("Type", xobjectName);
|
||||
smask.set("Subtype", imageName);
|
||||
smask.set("BitsPerComponent", 8);
|
||||
smask.setIfName("ColorSpace", "DeviceGray");
|
||||
smask.set("Width", width);
|
||||
smask.set("Height", height);
|
||||
|
||||
smaskStream = new Stream(alphaBuffer, 0, 0, smask);
|
||||
}
|
||||
const imageStream = new Stream(await jpegBytesPromise, 0, 0, image);
|
||||
|
||||
return {
|
||||
imageStream,
|
||||
smaskStream,
|
||||
width,
|
||||
height,
|
||||
};
|
||||
}
|
||||
|
||||
static createNewDict(annotation, xref, { apRef, ap }) {
|
||||
const { date, oldAnnotation, rect, rotation, user } = annotation;
|
||||
const stamp = oldAnnotation || new Dict(xref);
|
||||
|
||||
@ -2154,4 +2154,4 @@ class PDFDocument {
|
||||
}
|
||||
}
|
||||
|
||||
export { Page, PDFDocument };
|
||||
export { LETTER_SIZE_MEDIABOX, Page, PDFDocument };
|
||||
|
||||
@ -25,14 +25,17 @@ import {
|
||||
getInheritableProperty,
|
||||
getModificationDate,
|
||||
getNewAnnotationsMap,
|
||||
numberToString,
|
||||
} from "../core_utils.js";
|
||||
import { Dict, isName, Name, Ref, RefSet, RefSetCache } from "../primitives.js";
|
||||
import { incrementalUpdate, writeValue } from "../writer.js";
|
||||
import { NameTree, NumberTree } from "../name_number_tree.js";
|
||||
import { Stream, StringStream } from "../stream.js";
|
||||
import { stringToAsciiOrUTF16BE, stringToPDFString } from "../string_utils.js";
|
||||
import { AnnotationFactory } from "../annotation.js";
|
||||
import { BaseStream } from "../base_stream.js";
|
||||
import { StringStream } from "../stream.js";
|
||||
import { createImage } from "./pdf_images.js";
|
||||
import { LETTER_SIZE_MEDIABOX } from "../document.js";
|
||||
import { stringToBytes } from "../../shared/util.js";
|
||||
|
||||
const MAX_LEAVES_PER_PAGES_NODE = 16;
|
||||
@ -112,15 +115,11 @@ class XRefWrapper {
|
||||
}
|
||||
|
||||
class PDFEditor {
|
||||
// Whether the edited PDF contains only one file. This is used to determine if
|
||||
// we can handle some potential duplications.
|
||||
// For example, there are no obvious way to dedup page labels when merging
|
||||
// multiple PDF files.
|
||||
hasSingleFile = false;
|
||||
|
||||
// Whether the edited PDF contains only one file used one or more times.
|
||||
// This is used to determine if we can preserve some information such as
|
||||
// passwords.
|
||||
// Whether the edited PDF is built from a single source file, used one or more
|
||||
// times. This is used to determine if we can preserve information that can't
|
||||
// be meaningfully merged across distinct files, such as page labels, the Info
|
||||
// dictionary, and passwords. For example, there's no obvious way to dedup
|
||||
// page labels when merging multiple PDF files.
|
||||
isSingleFile = false;
|
||||
|
||||
#newAnnotationsParams = null;
|
||||
@ -554,7 +553,9 @@ class PDFEditor {
|
||||
|
||||
/**
|
||||
* @typedef {Object} PageInfo
|
||||
* @property {PDFDocument} document
|
||||
* @property {PDFDocument} [document]
|
||||
* @property {ImageBitmap} [image]
|
||||
* image to insert as a synthetic page.
|
||||
* @property {Array<Array<number>|number>} [includePages]
|
||||
* included ranges (inclusive) or indices.
|
||||
* @property {Array<Array<number>|number>} [excludePages]
|
||||
@ -622,11 +623,15 @@ class PDFEditor {
|
||||
const insertAfterList = [];
|
||||
for (let i = 0; i < pageInfos.length; i++) {
|
||||
const info = pageInfos[i];
|
||||
if (!info.document) {
|
||||
let count;
|
||||
if (info.image) {
|
||||
count = counts[i] = 1;
|
||||
} else if (!info.document) {
|
||||
counts[i] = 0;
|
||||
continue;
|
||||
} else {
|
||||
count = counts[i] = this.#getFilteredPageIndices(info).length;
|
||||
}
|
||||
const count = (counts[i] = this.#getFilteredPageIndices(info).length);
|
||||
if (info.pageIndices) {
|
||||
continue;
|
||||
}
|
||||
@ -642,12 +647,14 @@ class PDFEditor {
|
||||
return pageInfos;
|
||||
}
|
||||
|
||||
const hasContent = info => !!(info.document || info.image);
|
||||
|
||||
// Partial pageIndices rely on auto-fill in extractPages, which races with
|
||||
// the slots insertAfter assigns here.
|
||||
for (let i = 0; i < pageInfos.length; i++) {
|
||||
const info = pageInfos[i];
|
||||
if (
|
||||
info.document &&
|
||||
hasContent(info) &&
|
||||
info.pageIndices &&
|
||||
info.pageIndices.length < counts[i]
|
||||
) {
|
||||
@ -665,12 +672,12 @@ class PDFEditor {
|
||||
// pageIndices entry.
|
||||
if (
|
||||
sequence.length === 0 &&
|
||||
pageInfos.some(info => info.document && info.pageIndices)
|
||||
pageInfos.some(info => hasContent(info) && info.pageIndices)
|
||||
) {
|
||||
const updatedPageInfos = pageInfos.slice();
|
||||
let maxExistingPos = -1;
|
||||
for (const info of pageInfos) {
|
||||
if (!info.document || !info.pageIndices) {
|
||||
if (!hasContent(info) || !info.pageIndices) {
|
||||
continue;
|
||||
}
|
||||
for (const idx of info.pageIndices) {
|
||||
@ -688,7 +695,7 @@ class PDFEditor {
|
||||
for (let j = 0; j < updatedPageInfos.length; j++) {
|
||||
const existingInfo = updatedPageInfos[j];
|
||||
if (
|
||||
!existingInfo.document ||
|
||||
!hasContent(existingInfo) ||
|
||||
!existingInfo.pageIndices ||
|
||||
existingInfo.pageIndices.every(idx => idx <= threshold)
|
||||
) {
|
||||
@ -728,7 +735,7 @@ class PDFEditor {
|
||||
}
|
||||
|
||||
return pageInfos.map((info, i) => {
|
||||
if (!info.document || info.pageIndices) {
|
||||
if (!hasContent(info) || info.pageIndices) {
|
||||
return info;
|
||||
}
|
||||
const result = { ...info, pageIndices: pageIndicesArr[i] || [] };
|
||||
@ -761,10 +768,24 @@ class PDFEditor {
|
||||
pageInfos = this.#resolveInsertAfterIndices(pageInfos);
|
||||
const promises = [];
|
||||
let newIndex = 0;
|
||||
const reservePageSlot = newPageIndex => {
|
||||
if (!Number.isInteger(newPageIndex) || newPageIndex < 0) {
|
||||
throw new Error("extractPages: invalid page index.");
|
||||
}
|
||||
if (this.oldPages[newPageIndex] !== undefined) {
|
||||
throw new Error("extractPages: overlapping pageIndices.");
|
||||
}
|
||||
// Reserve the slot immediately because page/image collection can be
|
||||
// async.
|
||||
this.oldPages[newPageIndex] = null;
|
||||
};
|
||||
// Image entries don't carry document identity, so ignore them when
|
||||
// deciding whether we're operating on a single source PDF.
|
||||
const docPageInfos = pageInfos.filter(info => !!info.document);
|
||||
this.isSingleFile =
|
||||
pageInfos.length === 1 ||
|
||||
pageInfos.every(info => info.document === pageInfos[0].document);
|
||||
this.hasSingleFile = pageInfos.length === 1;
|
||||
docPageInfos.length === 1 ||
|
||||
(docPageInfos.length > 0 &&
|
||||
docPageInfos.every(info => info.document === docPageInfos[0].document));
|
||||
const allDocumentData = [];
|
||||
|
||||
if (annotationStorage) {
|
||||
@ -780,27 +801,57 @@ class PDFEditor {
|
||||
};
|
||||
}
|
||||
|
||||
for (const {
|
||||
document,
|
||||
includePages,
|
||||
excludePages,
|
||||
pageIndices,
|
||||
} of pageInfos) {
|
||||
const imageEntries = [];
|
||||
for (const pageInfo of pageInfos) {
|
||||
const { document, image, includePages, excludePages, pageIndices } =
|
||||
pageInfo;
|
||||
if (image) {
|
||||
if (pageIndices) {
|
||||
newIndex = -1;
|
||||
if (pageIndices.length > 1) {
|
||||
throw new Error("extractPages: too many pageIndices.");
|
||||
}
|
||||
}
|
||||
// Image entries are inserted as synthetic pages. Reserve a slot now;
|
||||
// the actual page dict is built after real pages are collected so
|
||||
// that we know the modal MediaBox dimensions to use.
|
||||
let newPageIndex;
|
||||
if (pageIndices?.length) {
|
||||
newPageIndex = pageIndices[0];
|
||||
} else if (newIndex !== -1) {
|
||||
newPageIndex = newIndex++;
|
||||
} else {
|
||||
for (
|
||||
newPageIndex = 0;
|
||||
this.oldPages[newPageIndex] !== undefined;
|
||||
newPageIndex++
|
||||
) {
|
||||
/* empty */
|
||||
}
|
||||
}
|
||||
reservePageSlot(newPageIndex);
|
||||
imageEntries.push({ image, slot: newPageIndex });
|
||||
continue;
|
||||
}
|
||||
if (!document) {
|
||||
continue;
|
||||
}
|
||||
if (pageIndices) {
|
||||
newIndex = -1;
|
||||
}
|
||||
const filteredPageIndices = this.#getFilteredPageIndices({
|
||||
document,
|
||||
includePages,
|
||||
excludePages,
|
||||
});
|
||||
if (pageIndices && pageIndices.length > filteredPageIndices.length) {
|
||||
throw new Error("extractPages: too many pageIndices.");
|
||||
}
|
||||
const documentData = new DocumentData(document);
|
||||
allDocumentData.push(documentData);
|
||||
promises.push(this.#collectDocumentData(documentData));
|
||||
let pageIndex = 0;
|
||||
for (const i of this.#getFilteredPageIndices({
|
||||
document,
|
||||
includePages,
|
||||
excludePages,
|
||||
})) {
|
||||
for (const i of filteredPageIndices) {
|
||||
let newPageIndex;
|
||||
if (pageIndices) {
|
||||
newPageIndex = pageIndices[pageIndex++];
|
||||
@ -821,8 +872,7 @@ class PDFEditor {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reserve the slot immediately because the page fetch is async.
|
||||
this.oldPages[newPageIndex] = null;
|
||||
reservePageSlot(newPageIndex);
|
||||
promises.push(
|
||||
document.getPage(i).then(page => {
|
||||
this.oldPages[newPageIndex] = new PageData(page, documentData);
|
||||
@ -831,6 +881,11 @@ class PDFEditor {
|
||||
}
|
||||
}
|
||||
await Promise.all(promises);
|
||||
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
|
||||
if (this.oldPages[i] === undefined) {
|
||||
throw new Error("extractPages: sparse pageIndices.");
|
||||
}
|
||||
}
|
||||
promises.length = 0;
|
||||
|
||||
this.#collectValidDestinations(allDocumentData);
|
||||
@ -838,15 +893,31 @@ class PDFEditor {
|
||||
this.#collectPageLabels();
|
||||
|
||||
for (const page of this.oldPages) {
|
||||
promises.push(this.#postCollectPageData(page));
|
||||
if (page) {
|
||||
promises.push(this.#postCollectPageData(page));
|
||||
}
|
||||
}
|
||||
await Promise.all(promises);
|
||||
|
||||
this.#findDuplicateNamedDestinations();
|
||||
this.#setPostponedRefCopies(allDocumentData);
|
||||
|
||||
const imageSlots = new Map();
|
||||
for (const entry of imageEntries) {
|
||||
imageSlots.set(entry.slot, entry);
|
||||
}
|
||||
const modalPageSize = imageSlots.size > 0 ? this.#modalPageSize() : null;
|
||||
|
||||
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
|
||||
this.newPages[i] = await this.#makePageCopy(i, null);
|
||||
const imageEntry = imageSlots.get(i);
|
||||
if (imageEntry) {
|
||||
this.newPages[i] = await this.#makeImagePage(
|
||||
imageEntry.image,
|
||||
modalPageSize
|
||||
);
|
||||
} else {
|
||||
this.newPages[i] = await this.#makePageCopy(i, null);
|
||||
}
|
||||
}
|
||||
|
||||
this.#fixPostponedRefCopies(allDocumentData);
|
||||
@ -1058,6 +1129,9 @@ class PDFEditor {
|
||||
let newStructParentId = 0;
|
||||
const { parentTree: newParentTree } = this;
|
||||
for (let i = 0, ii = this.newPages.length; i < ii; i++) {
|
||||
if (!this.oldPages[i]) {
|
||||
continue;
|
||||
}
|
||||
const {
|
||||
documentData: {
|
||||
parentTree,
|
||||
@ -1362,6 +1436,9 @@ class PDFEditor {
|
||||
};
|
||||
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
|
||||
const page = this.oldPages[i];
|
||||
if (!page) {
|
||||
continue;
|
||||
}
|
||||
const {
|
||||
documentData: {
|
||||
destinations,
|
||||
@ -2023,19 +2100,23 @@ class PDFEditor {
|
||||
async #collectPageLabels() {
|
||||
// We can only preserve page labels when editing a single PDF file.
|
||||
// This is consistent with behavior in Adobe Acrobat.
|
||||
if (!this.hasSingleFile) {
|
||||
if (!this.isSingleFile) {
|
||||
return;
|
||||
}
|
||||
const firstRealPage = this.oldPages.find(p => !!p);
|
||||
if (!firstRealPage) {
|
||||
return;
|
||||
}
|
||||
const {
|
||||
documentData: { document, pageLabels },
|
||||
} = this.oldPages[0];
|
||||
} = firstRealPage;
|
||||
if (!pageLabels) {
|
||||
return;
|
||||
}
|
||||
const numPages = document.numPages;
|
||||
const oldPageLabels = [];
|
||||
const labelsByPageIndex = new Map();
|
||||
const oldPageIndices = new Set(
|
||||
this.oldPages.map(({ page: { pageIndex } }) => pageIndex)
|
||||
this.oldPages.filter(p => !!p).map(({ page: { pageIndex } }) => pageIndex)
|
||||
);
|
||||
let currentLabel = null;
|
||||
let stFirstIndex = -1;
|
||||
@ -2054,19 +2135,27 @@ class PDFEditor {
|
||||
currentLabel.set("St", st + (i - stFirstIndex));
|
||||
stFirstIndex = -1;
|
||||
}
|
||||
oldPageLabels.push(currentLabel);
|
||||
labelsByPageIndex.set(i, currentLabel);
|
||||
}
|
||||
currentLabel = oldPageLabels[0];
|
||||
let currentIndex = 0;
|
||||
const newPageLabels = (this.pageLabels = [[0, currentLabel]]);
|
||||
for (let i = 0, ii = oldPageLabels.length; i < ii; i++) {
|
||||
const label = oldPageLabels[i];
|
||||
|
||||
const defaultLabel = index => {
|
||||
const label = new Dict();
|
||||
label.setIfName("S", "D");
|
||||
label.set("St", index + 1);
|
||||
return label;
|
||||
};
|
||||
currentLabel = null;
|
||||
const newPageLabels = (this.pageLabels = []);
|
||||
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
|
||||
const pageData = this.oldPages[i];
|
||||
const label = pageData
|
||||
? labelsByPageIndex.get(pageData.page.pageIndex) || defaultLabel(i)
|
||||
: defaultLabel(i);
|
||||
if (label === currentLabel) {
|
||||
continue;
|
||||
}
|
||||
currentIndex = i;
|
||||
currentLabel = label;
|
||||
newPageLabels.push([currentIndex, currentLabel]);
|
||||
newPageLabels.push([i, currentLabel]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2192,6 +2281,136 @@ class PDFEditor {
|
||||
return pageRef;
|
||||
}
|
||||
|
||||
#modalPageSize() {
|
||||
const counts = new Map();
|
||||
for (const pageData of this.oldPages) {
|
||||
if (!pageData) {
|
||||
continue;
|
||||
}
|
||||
const { page } = pageData;
|
||||
const [x0, y0, x1, y1] = page.view;
|
||||
let width = x1 - x0;
|
||||
let height = y1 - y0;
|
||||
if (width <= 0 || height <= 0) {
|
||||
continue;
|
||||
}
|
||||
// The synthesized page won't carry a /Rotate entry, so swap dimensions
|
||||
// for 90/270 to match what the user sees in the source page.
|
||||
if (page.rotate % 180 !== 0) {
|
||||
[width, height] = [height, width];
|
||||
}
|
||||
const key = `${width}x${height}`;
|
||||
const entry = counts.get(key);
|
||||
if (entry) {
|
||||
entry.count++;
|
||||
} else {
|
||||
counts.set(key, { width, height, count: 1 });
|
||||
}
|
||||
}
|
||||
if (counts.size === 0) {
|
||||
const [, , width, height] = LETTER_SIZE_MEDIABOX;
|
||||
return { width, height };
|
||||
}
|
||||
let best = null;
|
||||
for (const entry of counts.values()) {
|
||||
if (
|
||||
!best ||
|
||||
entry.count > best.count ||
|
||||
(entry.count === best.count &&
|
||||
entry.width * entry.height > best.width * best.height)
|
||||
) {
|
||||
best = entry;
|
||||
}
|
||||
}
|
||||
return { width: best.width, height: best.height };
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a brand-new page that displays a single image, sized to the modal
|
||||
* page dimensions with a margin equal to 10% of the page width on every
|
||||
* side. The image is encoded as JPEG or lossless Flate depending on its
|
||||
* contents; when the source has transparency, an SMask carrying the alpha
|
||||
* channel is attached so the mask is preserved on render.
|
||||
* @param {ImageBitmap} bitmap
|
||||
* @param {{width: number, height: number}} pageSize
|
||||
* @returns {Promise<Ref>}
|
||||
*/
|
||||
async #makeImagePage(bitmap, pageSize) {
|
||||
const { width: pageW, height: pageH } = pageSize;
|
||||
const DEFAULT_MARGIN_RATIO = 0.1;
|
||||
const margin = pageW * DEFAULT_MARGIN_RATIO;
|
||||
const availW = Math.max(1, pageW - 2 * margin);
|
||||
const availH = Math.max(1, pageH - 2 * margin);
|
||||
|
||||
const lastRef = this.newRefCount;
|
||||
|
||||
const {
|
||||
imageStream,
|
||||
smaskStream,
|
||||
width: imgW,
|
||||
height: imgH,
|
||||
} = await createImage(bitmap, this.xrefWrapper, { closeBitmap: true });
|
||||
|
||||
const scale = Math.min(availW / imgW, availH / imgH);
|
||||
const drawW = imgW * scale;
|
||||
const drawH = imgH * scale;
|
||||
const tx = (pageW - drawW) / 2;
|
||||
const ty = (pageH - drawH) / 2;
|
||||
|
||||
if (smaskStream) {
|
||||
const smaskRef = this.newRef;
|
||||
this.xref[smaskRef.num] = smaskStream;
|
||||
imageStream.dict.set("SMask", smaskRef);
|
||||
}
|
||||
const imageRef = this.newRef;
|
||||
this.xref[imageRef.num] = imageStream;
|
||||
|
||||
const xobjectDict = new Dict(this.xrefWrapper);
|
||||
xobjectDict.set("Im0", imageRef);
|
||||
const resourcesDict = new Dict(this.xrefWrapper);
|
||||
resourcesDict.set("XObject", xobjectDict);
|
||||
resourcesDict.set("ProcSet", [Name.get("PDF"), Name.get("ImageC")]);
|
||||
|
||||
const content =
|
||||
`q ${numberToString(drawW)} 0 0 ${numberToString(drawH)} ` +
|
||||
`${numberToString(tx)} ${numberToString(ty)} cm /Im0 Do Q`;
|
||||
const contentsDict = new Dict(this.xrefWrapper);
|
||||
const contentsStream = new Stream(
|
||||
stringToBytes(content),
|
||||
0,
|
||||
0,
|
||||
contentsDict
|
||||
);
|
||||
const contentsRef = this.newRef;
|
||||
this.xref[contentsRef.num] = contentsStream;
|
||||
|
||||
const pageRef = this.newRef;
|
||||
const pageDict = (this.xref[pageRef.num] = new Dict(this.xrefWrapper));
|
||||
pageDict.setIfName("Type", "Page");
|
||||
pageDict.set("MediaBox", [0, 0, pageW, pageH]);
|
||||
pageDict.set("Resources", resourcesDict);
|
||||
pageDict.set("Contents", contentsRef);
|
||||
|
||||
if (this.useObjectStreams) {
|
||||
const newLastRef = this.newRefCount;
|
||||
const pageObjectRefs = [];
|
||||
for (let i = lastRef; i < newLastRef; i++) {
|
||||
const obj = this.xref[i];
|
||||
if (obj instanceof BaseStream) {
|
||||
continue;
|
||||
}
|
||||
pageObjectRefs.push(Ref.get(i, 0));
|
||||
}
|
||||
for (let i = 0; i < pageObjectRefs.length; i += 0xffff) {
|
||||
const objStreamRef = this.newRef;
|
||||
this.objStreamRefs.add(objStreamRef.num);
|
||||
this.xref[objStreamRef.num] = pageObjectRefs.slice(i, i + 0xffff);
|
||||
}
|
||||
}
|
||||
|
||||
return pageRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the page tree structure.
|
||||
*/
|
||||
@ -2485,9 +2704,10 @@ class PDFEditor {
|
||||
#makeInfo() {
|
||||
const infoMap = new Map();
|
||||
if (this.isSingleFile) {
|
||||
const firstRealPage = this.oldPages.find(p => !!p);
|
||||
const {
|
||||
xref: { trailer },
|
||||
} = this.oldPages[0].documentData.document;
|
||||
} = firstRealPage.documentData.document;
|
||||
const oldInfoDict = trailer.get("Info");
|
||||
for (const [key, value] of oldInfoDict || []) {
|
||||
if (typeof value === "string") {
|
||||
@ -2520,7 +2740,8 @@ class PDFEditor {
|
||||
if (!this.isSingleFile) {
|
||||
return [null, null, null];
|
||||
}
|
||||
const { documentData } = this.oldPages[0];
|
||||
const firstRealPage = this.oldPages.find(p => !!p);
|
||||
const { documentData } = firstRealPage;
|
||||
const {
|
||||
document: {
|
||||
xref: { trailer, encrypt },
|
||||
|
||||
286
src/core/editor/pdf_images.js
Normal file
286
src/core/editor/pdf_images.js
Normal file
@ -0,0 +1,286 @@
|
||||
/* Copyright 2026 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { Dict, Name } from "../primitives.js";
|
||||
import { FeatureTest } from "../../shared/util.js";
|
||||
import { Stream } from "../stream.js";
|
||||
|
||||
// Below this many distinct RGB triples, Flate+Predictor 15 (PNG-style) is
|
||||
// generally smaller than JPEG at visually equivalent quality, since the data
|
||||
// is dominated by flat regions and sharp edges where JPEG performs poorly.
|
||||
const FLATE_COLOR_COUNT_THRESHOLD = 16384;
|
||||
|
||||
/**
 * Build the dictionary of an image XObject with 8 bits per component.
 *
 * @param {Object} xref - Forwarded as-is to the `Dict` constructor.
 * @param {number} width - Stored under the `Width` key.
 * @param {number} height - Stored under the `Height` key.
 * @param {string} colorSpace - Color space name (e.g. "DeviceRGB"), stored
 *   under the `ColorSpace` key through `setIfName`.
 * @returns {Dict} The populated dictionary.
 */
function createImageDict(xref, width, height, colorSpace) {
  const image = new Dict(xref);
  image.set("Type", Name.get("XObject"));
  image.set("Subtype", Name.get("Image"));
  image.set("BitsPerComponent", 8);
  image.setIfName("ColorSpace", colorSpace);
  image.set("Width", width);
  image.set("Height", height);

  return image;
}
|
||||
|
||||
/**
 * Wrap a byte buffer and its dictionary into a `Stream`, without applying
 * any transformation to the bytes.
 *
 * @param {Uint8Array} buffer - Bytes of the stream; its full length is used.
 * @param {Dict} dict - Dictionary attached to the stream.
 * @returns {Stream}
 */
function createRawImage(buffer, dict) {
  return new Stream(buffer, 0, buffer.length, dict);
}
|
||||
|
||||
/**
 * Paeth predictor, as used by PNG filter type 4: returns whichever of the
 * three neighbours is closest to `left + above - upperLeft`.
 * Ties are resolved in the order left, above, upperLeft.
 *
 * @param {number} left - Byte to the left of the current one.
 * @param {number} above - Byte above the current one.
 * @param {number} upperLeft - Byte above and to the left of the current one.
 * @returns {number} The selected neighbour value.
 */
function paethPredictor(left, above, upperLeft) {
  const p = left + above - upperLeft;
  const pa = Math.abs(p - left);
  const pb = Math.abs(p - above);
  const pc = Math.abs(p - upperLeft);
  if (pa <= pb && pa <= pc) {
    return left;
  }
  return pb <= pc ? above : upperLeft;
}
|
||||
|
||||
/**
 * Apply PNG row filtering to raw pixel data, choosing for every row the
 * filter (None, Sub, Up, Average or Paeth) with the smallest sum of absolute
 * signed-byte values. When several filters tie, the lowest filter type wins.
 *
 * @param {Uint8Array} data - Unfiltered samples, `height` rows of
 *   `width * bytesPerPixel` bytes each.
 * @param {number} width - Number of pixels per row.
 * @param {number} height - Number of rows.
 * @param {number} bytesPerPixel - Number of bytes per pixel.
 * @returns {Uint8Array} `height` rows of `1 + width * bytesPerPixel` bytes;
 *   the first byte of each row is the filter type that was used.
 */
function applyPNGOptimumFilter(data, width, height, bytesPerPixel) {
  const rowSize = width * bytesPerPixel;
  const out = new Uint8Array(height * (rowSize + 1));
  // One scratch row per filter type; they are overwritten for every row.
  const candidates = [
    new Uint8Array(rowSize), // 0: None
    new Uint8Array(rowSize), // 1: Sub
    new Uint8Array(rowSize), // 2: Up
    new Uint8Array(rowSize), // 3: Average
    new Uint8Array(rowSize), // 4: Paeth
  ];

  for (let y = 0; y < height; y++) {
    const rowOffset = y * rowSize;
    const prevRowOffset = rowOffset - rowSize;
    const scores = [0, 0, 0, 0, 0];
    for (let x = 0; x < rowSize; x++) {
      const offset = rowOffset + x;
      const cur = data[offset];
      // Neighbours outside of the image are treated as 0.
      const left = x >= bytesPerPixel ? data[offset - bytesPerPixel] : 0;
      const above = y > 0 ? data[prevRowOffset + x] : 0;
      const upperLeft =
        y > 0 && x >= bytesPerPixel
          ? data[prevRowOffset + x - bytesPerPixel]
          : 0;
      candidates[0][x] = cur;
      candidates[1][x] = (cur - left) & 0xff;
      candidates[2][x] = (cur - above) & 0xff;
      candidates[3][x] = (cur - ((left + above) >> 1)) & 0xff;
      candidates[4][x] = (cur - paethPredictor(left, above, upperLeft)) & 0xff;
      // Sum of absolute signed-byte values: the standard "minimum sum"
      // heuristic for picking the best filter per row.
      for (let f = 0; f < 5; f++) {
        const v = candidates[f][x];
        scores[f] += v < 128 ? v : 256 - v;
      }
    }

    // Strict comparison keeps the lowest filter type on ties.
    let bestFilter = 0;
    for (let f = 1; f < 5; f++) {
      if (scores[f] < scores[bestFilter]) {
        bestFilter = f;
      }
    }

    const outOffset = y * (rowSize + 1);
    out[outOffset] = bestFilter;
    out.set(candidates[bestFilter], outOffset + 1);
  }

  return out;
}
|
||||
|
||||
/**
 * Compress bytes with a `CompressionStream` in "deflate" format.
 *
 * Writing and reading run concurrently: the compressed output is drained
 * through a `Response` while the input is being written, and both sides are
 * awaited together so that a failure on either one rejects the result.
 *
 * @param {Uint8Array} bytes - Data to compress.
 * @returns {Promise<Uint8Array>} The compressed data.
 * @throws Rejects with the underlying error when writing or reading fails;
 *   on a write failure the writer is aborted first.
 */
async function deflate(bytes) {
  const cs = new CompressionStream("deflate");
  const writer = cs.writable.getWriter();
  const writePromise = (async () => {
    try {
      await writer.ready;
      await writer.write(bytes);
      await writer.ready;
      await writer.close();
    } catch (reason) {
      // Errors raised by the abort itself are ignored so that the original
      // failure is the one reported to the caller.
      await writer.abort(reason).catch(() => {});
      throw reason;
    }
  })();
  const [compressed] = await Promise.all([
    new Response(cs.readable).bytes(),
    writePromise.then(() => null),
  ]);
  return compressed;
}
|
||||
|
||||
/**
 * Create an image stream compressed with Flate and PNG row prediction
 * (`/Predictor 15`), falling back to an uncompressed stream when that isn't
 * possible.
 *
 * The fallback is used when `CompressionStream` isn't available, when
 * `buffer.length` isn't a positive integer multiple of `width * height`
 * (the number of color components couldn't be described in `DecodeParms`),
 * or when filtering/compression fails. `dict` is only modified when the
 * compressed form is used.
 *
 * @param {Uint8Array} buffer - Raw samples, one byte per component.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {Dict} dict - Image dictionary; receives `Filter` and `DecodeParms`
 *   when the data is compressed.
 * @returns {Promise<Stream>} The result of `createRawImage`.
 */
async function createPNGLikeImage(buffer, width, height, dict) {
  const bytesPerPixel = buffer.length / (width * height);
  let compressed;
  if (
    typeof CompressionStream === "function" &&
    Number.isInteger(bytesPerPixel) &&
    bytesPerPixel > 0
  ) {
    try {
      const filtered = applyPNGOptimumFilter(
        buffer,
        width,
        height,
        bytesPerPixel
      );
      compressed = await deflate(filtered);
    } catch {
      // Deliberate best effort: compression is only an optimization, the
      // uncompressed stream below is still a valid image.
    }
  }

  if (!compressed) {
    return createRawImage(buffer, dict);
  }

  dict.setIfName("Filter", "FlateDecode");
  const decodeParms = new Dict(dict.xref);
  decodeParms.set("Predictor", 15);
  decodeParms.set("Columns", width);
  decodeParms.set("Colors", bytesPerPixel);
  decodeParms.set("BitsPerComponent", 8);
  dict.set("DecodeParms", decodeParms);

  return createRawImage(compressed, dict);
}
|
||||
|
||||
async function createImage(bitmap, xref, { closeBitmap = false } = {}) {
|
||||
// TODO: when printing, we could have a specific internal colorspace
|
||||
// (e.g. something like DeviceRGBA) in order to avoid any conversion (i.e. no
|
||||
// jpeg, no rgba to rgb conversion, etc...)
|
||||
|
||||
const { width, height } = bitmap;
|
||||
if (
|
||||
!Number.isInteger(width) ||
|
||||
!Number.isInteger(height) ||
|
||||
width <= 0 ||
|
||||
height <= 0
|
||||
) {
|
||||
if (closeBitmap) {
|
||||
bitmap.close?.();
|
||||
}
|
||||
throw new Error(
|
||||
`createImage: invalid bitmap dimensions ${width}x${height}`
|
||||
);
|
||||
}
|
||||
const canvas = new OffscreenCanvas(width, height);
|
||||
const ctx = canvas.getContext("2d", {
|
||||
alpha: true,
|
||||
willReadFrequently: true,
|
||||
});
|
||||
|
||||
let data;
|
||||
try {
|
||||
ctx.drawImage(bitmap, 0, 0);
|
||||
data = ctx.getImageData(0, 0, width, height).data;
|
||||
} finally {
|
||||
if (closeBitmap) {
|
||||
bitmap.close?.();
|
||||
}
|
||||
}
|
||||
const buf32 = new Uint32Array(
|
||||
data.buffer,
|
||||
data.byteOffset,
|
||||
data.byteLength >> 2
|
||||
);
|
||||
|
||||
// Bitwise masks are signed in JS, so extracting alpha via `(v & 0xff000000)`
|
||||
// would misclassify every opaque pixel as transparent on little-endian
|
||||
// platforms — use the byte-level shift/mask instead.
|
||||
const isLE = FeatureTest.isLittleEndian;
|
||||
const rgbMask = isLE ? 0x00ffffff : 0xffffff00;
|
||||
const colorCounter = new Set();
|
||||
let hasAlpha = false;
|
||||
let useFlate = true;
|
||||
for (let i = 0, ii = buf32.length; i < ii; i++) {
|
||||
const v = buf32[i];
|
||||
if ((isLE ? v >>> 24 : v & 0xff) !== 0xff) {
|
||||
hasAlpha = true;
|
||||
break;
|
||||
}
|
||||
if (useFlate) {
|
||||
colorCounter.add((v & rgbMask) >>> 0);
|
||||
if (colorCounter.size > FLATE_COLOR_COUNT_THRESHOLD) {
|
||||
useFlate = false;
|
||||
colorCounter.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasAlpha) {
|
||||
// JPEG can bleed hidden/edge RGB into semi-transparent pixels. Keep alpha
|
||||
// images lossless instead.
|
||||
useFlate = true;
|
||||
}
|
||||
|
||||
const image = createImageDict(xref, width, height, "DeviceRGB");
|
||||
|
||||
let imageStreamPromise;
|
||||
let imageRenderStream = null;
|
||||
if (useFlate) {
|
||||
// Pack RGB triples without compositing over white: the SMask carries the
|
||||
// original alpha and the lossless RGB stream stays exact.
|
||||
const rgbBuffer = new Uint8Array(width * height * 3);
|
||||
for (let i = 0, j = 0, ii = data.length; i < ii; i += 4, j += 3) {
|
||||
rgbBuffer[j] = data[i];
|
||||
rgbBuffer[j + 1] = data[i + 1];
|
||||
rgbBuffer[j + 2] = data[i + 2];
|
||||
}
|
||||
imageStreamPromise = createPNGLikeImage(rgbBuffer, width, height, image);
|
||||
imageRenderStream = createRawImage(
|
||||
rgbBuffer,
|
||||
createImageDict(xref, width, height, "DeviceRGB")
|
||||
);
|
||||
} else {
|
||||
image.setIfName("Filter", "DCTDecode");
|
||||
imageStreamPromise = canvas
|
||||
.convertToBlob({ type: "image/jpeg", quality: 1 })
|
||||
.then(blob => blob.bytes())
|
||||
.then(bytes => createRawImage(bytes, image));
|
||||
}
|
||||
|
||||
let smaskStreamPromise = Promise.resolve(null);
|
||||
let smaskRenderStream = null;
|
||||
if (hasAlpha) {
|
||||
const alphaBuffer = new Uint8Array(buf32.length);
|
||||
if (isLE) {
|
||||
for (let i = 0, ii = buf32.length; i < ii; i++) {
|
||||
alphaBuffer[i] = buf32[i] >>> 24;
|
||||
}
|
||||
} else {
|
||||
for (let i = 0, ii = buf32.length; i < ii; i++) {
|
||||
alphaBuffer[i] = buf32[i] & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
const smask = createImageDict(xref, width, height, "DeviceGray");
|
||||
const smaskRenderDict = createImageDict(xref, width, height, "DeviceGray");
|
||||
|
||||
smaskStreamPromise = createPNGLikeImage(alphaBuffer, width, height, smask);
|
||||
smaskRenderStream = createRawImage(alphaBuffer, smaskRenderDict);
|
||||
}
|
||||
|
||||
const [imageStream, smaskStream] = await Promise.all([
|
||||
imageStreamPromise,
|
||||
smaskStreamPromise,
|
||||
]);
|
||||
|
||||
return {
|
||||
imageStream,
|
||||
imageRenderStream,
|
||||
smaskStream,
|
||||
smaskRenderStream,
|
||||
width,
|
||||
height,
|
||||
};
|
||||
}
|
||||
|
||||
export { createImage };
|
||||
@ -569,6 +569,9 @@ class WorkerMessageHandler {
|
||||
}
|
||||
let newDocumentId = 0;
|
||||
for (const pageInfo of pageInfos) {
|
||||
if (pageInfo.image) {
|
||||
continue;
|
||||
}
|
||||
if (pageInfo.document === null) {
|
||||
pageInfo.document = pdfManager.pdfDocument;
|
||||
} else if (ArrayBuffer.isView(pageInfo.document)) {
|
||||
|
||||
@ -67,11 +67,24 @@ async function writeStream(stream, buffer, transform) {
|
||||
: filter;
|
||||
const isFilterZeroFlateDecode = isName(filterZero, "FlateDecode");
|
||||
|
||||
// These filters already compress the data, so we shouldn't try to compress it
|
||||
// again.
|
||||
const isFilterZeroImageDecode =
|
||||
isName(filterZero, "DCTDecode") ||
|
||||
isName(filterZero, "JPXDecode") ||
|
||||
isName(filterZero, "JBIG2Decode") ||
|
||||
isName(filterZero, "CCITTFaxDecode") ||
|
||||
isName(filterZero, "LZWDecode");
|
||||
|
||||
// If the string is too small there is no real benefit in compressing it.
|
||||
// The number 256 is arbitrary, but it should be reasonable.
|
||||
const MIN_LENGTH_FOR_COMPRESSING = 256;
|
||||
|
||||
if (bytes.length >= MIN_LENGTH_FOR_COMPRESSING && !isFilterZeroFlateDecode) {
|
||||
if (
|
||||
!isFilterZeroFlateDecode &&
|
||||
!isFilterZeroImageDecode &&
|
||||
bytes.length >= MIN_LENGTH_FOR_COMPRESSING
|
||||
) {
|
||||
try {
|
||||
const cs = new CompressionStream("deflate");
|
||||
const writer = cs.writable.getWriter();
|
||||
|
||||
@ -961,7 +961,8 @@ class PDFDocumentProxy {
|
||||
|
||||
/**
|
||||
* @typedef {Object} PageInfo
|
||||
* @property {null|Uint8Array} document
|
||||
* @property {null|Uint8Array} [document]
|
||||
* @property {ImageBitmap} [image] Image to insert as a synthetic page.
|
||||
* @property {Array<Array<number>|number>} [includePages]
|
||||
* included ranges or indices.
|
||||
* @property {Array<Array<number>|number>} [excludePages]
|
||||
@ -2899,10 +2900,25 @@ class WorkerTransport {
|
||||
pageInfos,
|
||||
};
|
||||
let transfer;
|
||||
const ImageBitmapCtor = globalThis.ImageBitmap;
|
||||
if (typeof ImageBitmapCtor === "function") {
|
||||
const infos = Array.isArray(pageInfos) ? pageInfos : [pageInfos];
|
||||
for (const pageInfo of infos) {
|
||||
if (pageInfo?.image instanceof ImageBitmapCtor) {
|
||||
(transfer ||= []).push(pageInfo.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (this.annotationStorage.size > 0) {
|
||||
const serialized = this.annotationStorage.serializable;
|
||||
let { map } = serialized;
|
||||
transfer = serialized.transfer;
|
||||
if (serialized.transfer?.length) {
|
||||
if (transfer) {
|
||||
transfer.push(...serialized.transfer);
|
||||
} else {
|
||||
transfer = serialized.transfer;
|
||||
}
|
||||
}
|
||||
// Annotation pageIndex tracks the editor's current viewer position; the
|
||||
// worker keys lookups by source index. Remap UI -> source via pagesMapper
|
||||
// so reorganized pages still receive their annotations after extraction.
|
||||
|
||||
@ -123,6 +123,33 @@ async function waitForHavingContents(page, expected) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Wait until the canvas of the given page is visible and holds at least one
 * pixel which is neither fully transparent nor pure white.
 *
 * @param {Object} page - Object exposing `waitForSelector` and
 *   `waitForFunction` (presumably a Puppeteer page).
 * @param {number} pageNumber - Value of the `data-page-number` attribute.
 */
async function waitForPageCanvasToHaveImage(page, pageNumber) {
  const canvasSelector = `.page[data-page-number = "${pageNumber}"] .canvasWrapper canvas`;

  // The predicate only relies on its argument and on `document`: it's handed
  // over to `page.waitForFunction`, hence it mustn't capture anything local.
  const hasPaintedPixel = cssSelector => {
    const target = document.querySelector(cssSelector);
    const canvasWidth = target?.width;
    const canvasHeight = target?.height;
    if (!canvasWidth || !canvasHeight) {
      return false;
    }
    const context = target.getContext("2d", { willReadFrequently: true });
    const pixels = context.getImageData(0, 0, canvasWidth, canvasHeight).data;
    for (let offset = 0; offset < pixels.length; offset += 4) {
      const isTransparent = pixels[offset + 3] === 0;
      const isWhite =
        pixels[offset] === 255 &&
        pixels[offset + 1] === 255 &&
        pixels[offset + 2] === 255;
      if (!isTransparent && !isWhite) {
        return true;
      }
    }
    return false;
  };

  await page.waitForSelector(canvasSelector, { visible: true });
  await page.waitForFunction(hasPaintedPixel, {}, canvasSelector);
}
|
||||
|
||||
function getSearchResults(page) {
|
||||
return page.evaluate(() => {
|
||||
const pages = document.querySelectorAll(".page");
|
||||
@ -3496,4 +3523,137 @@ describe("Reorganize Pages View", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Add image as page", () => {
  let pages;

  // Pick `relativePath` (resolved from this spec's directory) through the
  // "add file" picker, then wait for the next "thumbnailsloaded" event.
  async function addFileAndWaitForThumbnails(page, relativePath) {
    // The listener must be registered before the upload is triggered.
    const thumbnailsLoaded = await createPromise(page, resolve => {
      window.PDFViewerApplication.eventBus._on("thumbnailsloaded", resolve, {
        once: true,
      });
    });
    const filePicker = await page.$("#viewsManagerAddFilePicker");
    await filePicker.uploadFile(path.join(__dirname, relativePath));
    await awaitPromise(thumbnailsLoaded);
  }

  // 3 original pages + 1 inserted image page = 4 pages in total.
  function waitForFourPages(page) {
    return page.waitForFunction(
      () => parseInt(document.getElementById("pageNumber").max, 10) === 4
    );
  }

  // Inserting a page must flag the document as having changes.
  async function expectPendingChanges(page, browserName) {
    const changed = await page.evaluate(() =>
      window.PDFViewerApplication._hasChanges()
    );
    expect(changed).withContext(`In ${browserName}`).toBeTrue();
  }

  beforeEach(async () => {
    const viewerOptions = { enableSplitMerge: true, enableMerge: true };
    pages = await loadAndWait(
      "three_pages_with_number.pdf",
      '.page[data-page-number = "1"] .endOfContent',
      "1",
      null,
      viewerOptions
    );
  });

  afterEach(async () => {
    await closePages(pages);
  });

  it("should insert an image as a new page after the current page", async () => {
    const checkBrowser = async ([browserName, page]) => {
      await waitForThumbnailVisible(page, 1);

      // Make page 2 the current one: the image is inserted right after it.
      await page.evaluate(() => {
        window.PDFViewerApplication.page = 2;
      });
      await page.waitForFunction(() => window.PDFViewerApplication.page === 2);
      await waitAndClick(page, getThumbnailSelector(2));

      await addFileAndWaitForThumbnails(page, "../images/firefox_logo.png");
      await waitForFourPages(page);

      // The focus has to move to the inserted page, i.e. page 3 since the
      // image has been added after page 2.
      await page.waitForFunction(() => window.PDFViewerApplication.page === 3);
      await waitForPageCanvasToHaveImage(page, 3);

      // The text pages must be untouched: pages 1-2 of the original, the
      // image page (without any text) and finally the original page 3 which
      // is now in position 4.
      // Only the visible pages are rendered by the viewer, so everything is
      // squeezed into the viewport (WRAPPED scroll mode + minimum scale) to
      // get all the text layers before inspecting them; else an off-screen
      // page (e.g. page 2 while page 3 is the current one) could still be
      // waiting for its rendering.
      await page.evaluate(() => {
        const { pdfViewer } = window.PDFViewerApplication;
        pdfViewer.scrollMode = 2; // ScrollMode.WRAPPED
        pdfViewer.updateScale({ drawingDelay: 0, scaleFactor: 0.01 });
      });
      await page.waitForFunction(
        expectedTexts => {
          const textLayers = document.querySelectorAll(".page .textLayer");
          if (textLayers.length !== expectedTexts.length) {
            return false;
          }
          for (let idx = 0; idx < textLayers.length; idx++) {
            const textLayer = textLayers[idx];
            const pageNumber = textLayer
              .closest(".page")
              ?.getAttribute("data-page-number");
            if (
              pageNumber !== String(idx + 1) ||
              textLayer.textContent.trim() !== expectedTexts[idx]
            ) {
              return false;
            }
          }
          return true;
        },
        {},
        ["1", "2", "", "3"]
      );

      await expectPendingChanges(page, browserName);
    };

    await Promise.all(pages.map(entry => checkBrowser(entry)));
  });

  it("should insert an SVG image as a new page", async () => {
    const checkBrowser = async ([browserName, page]) => {
      await waitForThumbnailVisible(page, 1);

      await addFileAndWaitForThumbnails(page, "../images/firefox_logo.svg");

      // Once rasterized the SVG becomes a new page, hence the document must
      // now have 4 pages.
      await waitForFourPages(page);
      await waitForPageCanvasToHaveImage(page, 2);

      await expectPendingChanges(page, browserName);
    };

    await Promise.all(pages.map(entry => checkBrowser(entry)));
  });
});
|
||||
});
|
||||
|
||||
@ -5929,6 +5929,41 @@ small scripts as well as for`);
|
||||
expect(labels).toEqual(["i", "ii", "1", "a", "5"]);
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
// Extract pages 0, 1 and 5 of a labelled document with an image page
// inserted in between, and check the labels of the resulting document.
it("extract pages with an inserted image and check labels", async function () {
  if (isNodeJS) {
    pending("Cannot create a bitmap from Node.js.");
  }
  const sourceTask = getDocument(buildGetDocumentParams("labelled_pages.pdf"));
  const sourceDoc = await sourceTask.promise;
  const image = await getImageBitmap("firefox_logo.png");

  // The image lands at index 2, between the two chunks of original pages.
  const pageInfos = [
    { document: null, includePages: [0, 1], pageIndices: [0, 1] },
    { image, pageIndices: [2] },
    { document: null, includePages: [5], pageIndices: [3] },
  ];
  const extractedData = await sourceDoc.extractPages(pageInfos);
  await sourceTask.destroy();

  const extractedTask = getDocument({ data: extractedData });
  const extractedDoc = await extractedTask.promise;
  const pageLabels = await extractedDoc.getPageLabels();
  expect(pageLabels).toEqual(["i", "ii", "3", "1"]);
  await extractedTask.destroy();
});
|
||||
});
|
||||
|
||||
describe("Named destinations", function () {
|
||||
@ -6639,6 +6674,41 @@ small scripts as well as for`);
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
// Only the image has an explicit position (index 1): the two extracted pages
// are expected to end up around it.
it("fills pages around an explicitly placed image", async function () {
  if (isNodeJS) {
    pending("Cannot create a bitmap from Node.js.");
  }

  const sourceTask = getDocument(
    buildGetDocumentParams("three_pages_with_number.pdf")
  );
  const sourceDoc = await sourceTask.promise;
  const image = await getImageBitmap("firefox_logo.png");
  const extractedData = await sourceDoc.extractPages([
    { image, pageIndices: [1] },
    { document: null, includePages: [0, 1] },
  ]);
  await sourceTask.destroy();

  const extractedTask = getDocument({ data: extractedData });
  const extractedDoc = await extractedTask.promise;
  expect(extractedDoc.numPages).toEqual(3);

  // Text expected on pages 1, 2 and 3: the image page has none.
  const expectedTexts = ["1", "", "2"];
  for (let index = 0; index < expectedTexts.length; index++) {
    const pdfPage = await extractedDoc.getPage(index + 1);
    const { items } = await pdfPage.getTextContent();
    expect(mergeText(items)).toEqual(expectedTexts[index]);
  }

  await extractedTask.destroy();
});
|
||||
|
||||
it("preserves EmbeddedFiles (attachments) when extracting pages", async function () {
|
||||
let loadingTask = getDocument(buildGetDocumentParams("attachment.pdf"));
|
||||
let pdfDoc = await loadingTask.promise;
|
||||
|
||||
@ -13,7 +13,10 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { createImage } from "../../src/core/editor/pdf_images.js";
|
||||
import { Dict } from "../../src/core/primitives.js";
|
||||
import { FlateStream } from "../../src/core/flate_stream.js";
|
||||
import { isNodeJS } from "../../src/shared/util.js";
|
||||
import { PredictorStream } from "../../src/core/predictor_stream.js";
|
||||
import { Stream } from "../../src/core/stream.js";
|
||||
|
||||
@ -37,5 +40,39 @@ describe("stream", function () {
|
||||
|
||||
expect(result).toEqual(new Uint8Array([100, 3, 101, 2, 102, 1]));
|
||||
});
|
||||
|
||||
// Round-trip: a 2x2 opaque image goes through `createImage`, then its stream
// is decoded with FlateStream + PredictorStream and compared to the RGB input.
it("should decode the FlateDecode stream produced by createImage", async function () {
  if (isNodeJS) {
    pending("OffscreenCanvas is not supported in Node.js.");
  }
  const width = 2;
  const height = 2;
  // Four opaque RGBA pixels: red, green, blue and yellow.
  const rgba = new Uint8ClampedArray([
    255, 0, 0, 255, 0, 255, 0, 255, 0, 0, 255, 255, 255, 255, 0, 255,
  ]);

  const canvas = new OffscreenCanvas(width, height);
  canvas
    .getContext("2d")
    .putImageData(new ImageData(rgba, width, height), 0, 0);
  const bitmap = canvas.transferToImageBitmap();

  const { imageStream } = await createImage(bitmap, /* xref = */ null, {
    closeBitmap: true,
  });
  const { dict } = imageStream;
  expect(dict.get("Filter").name).toEqual("FlateDecode");

  const predictorStream = new PredictorStream(
    new FlateStream(imageStream, imageStream.length),
    imageStream.length,
    dict.get("DecodeParms")
  );
  const decoded = predictorStream.getBytes(width * height * 3);

  // The expected bytes are the source pixels without their alpha component.
  const expected = Uint8Array.from(
    rgba.filter((_, index) => index % 4 !== 3)
  );
  expect(decoded).toEqual(expected);
});
|
||||
});
|
||||
});
|
||||
|
||||
@ -353,17 +353,28 @@ class PDFThumbnailViewer {
|
||||
|
||||
async #mergeFiles(files, insertAfter) {
|
||||
this.#toggleBar("waiting", "pdfjs-views-manager-waiting-for-file");
|
||||
const buffers = [];
|
||||
const entries = [];
|
||||
for (const file of files) {
|
||||
if (file.type !== "application/pdf") {
|
||||
const isImage = file.type?.startsWith("image/");
|
||||
if (!isImage && file.type !== "application/pdf") {
|
||||
const magic = await file.slice(0, 5).text();
|
||||
if (magic !== "%PDF-") {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
buffers.push(await file.bytes());
|
||||
if (isImage) {
|
||||
let bitmap;
|
||||
try {
|
||||
bitmap = await PDFThumbnailViewer.#fileToImageBitmap(file);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
entries.push({ image: bitmap, insertAfter });
|
||||
} else {
|
||||
entries.push({ document: await file.bytes(), insertAfter });
|
||||
}
|
||||
}
|
||||
if (buffers.length === 0) {
|
||||
if (entries.length === 0) {
|
||||
this.#toggleBar("status");
|
||||
return;
|
||||
}
|
||||
@ -371,12 +382,7 @@ class PDFThumbnailViewer {
|
||||
const data = this.hasStructuralChanges()
|
||||
? this.getStructuralChanges()
|
||||
: [{ document: null }];
|
||||
for (const buffer of buffers) {
|
||||
data.push({
|
||||
document: buffer,
|
||||
insertAfter,
|
||||
});
|
||||
}
|
||||
data.push(...entries);
|
||||
this.eventBus._on(
|
||||
"pagesloaded",
|
||||
() => {
|
||||
@ -657,6 +663,71 @@ class PDFThumbnailViewer {
|
||||
));
|
||||
}
|
||||
|
||||
static #fitImageDimensions(width, height, { minSide = 0, maxSide }) {
|
||||
const longest = Math.max(width, height);
|
||||
let scale = 1;
|
||||
if (minSide > 0 && longest < minSide) {
|
||||
scale = minSide / longest;
|
||||
} else if (longest > maxSide) {
|
||||
scale = maxSide / longest;
|
||||
}
|
||||
return scale === 1
|
||||
? { width, height }
|
||||
: {
|
||||
width: Math.max(1, Math.round(width * scale)),
|
||||
height: Math.max(1, Math.round(height * scale)),
|
||||
};
|
||||
}
|
||||
|
||||
static async #fileToImageBitmap(file) {
|
||||
// Keep image pages large enough to look good when fitted to a PDF page, but
|
||||
// bounded so saving does not allocate worker-side buffers at camera-photo
|
||||
// dimensions.
|
||||
const MIN_RASTER_SIDE = 1024;
|
||||
const MAX_RASTER_SIDE = 4096;
|
||||
|
||||
if (file.type !== "image/svg+xml") {
|
||||
const bitmap = await createImageBitmap(file);
|
||||
const { width, height } = PDFThumbnailViewer.#fitImageDimensions(
|
||||
bitmap.width,
|
||||
bitmap.height,
|
||||
{ maxSide: MAX_RASTER_SIDE }
|
||||
);
|
||||
if (width === bitmap.width && height === bitmap.height) {
|
||||
return bitmap;
|
||||
}
|
||||
const canvas = new OffscreenCanvas(width, height);
|
||||
const ctx = canvas.getContext("2d");
|
||||
ctx.drawImage(bitmap, 0, 0, width, height);
|
||||
bitmap.close();
|
||||
return canvas.transferToImageBitmap();
|
||||
}
|
||||
// createImageBitmap doesn't work with SVG (mirroring the workaround in
|
||||
// src/display/editor/tools.js ImageManager): load the file via an Image
|
||||
// element and rasterize it through an OffscreenCanvas. The target raster
|
||||
// size uses the SVG's intrinsic dimensions, clamped so the longest side
|
||||
// falls in [1024, 4096]: large enough to avoid pixelation when fitted to
|
||||
// a page, but capped to prevent a runaway SVG (e.g. a huge viewBox) from
|
||||
// allocating a multi-gigabyte bitmap.
|
||||
const url = URL.createObjectURL(file);
|
||||
try {
|
||||
const image = new Image();
|
||||
image.src = url;
|
||||
await image.decode();
|
||||
const { width, height } = PDFThumbnailViewer.#fitImageDimensions(
|
||||
image.naturalWidth || MIN_RASTER_SIDE,
|
||||
image.naturalHeight || MIN_RASTER_SIDE,
|
||||
{ minSide: MIN_RASTER_SIDE, maxSide: MAX_RASTER_SIDE }
|
||||
);
|
||||
const canvas = new OffscreenCanvas(width, height);
|
||||
const ctx = canvas.getContext("2d");
|
||||
ctx.drawImage(image, 0, 0, width, height);
|
||||
return canvas.transferToImageBitmap();
|
||||
} finally {
|
||||
URL.revokeObjectURL(url);
|
||||
}
|
||||
}
|
||||
|
||||
#updateThumbnails(currentPageNumber) {
|
||||
this.#resetCurrentThumbnail(0);
|
||||
let newCurrentPageNumber = 0;
|
||||
@ -1580,7 +1651,7 @@ class PDFThumbnailViewer {
|
||||
const container = this.container;
|
||||
const signal = this.#abortSignal;
|
||||
|
||||
const hasPdfItem = dataTransfer => {
|
||||
const hasMergeableItem = dataTransfer => {
|
||||
if (!dataTransfer) {
|
||||
return false;
|
||||
}
|
||||
@ -1590,7 +1661,10 @@ class PDFThumbnailViewer {
|
||||
// here to keep the "copy" cursor honest; if needed, drop-time magic-byte
|
||||
// validation in #mergeFiles would still catch a permissive variant.
|
||||
for (const item of dataTransfer.items) {
|
||||
if (item.kind === "file" && item.type === "application/pdf") {
|
||||
if (
|
||||
item.kind === "file" &&
|
||||
(item.type === "application/pdf" || item.type.startsWith("image/"))
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1611,7 +1685,7 @@ class PDFThumbnailViewer {
|
||||
// A page-move drag is already in progress.
|
||||
!isNaN(this.#lastDraggedOverIndex) ||
|
||||
!this._thumbnails.length ||
|
||||
!hasPdfItem(e.dataTransfer)
|
||||
!hasMergeableItem(e.dataTransfer)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -191,7 +191,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
||||
hidden="true"
|
||||
>
|
||||
<span data-l10n-id="pdfjs-views-manager-add-file-button-label"></span>
|
||||
<input id="viewsManagerAddFilePicker" type="file" accept="application/pdf" multiple />
|
||||
<input id="viewsManagerAddFilePicker" type="file" accept="application/pdf,image/*" multiple />
|
||||
</button>
|
||||
<button
|
||||
id="viewsManagerCurrentOutlineButton"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user