diff --git a/examples/forge-functionality/api_examples.js b/examples/forge-functionality/api_examples.js index 955c0205b..0b598824f 100644 --- a/examples/forge-functionality/api_examples.js +++ b/examples/forge-functionality/api_examples.js @@ -11,7 +11,7 @@ const STANDARD_FONT_DATA_URL = // Loading file from file system into typed array. const pdfPath = - process.argv[2] || "../../web/compressed.tracemonkey-pldi-09.pdf"; + process.argv[2] || "C:\\Users\\kj131\\pdf-forge\\test_pdfs\\ISO_32000-2_2020(en).pdf"; const data = new Uint8Array(fs.readFileSync(pdfPath)); // Load the PDF file. @@ -21,12 +21,46 @@ const loadingTask = getDocument({ cMapPacked: CMAP_PACKED, standardFontDataUrl: STANDARD_FONT_DATA_URL, }); -try { - const pdfDocument = await loadingTask.promise; - console.log("# PDF document loaded."); - const page = await pdfDocument.getPage(1); - const opList = await page.getOperatorList(); - console.log(opList); -} catch (e) { - console.error(e); +test(loadingTask); +async function test(loading) { + try { + const pdfDocument = await loading.promise; + console.log("# PDF document loaded."); + const page = await pdfDocument.getPage(4); + printOpList(page); + console.time("contents"); + const contents = await page.getContents(); + console.timeEnd("contents"); + console.time("oplist"); + const opList = await page.getOperatorList(); + console.timeEnd("oplist"); + // console.log(opList); + let newContents = ""; + for (let i = 0; i < 5; i++) { + const range = opList.rangeArray[i]; + if (range) { + newContents += contents.slice(range[0], range[1]); + newContents += "\n"; + } + } + console.log(newContents); + await page.updateContents(newContents); + await printOpList(page); + } catch (e) { + console.error(e); + } +} + +async function printOpList(page) { + const contents = await page.getContents(); + const opList = await page.getOperatorList(); + // console.log(opList); + const ops = []; + for (let i = 0; i < opList.rangeArray.length; i++) { + const range = opList.rangeArray[i]; + if (range) { + ops.push(contents.slice(range[0], range[1])); + } + } + console.log(ops.slice(0, 100)); } diff --git a/examples/forge-functionality/fetch_primitives.js b/examples/forge-functionality/fetch_primitives.js index 78e751894..9fb94d31c 100644 --- a/examples/forge-functionality/fetch_primitives.js +++ b/examples/forge-functionality/fetch_primitives.js @@ -45,7 +45,7 @@ function bytesToString(bytes) { } async function parse(doc) { - const path = "/Page2/Contents/2"; + const path = "/Page2/Contents/1"; let [stream] = await getPrimitive(path, doc); const lexer = new Lexer(stream); const parser = new Parser({ lexer, xref: doc.xref, trackRanges: true }); @@ -63,12 +63,12 @@ async function parse(doc) { const bytes = stream.getBytes(); const classes = new Set(); for (const o of objs) { - console.log(o[0].constructor.name); + // console.log(o[0].constructor.name); classes.add(o[0].constructor.name); const lexemmeBytes = bytes.slice(o[1], o[2]); - console.log(bytesToString(lexemmeBytes)); + // console.log(bytesToString(lexemmeBytes)); } - console.log("unique classes", classes); + // console.log("unique classes", classes); [stream] = await getPrimitive(path, doc); const preprocessor = new EvaluatorPreprocessor(stream, doc.xref); const operation = {}; @@ -78,10 +78,10 @@ async function parse(doc) { const fn = operation.fn; const range = operation.range; const op = bytesToString(bytes.slice(range[0], range[1])); - console.log(args, fn); - console.log(`${range[0]}------------------------------------`); - console.log(op); - console.log(`${range[1]}------------------------------------`); + // console.log(args, fn); + // console.log(`----------------- ${range} -------------------`); + console.log(`${fn}: ${op}`); + // console.log(`---------------------------------------------`); } // console.time("xref"); // let table = await retrieveXref(doc); diff --git a/src/core/document.js b/src/core/document.js index b3a550c4e..1deffb7ca 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -65,7 +65,7 @@ import { Catalog } from "./catalog.js"; import { clearGlobalCaches } from "./cleanup_helper.js"; import { DatasetReader } from "./dataset_reader.js"; import { Linearization } from "./parser.js"; -import { NullStream } from "./stream.js"; +import { NullStream, StringStream } from "./stream.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { PartialEvaluator } from "./evaluator.js"; @@ -107,6 +107,8 @@ class Page { this.resourcesPromise = null; this.xfaFactory = xfaFactory; + this.updatedContents = null; + const idCounters = { obj: 0, }; @@ -246,10 +248,19 @@ class Page { throw reason; } + setContents(newContents) { + this.updatedContents = newContents; + } + /** * @returns {Promise} */ getContentStream() { + if (this.updatedContents !== null) { + return new Promise(resolve => { + resolve(new StringStream(this.updatedContents)); + }); + } return this.pdfManager.ensure(this, "content").then(content => { if (content instanceof BaseStream) { return content; @@ -426,8 +437,11 @@ class Page { cacheKey, annotationStorage = null, modifiedIds = null, + contentOverride = null, }) { - const contentStreamPromise = this.getContentStream(); + const contentStreamPromise = contentOverride + ? new StringStream(contentOverride) + : this.getContentStream(); const resourcesPromise = this.loadResources([ "ColorSpace", "ExtGState", diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 08737beb5..0ac5cd3bf 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -802,6 +802,7 @@ class PartialEvaluator { operatorList.addImageOps( OPS.paintImageXObject, args, + range, optionalContent, hasMask ); @@ -1445,15 +1446,20 @@ class PartialEvaluator { minMax = [Infinity, Infinity, -Infinity, -Infinity]; break; } - operatorList.addOp(OPS.constructPath, [[fn], args, minMax], range); + operatorList.addOp( + OPS.constructPath, + [[fn], args, minMax, [range[0], range[1]]], + range + ); if (parsingText) { - operatorList.addOp(OPS.restore, null, range); + operatorList.addOp(OPS.restore, null); } } else { const opArgs = operatorList.argsArray[lastIndex]; opArgs[0].push(fn); opArgs[1].push(...args); + opArgs[3].push(...range); const minMax = opArgs[2]; const opRange = operatorList.rangeArray[lastIndex]; @@ -5215,7 +5221,7 @@ class EvaluatorPreprocessor { operation.fn = fn; operation.args = args; - const end = this.parser.getPosition(); + const end = this.parser.getEnd(); operation.range = [start, end]; return true; } diff --git a/src/core/parser.js b/src/core/parser.js index 3a7ea1de4..b107a9606 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -59,12 +59,7 @@ function getInlineImageCacheKey(bytes) { } class Parser { - constructor({ - lexer, - xref, - allowStreams = false, - recoveryMode = false, - }) { + constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) { this.lexer = lexer; this.xref = xref; this.allowStreams = allowStreams; @@ -75,8 +70,8 @@ class Parser { } refill() { - const [buf1, start1, end1] = this.lexer.getObj(true); - const [buf2, start2, end2] = this.lexer.getObj(true); + const [buf1, start1, end1] = this.lexer.getObjWithRange(); + const [buf2, start2, end2] = this.lexer.getObjWithRange(); this.buf1 = buf1; this.range1 = [start1, end1]; this.buf2 = buf2; @@ -87,12 +82,14 @@ class Parser { if (this.buf2 instanceof Cmd && this.buf2.cmd === "ID") { this.buf1 = this.buf2; this.buf2 = null; + this.lastEnd = this.range1[1]; this.range1 = this.range2; this.range2 = null; } else { this.buf1 = this.buf2; + this.lastEnd = this.range1[1]; this.range1 = this.range2; - const [buf2, start2, end2] = this.lexer.getObj(true); + const [buf2, start2, end2] = this.lexer.getObjWithRange(); this.buf2 = buf2; this.range2 = [start2, end2]; } @@ -116,6 +113,10 @@ class Parser { return this.range1 ? this.range1[0] : 0; } + getEnd() { + return this.lastEnd ?? 0; + } + getObjWithRange(cipherTransform = null) { const start = this.range1[0]; const obj = this.getObj(cipherTransform); @@ -1229,14 +1230,18 @@ class Lexer { } getObjWithRange() { - // at the start of getObj() the stream has stepped beyond currentChar by one - const start = this.stream.pos - 1; + const ch = this._skipWhitespaceAndComments(); + if (ch === EOF) { + return [ch, -1, -1]; + } + // currentChar is always at pos - 1 + const start = Math.max(this.stream.pos - 1, 0); const obj = this.getObj(); - const end = this.stream.pos; + const end = this.stream.pos - 1; return [obj, start, end]; } - getObj(withRange = false) { + _skipWhitespaceAndComments() { // Skip whitespace and comments. let comment = false; let ch = this.currentChar; @@ -1255,16 +1260,14 @@ class Lexer { } ch = this.nextChar(); } - const start = this.stream.pos - 1; - const obj = this.extracted(ch); - const end = this.stream.pos - 1; - if (withRange) { - return [start, obj, end]; - } - return obj; + return ch; } - extracted(ch) { + getObj() { + let ch = this._skipWhitespaceAndComments(); + if (ch === EOF) { + return ch; + } // Start reading a token. switch (ch | 0) { case 0x30: // '0' diff --git a/src/core/worker.js b/src/core/worker.js index 925f648cc..5d433ebc3 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -123,7 +123,7 @@ class WorkerMessageHandler { if (apiVersion !== workerVersion) { throw new Error( `The API version "${apiVersion}" does not match ` + - `the Worker version "${workerVersion}".` + `the Worker version "${workerVersion}".` ); } @@ -141,8 +141,8 @@ class WorkerMessageHandler { if (enumerableProperties.length) { throw new Error( "The `Array.prototype` contains unexpected enumerable properties: " + - enumerableProperties.join(", ") + - "; thus breaking e.g. `for...in` iteration of `Array`s." + enumerableProperties.join(", ") + + "; thus breaking e.g. `for...in` iteration of `Array`s." ); } } @@ -206,15 +206,15 @@ class WorkerMessageHandler { } async function getPdfManager({ - data, - password, - disableAutoFetch, - rangeChunkSize, - length, - docBaseUrl, - enableXfa, - evaluatorOptions, - }) { + data, + password, + disableAutoFetch, + rangeChunkSize, + length, + docBaseUrl, + enableXfa, + evaluatorOptions, + }) { const pdfManagerArgs = { source: null, disableAutoFetch, @@ -242,7 +242,7 @@ class WorkerMessageHandler { loaded = 0; fullRequest.headersReady - .then(function () { + .then(function() { if (!fullRequest.isRangeSupported) { return; } @@ -263,13 +263,13 @@ class WorkerMessageHandler { pdfManagerCapability.resolve(newPdfManager); cancelXHRs = null; }) - .catch(function (reason) { + .catch(function(reason) { pdfManagerCapability.reject(reason); cancelXHRs = null; }); - new Promise(function (resolve, reject) { - const readChunk = function ({ value, done }) { + new Promise(function(resolve, reject) { + const readChunk = function({ value, done }) { try { ensureNotTerminated(); if (done) { @@ -314,7 +314,7 @@ class WorkerMessageHandler { } }; fullRequest.read().then(readChunk, reject); - }).catch(function (e) { + }).catch(function(e) { pdfManagerCapability.reject(e); cancelXHRs = null; }); @@ -341,12 +341,12 @@ class WorkerMessageHandler { handler .sendWithPromise("PasswordRequest", ex) - .then(function ({ password }) { + .then(function({ password }) { finishWorkerTask(task); pdfManager.updatePassword(password); pdfManagerReady(); }) - .catch(function () { + .catch(function() { finishWorkerTask(task); handler.send("DocException", ex); }); @@ -359,7 +359,7 @@ class WorkerMessageHandler { function pdfManagerReady() { ensureNotTerminated(); - loadDocument(false).then(onSuccess, function (reason) { + loadDocument(false).then(onSuccess, function(reason) { ensureNotTerminated(); // Try again with recoveryMode == true @@ -367,7 +367,7 @@ class WorkerMessageHandler { onFailure(reason); return; } - pdfManager.requestLoadedStream().then(function () { + pdfManager.requestLoadedStream().then(function() { ensureNotTerminated(); loadDocument(true).then(onSuccess, onFailure); @@ -378,7 +378,7 @@ class WorkerMessageHandler { ensureNotTerminated(); getPdfManager(data) - .then(function (newPdfManager) { + .then(function(newPdfManager) { if (terminated) { // We were in a process of setting up the manager, but it got // terminated in the middle. @@ -396,14 +396,14 @@ class WorkerMessageHandler { .then(pdfManagerReady, onFailure); } - handler.on("GetPage", function (data) { - return pdfManager.getPage(data.pageIndex).then(function (page) { + handler.on("GetPage", function(data) { + return pdfManager.getPage(data.pageIndex).then(function(page) { return Promise.all([ pdfManager.ensure(page, "rotate"), pdfManager.ensure(page, "ref"), pdfManager.ensure(page, "userUnit"), pdfManager.ensure(page, "view"), - ]).then(function ([rotate, ref, userUnit, view]) { + ]).then(function([rotate, ref, userUnit, view]) { return { rotate, ref, @@ -415,104 +415,104 @@ class WorkerMessageHandler { }); }); - handler.on("GetPageIndex", function (data) { + handler.on("GetPageIndex", function(data) { const pageRef = Ref.get(data.num, data.gen); return pdfManager.ensureCatalog("getPageIndex", [pageRef]); }); - handler.on("GetDestinations", function (data) { + handler.on("GetDestinations", function(data) { return pdfManager.ensureCatalog("destinations"); }); - handler.on("GetDestination", function (data) { + handler.on("GetDestination", function(data) { return pdfManager.ensureCatalog("getDestination", [data.id]); }); - handler.on("GetPageLabels", function (data) { + handler.on("GetPageLabels", function(data) { return pdfManager.ensureCatalog("pageLabels"); }); - handler.on("GetPageLayout", function (data) { + handler.on("GetPageLayout", function(data) { return pdfManager.ensureCatalog("pageLayout"); }); - handler.on("GetPageMode", function (data) { + handler.on("GetPageMode", function(data) { return pdfManager.ensureCatalog("pageMode"); }); - handler.on("GetViewerPreferences", function (data) { + handler.on("GetViewerPreferences", function(data) { return pdfManager.ensureCatalog("viewerPreferences"); }); - handler.on("GetOpenAction", function (data) { + handler.on("GetOpenAction", function(data) { return pdfManager.ensureCatalog("openAction"); }); - handler.on("GetAttachments", function (data) { + handler.on("GetAttachments", function(data) { return pdfManager.ensureCatalog("attachments"); }); - handler.on("GetDocJSActions", function (data) { + handler.on("GetDocJSActions", function(data) { return pdfManager.ensureCatalog("jsActions"); }); - handler.on("GetPageJSActions", function ({ pageIndex }) { - return pdfManager.getPage(pageIndex).then(function (page) { + handler.on("GetPageJSActions", function({ pageIndex }) { + return pdfManager.getPage(pageIndex).then(function(page) { return pdfManager.ensure(page, "jsActions"); }); }); - handler.on("GetOutline", function (data) { + handler.on("GetOutline", function(data) { return pdfManager.ensureCatalog("documentOutline"); }); - handler.on("GetOptionalContentConfig", function (data) { + handler.on("GetOptionalContentConfig", function(data) { return pdfManager.ensureCatalog("optionalContentConfig"); }); - handler.on("GetPermissions", function (data) { + handler.on("GetPermissions", function(data) { return pdfManager.ensureCatalog("permissions"); }); - handler.on("GetMetadata", function (data) { + handler.on("GetMetadata", function(data) { return Promise.all([ pdfManager.ensureDoc("documentInfo"), pdfManager.ensureCatalog("metadata"), ]); }); - handler.on("GetMarkInfo", function (data) { + handler.on("GetMarkInfo", function(data) { return pdfManager.ensureCatalog("markInfo"); }); - handler.on("GetData", function (data) { - return pdfManager.requestLoadedStream().then(function (stream) { + handler.on("GetData", function(data) { + return pdfManager.requestLoadedStream().then(function(stream) { return stream.bytes; }); }); - handler.on("GetPrimitiveByPath", function (path_str) { + handler.on("GetPrimitiveByPath", function(path_str) { return getPrim(path_str, pdfManager.pdfDocument); }); - handler.on("GetXRefEntries", function (data) { + handler.on("GetXRefEntries", function(data) { return retrieveXref(pdfManager.pdfDocument); }); - handler.on("GetPrimTree", function (request) { + handler.on("GetPrimTree", function(request) { return getPrimTree(request, pdfManager.pdfDocument); }); - handler.on("GetImageData", function (path) { + handler.on("GetImageData", function(path) { return getImageAsBlob(path, pdfManager.pdfDocument); }); - handler.on("GetStreamAsString", function (path) { + handler.on("GetStreamAsString", function(path) { return getStreamAsString(path, pdfManager.pdfDocument); }); - handler.on("GetAnnotations", function ({ pageIndex, intent }) { - return pdfManager.getPage(pageIndex).then(function (page) { + handler.on("GetAnnotations", function({ pageIndex, intent }) { + return pdfManager.getPage(pageIndex).then(function(page) { const task = new WorkerTask(`GetAnnotations: page ${pageIndex}`); startWorkerTask(task); @@ -529,23 +529,23 @@ class WorkerMessageHandler { }); }); - handler.on("GetFieldObjects", function (data) { + handler.on("GetFieldObjects", function(data) { return pdfManager .ensureDoc("fieldObjects") .then(fieldObjects => fieldObjects?.allFields || null); }); - handler.on("HasJSActions", function (data) { + handler.on("HasJSActions", function(data) { return pdfManager.ensureDoc("hasJSActions"); }); - handler.on("GetCalculationOrderIds", function (data) { + handler.on("GetCalculationOrderIds", function(data) { return pdfManager.ensureDoc("calculationOrderIds"); }); handler.on( "SaveDocument", - async function ({ isPureXfa, numPages, annotationStorage, filename }) { + async function({ isPureXfa, numPages, annotationStorage, filename }) { const globalPromises = [ pdfManager.requestLoadedStream(), pdfManager.ensureCatalog("acroForm"), @@ -615,7 +615,7 @@ class WorkerMessageHandler { imagePromises, changes ) - .finally(function () { + .finally(function() { finishWorkerTask(task); }); }) @@ -652,13 +652,13 @@ class WorkerMessageHandler { } else { for (let pageIndex = 0; pageIndex < numPages; pageIndex++) { promises.push( - pdfManager.getPage(pageIndex).then(function (page) { + pdfManager.getPage(pageIndex).then(function(page) { const task = new WorkerTask(`Save: page ${pageIndex}`); startWorkerTask(task); return page .save(handler, task, annotationStorage, changes) - .finally(function () { + .finally(function() { finishWorkerTask(task); }); }) @@ -748,6 +748,31 @@ class WorkerMessageHandler { } ); + handler.on("StreamContents", function(data, sink) { + const pageIndex = data.pageIndex; + pdfManager.getPage(pageIndex).then(function(page) { + page.getContentStream().then(stream => { + let byte; + let string = ""; + while ((byte = stream.getByte()) !== -1) { + string += String.fromCharCode(byte); + } + sink.enqueue(string, string.length); + sink.close(); + }); + }); + }); + + handler.on("UpdateContents", function (data) { + return new Promise(resolve => { + const pageIndex = data.pageIndex; + pdfManager.getPage(pageIndex).then(function (page) { + page.setContents(data.value); + resolve(); + }); + }); + }); + handler.on("GetOperatorList", function (data, sink) { const pageIndex = data.pageIndex; pdfManager.getPage(pageIndex).then(function (page) { @@ -767,6 +792,7 @@ class WorkerMessageHandler { cacheKey: data.cacheKey, annotationStorage: data.annotationStorage, modifiedIds: data.modifiedIds, + contentOverride: data.contentOverride, }) .then( function (operatorListInfo) { diff --git a/src/display/api.js b/src/display/api.js index b84a02434..d85fa6dfe 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -35,6 +35,7 @@ import { AbortException, AnnotationMode, assert, + djb2Hash, FeatureTest, getVerbosityLevel, info, @@ -1660,6 +1661,37 @@ class PDFPageProxy { return renderTask; } + updateContents(newContents) { + if (!newContents) { + throw new Error("Contents may not be null or undefined"); + } + this._intentStates.clear(); + return this._transport.updateContents(newContents, this._pageIndex); + } + + getContents() { + const readableStream = this._transport.streamContents(this._pageIndex); + + return new Promise(function (resolve, reject) { + function pump() { + reader.read().then(function ({ value, done }) { + if (done) { + resolve(textContent.text); + return; + } + textContent.text += value; + pump(); + }, reject); + } + + const reader = readableStream.getReader(); + const textContent = { + text: "", + }; + pump(); + }); + } + /** * @param {GetOperatorListParameters} params - Page getOperatorList * parameters. @@ -1671,6 +1703,7 @@ class PDFPageProxy { annotationMode = AnnotationMode.ENABLE, printAnnotationStorage = null, isEditing = false, + contentOverride = null, } = {}) { if (typeof PDFJSDev !== "undefined" && !PDFJSDev.test("GENERIC")) { throw new Error("Not implemented: getOperatorList"); @@ -1688,7 +1721,8 @@ class PDFPageProxy { annotationMode, printAnnotationStorage, isEditing, - /* isOpList = */ true + /* isOpList = */ true, + contentOverride ); let intentState = this._intentStates.get(intentArgs.cacheKey); if (!intentState) { @@ -1910,6 +1944,7 @@ class PDFPageProxy { cacheKey, annotationStorageSerializable, modifiedIds, + contentOverride, }) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( @@ -1927,6 +1962,7 @@ class PDFPageProxy { cacheKey, annotationStorage: map, modifiedIds, + contentOverride, }, transfer ); @@ -2528,7 +2564,8 @@ class WorkerTransport { annotationMode = AnnotationMode.ENABLE, printAnnotationStorage = null, isEditing = false, - isOpList = false + isOpList = false, + contentOverride = null ) { let renderingIntent = RenderingIntentFlag.DISPLAY; // Default value. let annotationStorageSerializable = SerializableEmpty; @@ -2586,11 +2623,16 @@ class WorkerTransport { modifiedIdsHash, ]; + if (contentOverride) { + cacheKeyBuf.push(djb2Hash(contentOverride)); + } + return { renderingIntent, cacheKey: cacheKeyBuf.join("_"), annotationStorageSerializable, modifiedIds, + contentOverride, }; } @@ -2974,6 +3016,23 @@ class WorkerTransport { return this.messageHandler.sendWithPromise("GetXRefEntries", null); } + streamContents(pageIndex) { + return this.messageHandler.sendWithStream("StreamContents", { + pageIndex, + }); + } + + /** + * @returns {Promise} A promise that is resolved once the contents + * are updated. + */ + updateContents(newContents, pageIndex) { + return this.messageHandler.sendWithPromise("UpdateContents", { + value: newContents, + pageIndex, + }); + } + saveDocument() { if (this.annotationStorage.size <= 0) { warn( diff --git a/src/shared/util.js b/src/shared/util.js index baf53a1b0..76b899596 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -578,6 +578,16 @@ function objectFromMap(map) { return obj; } +// fast and easy hash +function djb2Hash(str) { + let hash = 5381; + for (let i = 0; i < str.length; i++) { + hash = (hash << 5) + hash + str.charCodeAt(i); + hash &= hash; // Convert to 32-bit integer + } + return hash >>> 0; // Convert to unsigned +} + // Checks the endianness of the platform. function isLittleEndian() { const buffer8 = new Uint8Array(4); @@ -1136,6 +1146,7 @@ export { BASELINE_FACTOR, bytesToString, createValidAbsoluteUrl, + djb2Hash, DocumentActionEventType, FeatureTest, FONT_IDENTITY_MATRIX,