mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-04-09 14:54:04 +02:00
A couple of small improvements of the new internal viewer
- Mention the internal viewer in the README, such that it's easier to find.
- Implement a new `INTERNAL_VIEWER` define, such that it's easier to limit code to only the "internal-viewer" gulp target.
- Only include the "GetRawData" message-handler when needed. Note that the `MessageHandler` [already throws](eb159abd6a/src/shared/message_handler.js (L121-L123)) for any missing handler.
- Move the various new helper functions out of `src/core/document.js` and into their own file. The reasons for doing this are:
- That file is already quite large and complex as-is, and these helper functions are slightly orthogonal to its main functionality.
- Babel isn't able to remove all of the new code, and by moving this into a separate file we can guarantee that no extra code ends up in e.g. Firefox.
This commit is contained in:
parent
eb159abd6a
commit
60d6abdf4f
@ -44,6 +44,10 @@ PDF.js is built into version 19+ of Firefox.
|
||||
Chrome, go to `Tools > Extension` and load the (unpackaged) extension from the
|
||||
directory `build/chromium`.
|
||||
|
||||
### PDF debugger
|
||||
|
||||
Browse the internal structure of a PDF document with https://mozilla.github.io/pdf.js/internal-viewer/web/pdf_internal_viewer.html
|
||||
|
||||
## Getting the Code
|
||||
|
||||
To get a local copy of the current code, clone it using git:
|
||||
|
||||
@ -82,6 +82,7 @@ export default [
|
||||
...globals.worker,
|
||||
PDFJSDev: "readonly",
|
||||
__raw_import__: "readonly",
|
||||
__eager_import__: "readonly",
|
||||
},
|
||||
|
||||
ecmaVersion: 2025,
|
||||
|
||||
@ -190,6 +190,19 @@ function babelPluginPDFJSPreprocessor(babel, ctx) {
|
||||
},
|
||||
];
|
||||
path.replaceWith(t.importExpression(source));
|
||||
} else if (t.isIdentifier(node.callee, { name: "__eager_import__" })) {
|
||||
if (node.arguments.length !== 1) {
|
||||
throw new Error("Invalid `__eager_import__` usage.");
|
||||
}
|
||||
// Replace it with a standard `import`-call and inline the module.
|
||||
const source = node.arguments[0];
|
||||
source.leadingComments = [
|
||||
{
|
||||
type: "CommentBlock",
|
||||
value: "webpackMode: 'eager'",
|
||||
},
|
||||
];
|
||||
path.replaceWith(t.importExpression(source));
|
||||
}
|
||||
},
|
||||
"BlockStatement|StaticBlock": {
|
||||
|
||||
@ -119,6 +119,7 @@ const DEFINES = Object.freeze({
|
||||
COMPONENTS: false,
|
||||
LIB: false,
|
||||
IMAGE_DECODERS: false,
|
||||
INTERNAL_VIEWER: false,
|
||||
});
|
||||
|
||||
function transform(charEncoding, transformFunction) {
|
||||
@ -2410,7 +2411,7 @@ gulp.task(
|
||||
"internal-viewer",
|
||||
gulp.series(createBuildNumber, function createInternalViewer() {
|
||||
console.log("\n### Creating internal viewer");
|
||||
const defines = { ...DEFINES, GENERIC: true };
|
||||
const defines = { ...DEFINES, GENERIC: true, INTERNAL_VIEWER: true };
|
||||
return buildInternalViewer(defines, INTERNAL_VIEWER_DIR);
|
||||
})
|
||||
);
|
||||
|
||||
@ -22,7 +22,6 @@ import {
|
||||
isArrayEqual,
|
||||
makeArr,
|
||||
objectSize,
|
||||
OPS,
|
||||
PageActionEventType,
|
||||
RenderingIntentFlag,
|
||||
shadow,
|
||||
@ -38,17 +37,6 @@ import {
|
||||
PopupAnnotation,
|
||||
WidgetAnnotation,
|
||||
} from "./annotation.js";
|
||||
import {
|
||||
Cmd,
|
||||
Dict,
|
||||
EOF,
|
||||
isName,
|
||||
isRefsEqual,
|
||||
Name,
|
||||
Ref,
|
||||
RefSet,
|
||||
RefSetCache,
|
||||
} from "./primitives.js";
|
||||
import {
|
||||
collectActions,
|
||||
getInheritableProperty,
|
||||
@ -63,9 +51,16 @@ import {
|
||||
XRefEntryException,
|
||||
XRefParseException,
|
||||
} from "./core_utils.js";
|
||||
import { EvaluatorPreprocessor, PartialEvaluator } from "./evaluator.js";
|
||||
import {
|
||||
Dict,
|
||||
isName,
|
||||
isRefsEqual,
|
||||
Name,
|
||||
Ref,
|
||||
RefSet,
|
||||
RefSetCache,
|
||||
} from "./primitives.js";
|
||||
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
|
||||
import { Lexer, Linearization, Parser } from "./parser.js";
|
||||
import { NullStream, Stream } from "./stream.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { calculateMD5 } from "./calculate_md5.js";
|
||||
@ -73,9 +68,11 @@ import { Catalog } from "./catalog.js";
|
||||
import { clearGlobalCaches } from "./cleanup_helper.js";
|
||||
import { DatasetReader } from "./dataset_reader.js";
|
||||
import { Intersector } from "./intersector.js";
|
||||
import { Linearization } from "./parser.js";
|
||||
import { LocalColorSpaceCache } from "./image_utils.js";
|
||||
import { ObjectLoader } from "./object_loader.js";
|
||||
import { OperatorList } from "./operator_list.js";
|
||||
import { PartialEvaluator } from "./evaluator.js";
|
||||
import { PDFFunctionFactory } from "./function.js";
|
||||
import { PDFImage } from "./image.js";
|
||||
import { StreamsSequenceStream } from "./decode_stream.js";
|
||||
@ -2038,9 +2035,16 @@ class PDFDocument {
|
||||
}
|
||||
|
||||
async toJSObject(value, firstCall = true) {
|
||||
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
|
||||
if (
|
||||
typeof PDFJSDev !== "undefined" &&
|
||||
!PDFJSDev.test("TESTING || INTERNAL_VIEWER")
|
||||
) {
|
||||
throw new Error("Not implemented: toJSObject");
|
||||
}
|
||||
const { InternalViewerUtils } =
|
||||
typeof PDFJSDev === "undefined"
|
||||
? await import("./internal_viewer_utils.js")
|
||||
: await __eager_import__("./internal_viewer_utils.js");
|
||||
|
||||
if (value === null && firstCall) {
|
||||
return this.toJSObject(this.xref.trailer, false);
|
||||
@ -2051,7 +2055,7 @@ class PDFDocument {
|
||||
for (const [key, val] of value.getRawEntries()) {
|
||||
obj[key] =
|
||||
isPage && key === "Contents"
|
||||
? _getContentTokens(val, this.xref)
|
||||
? InternalViewerUtils.getContentTokens(val, this.xref)
|
||||
: await this.toJSObject(val, false);
|
||||
}
|
||||
return obj;
|
||||
@ -2109,9 +2113,10 @@ class PDFDocument {
|
||||
if (isName(dict.get("Subtype"), "Form")) {
|
||||
obj.bytes = value.getString();
|
||||
value.reset();
|
||||
const { instructions, cmdNames } = _groupIntoInstructions(
|
||||
_tokenizeStream(value, this.xref)
|
||||
);
|
||||
const { instructions, cmdNames } =
|
||||
InternalViewerUtils.groupIntoInstructions(
|
||||
InternalViewerUtils.tokenizeStream(value, this.xref)
|
||||
);
|
||||
obj.contentStream = true;
|
||||
obj.instructions = instructions;
|
||||
obj.cmdNames = cmdNames;
|
||||
@ -2125,130 +2130,4 @@ class PDFDocument {
|
||||
}
|
||||
}
|
||||
|
||||
function _tokenizeStream(stream, xref) {
|
||||
const tokens = [];
|
||||
const parser = new Parser({
|
||||
lexer: new Lexer(stream),
|
||||
xref,
|
||||
allowStreams: false,
|
||||
});
|
||||
while (true) {
|
||||
let obj;
|
||||
try {
|
||||
obj = parser.getObj();
|
||||
} catch {
|
||||
break;
|
||||
}
|
||||
if (obj === EOF) {
|
||||
break;
|
||||
}
|
||||
const token = _tokenToJSObject(obj);
|
||||
if (token !== null) {
|
||||
tokens.push(token);
|
||||
}
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
function _getContentTokens(contentsVal, xref) {
|
||||
const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal];
|
||||
const rawContents = [];
|
||||
const tokens = [];
|
||||
for (const rawRef of refs) {
|
||||
if (rawRef instanceof Ref) {
|
||||
rawContents.push({ num: rawRef.num, gen: rawRef.gen });
|
||||
}
|
||||
const stream = xref.fetchIfRef(rawRef);
|
||||
if (!(stream instanceof BaseStream)) {
|
||||
continue;
|
||||
}
|
||||
tokens.push(..._tokenizeStream(stream, xref));
|
||||
}
|
||||
const { instructions, cmdNames } = _groupIntoInstructions(tokens);
|
||||
return { contentStream: true, instructions, cmdNames, rawContents };
|
||||
}
|
||||
|
||||
// Lazily-built reverse map: OPS numeric id → property name string.
|
||||
let _opsIdToName = null;
|
||||
|
||||
function _getOpsIdToName() {
|
||||
if (!_opsIdToName) {
|
||||
_opsIdToName = Object.create(null);
|
||||
for (const [name, id] of Object.entries(OPS)) {
|
||||
_opsIdToName[id] = name;
|
||||
}
|
||||
}
|
||||
return _opsIdToName;
|
||||
}
|
||||
|
||||
function _groupIntoInstructions(tokens) {
|
||||
const { opMap } = EvaluatorPreprocessor;
|
||||
const opsIdToName = _getOpsIdToName();
|
||||
const instructions = [];
|
||||
const cmdNames = Object.create(null);
|
||||
const argBuffer = [];
|
||||
for (const token of tokens) {
|
||||
if (token.type !== "cmd") {
|
||||
argBuffer.push(token);
|
||||
continue;
|
||||
}
|
||||
const op = opMap[token.value];
|
||||
if (op && !(token.value in cmdNames)) {
|
||||
cmdNames[token.value] = opsIdToName[op.id];
|
||||
}
|
||||
let args;
|
||||
if (!op || op.variableArgs) {
|
||||
// Unknown command or variable args: consume all pending args.
|
||||
args = argBuffer.splice(0);
|
||||
} else {
|
||||
// Fixed args: consume exactly numArgs, orphan the rest.
|
||||
const orphanCount = Math.max(0, argBuffer.length - op.numArgs);
|
||||
for (let i = 0; i < orphanCount; i++) {
|
||||
instructions.push({ cmd: null, args: [argBuffer.shift()] });
|
||||
}
|
||||
args = argBuffer.splice(0);
|
||||
}
|
||||
instructions.push({ cmd: token.value, args });
|
||||
}
|
||||
for (const t of argBuffer) {
|
||||
instructions.push({ cmd: null, args: [t] });
|
||||
}
|
||||
return { instructions, cmdNames };
|
||||
}
|
||||
|
||||
function _tokenToJSObject(obj) {
|
||||
if (obj instanceof Cmd) {
|
||||
return { type: "cmd", value: obj.cmd };
|
||||
}
|
||||
if (obj instanceof Name) {
|
||||
return { type: "name", value: obj.name };
|
||||
}
|
||||
if (obj instanceof Ref) {
|
||||
return { type: "ref", num: obj.num, gen: obj.gen };
|
||||
}
|
||||
if (Array.isArray(obj)) {
|
||||
return { type: "array", value: obj.map(_tokenToJSObject) };
|
||||
}
|
||||
if (obj instanceof Dict) {
|
||||
const result = Object.create(null);
|
||||
for (const [key, val] of obj.getRawEntries()) {
|
||||
result[key] = _tokenToJSObject(val);
|
||||
}
|
||||
return { type: "dict", value: result };
|
||||
}
|
||||
if (typeof obj === "number") {
|
||||
return { type: "number", value: obj };
|
||||
}
|
||||
if (typeof obj === "string") {
|
||||
return { type: "string", value: obj };
|
||||
}
|
||||
if (typeof obj === "boolean") {
|
||||
return { type: "boolean", value: obj };
|
||||
}
|
||||
if (obj === null) {
|
||||
return { type: "null" };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
export { Page, PDFDocument };
|
||||
|
||||
152
src/core/internal_viewer_utils.js
Normal file
152
src/core/internal_viewer_utils.js
Normal file
@ -0,0 +1,152 @@
|
||||
/* Copyright 2026 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { Cmd, Dict, EOF, Name, Ref } from "./primitives.js";
|
||||
import { Lexer, Parser } from "./parser.js";
|
||||
import { OPS, shadow } from "../shared/util.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { EvaluatorPreprocessor } from "./evaluator.js";
|
||||
|
||||
if (
|
||||
typeof PDFJSDev !== "undefined" &&
|
||||
!PDFJSDev.test("TESTING || INTERNAL_VIEWER")
|
||||
) {
|
||||
throw new Error("Not implemented: InternalViewerUtils");
|
||||
}
|
||||
|
||||
/**
 * Helpers that turn parsed PDF content-stream objects into plain,
 * structured-clone friendly JavaScript values.
 *
 * The methods call each other through `this`, so they must be invoked as
 * `InternalViewerUtils.method(...)` (or otherwise with `this` bound to this
 * object) rather than being passed around as bare function references.
 */
const InternalViewerUtils = {
  /**
   * Parse `stream` object-by-object and convert every parsed object into
   * a plain token (see `tokenToJSObject`).
   *
   * @param {Object} stream - The stream handed to the `Lexer`.
   * @param {Object} xref - The cross-reference table handed to the `Parser`.
   * @returns {Array<Object>} The tokens collected until `EOF` or until the
   *   parser throws.
   */
  tokenizeStream(stream, xref) {
    const tokens = [];
    const parser = new Parser({
      lexer: new Lexer(stream),
      xref,
      allowStreams: false,
    });
    while (true) {
      let obj;
      try {
        obj = parser.getObj();
      } catch {
        // Best-effort: stop at the first parse failure, but keep whatever
        // tokens were collected so far.
        break;
      }
      if (obj === EOF) {
        break;
      }
      const token = this.tokenToJSObject(obj);
      if (token !== null) {
        tokens.push(token);
      }
    }
    return tokens;
  },

  /**
   * Tokenize the value of a page's `Contents` entry, which may be a single
   * value or an array of values, and group the tokens into instructions.
   *
   * @param {*} contentsVal - The raw `Contents` value (or array of values).
   * @param {Object} xref - Used to resolve references via `fetchIfRef`.
   * @returns {Object} `{ contentStream: true, instructions, cmdNames,
   *   rawContents }`, where `rawContents` lists `{ num, gen }` for every
   *   entry that was a `Ref`.
   */
  getContentTokens(contentsVal, xref) {
    const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal];
    const rawContents = [];
    const tokens = [];
    for (const rawRef of refs) {
      if (rawRef instanceof Ref) {
        rawContents.push({ num: rawRef.num, gen: rawRef.gen });
      }
      const stream = xref.fetchIfRef(rawRef);
      if (!(stream instanceof BaseStream)) {
        // Entries that don't resolve to a stream contribute no tokens.
        continue;
      }
      tokens.push(...this.tokenizeStream(stream, xref));
    }
    const { instructions, cmdNames } = this.groupIntoInstructions(tokens);
    return { contentStream: true, instructions, cmdNames, rawContents };
  },

  // Lazily-built reverse map: OPS numeric id → property name string.
  // NOTE(review): `shadow` presumably replaces this getter with the computed
  // value so the map is only built once — confirm in `shared/util.js`.
  get opsIdToName() {
    const opsIdToName = Object.create(null);
    for (const [name, id] of Object.entries(OPS)) {
      opsIdToName[id] = name;
    }
    return shadow(this, "opsIdToName", opsIdToName);
  },

  /**
   * Group a flat token list into `{ cmd, args }` instructions.
   *
   * Non-command tokens are buffered as pending arguments until the next
   * command token. Arguments that cannot be attributed to a command are
   * emitted as `{ cmd: null, args: [token] }` entries.
   *
   * @param {Array<Object>} tokens - Tokens as produced by `tokenizeStream`.
   * @returns {Object} `{ instructions, cmdNames }`, where `cmdNames` maps
   *   each known command string that was seen to its `OPS` property name.
   */
  groupIntoInstructions(tokens) {
    const { opMap } = EvaluatorPreprocessor;
    const instructions = [];
    const cmdNames = Object.create(null);
    const argBuffer = [];
    for (const token of tokens) {
      if (token.type !== "cmd") {
        argBuffer.push(token);
        continue;
      }
      const op = opMap[token.value];
      if (op && !(token.value in cmdNames)) {
        cmdNames[token.value] = this.opsIdToName[op.id];
      }
      let args;
      if (!op || op.variableArgs) {
        // Unknown command or variable args: consume all pending args.
        args = argBuffer.splice(0);
      } else {
        // Fixed args: consume exactly numArgs, orphan the rest.
        const orphanCount = Math.max(0, argBuffer.length - op.numArgs);
        for (let i = 0; i < orphanCount; i++) {
          instructions.push({ cmd: null, args: [argBuffer.shift()] });
        }
        args = argBuffer.splice(0);
      }
      instructions.push({ cmd: token.value, args });
    }
    // Trailing arguments that were never followed by a command.
    for (const t of argBuffer) {
      instructions.push({ cmd: null, args: [t] });
    }
    return { instructions, cmdNames };
  },

  /**
   * Convert one parsed object into a plain `{ type, ... }` description,
   * recursing into arrays and dictionaries.
   *
   * @param {*} obj - A parsed object (`Cmd`, `Name`, `Ref`, `Dict`, array,
   *   number, string, boolean or `null`).
   * @returns {Object|null} The description, or `null` for unsupported values.
   */
  tokenToJSObject(obj) {
    if (obj instanceof Cmd) {
      return { type: "cmd", value: obj.cmd };
    }
    if (obj instanceof Name) {
      return { type: "name", value: obj.name };
    }
    if (obj instanceof Ref) {
      return { type: "ref", num: obj.num, gen: obj.gen };
    }
    if (Array.isArray(obj)) {
      // Use an arrow function so `this` is preserved; passing the bare
      // method reference to `map` would leave `this` undefined and break
      // the recursion for nested arrays/dictionaries.
      return { type: "array", value: obj.map(v => this.tokenToJSObject(v)) };
    }
    if (obj instanceof Dict) {
      const result = Object.create(null);
      for (const [key, val] of obj.getRawEntries()) {
        result[key] = this.tokenToJSObject(val);
      }
      return { type: "dict", value: result };
    }
    if (typeof obj === "number") {
      return { type: "number", value: obj };
    }
    if (typeof obj === "string") {
      return { type: "string", value: obj };
    }
    if (typeof obj === "boolean") {
      return { type: "boolean", value: obj };
    }
    if (obj === null) {
      return { type: "null" };
    }
    return null;
  },
};
|
||||
|
||||
export { InternalViewerUtils };
|
||||
@ -952,21 +952,25 @@ class WorkerMessageHandler {
|
||||
return pdfManager.fontFallback(data.id, handler);
|
||||
});
|
||||
|
||||
handler.on("GetRawData", async function ({ ref, page }) {
|
||||
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
|
||||
throw new Error("Not implemented: GetRawData");
|
||||
}
|
||||
let value = null;
|
||||
if (page >= 1) {
|
||||
value = (await pdfManager.ensureCatalog("getPageDict", [page - 1]))[1];
|
||||
} else if (ref) {
|
||||
value =
|
||||
typeof ref === "string"
|
||||
? Ref.fromString(ref)
|
||||
: Ref.get(ref.num, ref.gen);
|
||||
}
|
||||
return pdfManager.ensureDoc("toJSObject", [value]);
|
||||
});
|
||||
if (
|
||||
typeof PDFJSDev === "undefined" ||
|
||||
PDFJSDev.test("TESTING || INTERNAL_VIEWER")
|
||||
) {
|
||||
handler.on("GetRawData", async function ({ ref, page }) {
|
||||
let value = null;
|
||||
if (page >= 1) {
|
||||
value = (
|
||||
await pdfManager.ensureCatalog("getPageDict", [page - 1])
|
||||
)[1];
|
||||
} else if (ref) {
|
||||
value =
|
||||
typeof ref === "string"
|
||||
? Ref.fromString(ref)
|
||||
: Ref.get(ref.num, ref.gen);
|
||||
}
|
||||
return pdfManager.ensureDoc("toJSObject", [value]);
|
||||
});
|
||||
}
|
||||
|
||||
handler.on("Cleanup", function (data) {
|
||||
return pdfManager.cleanup(/* manuallyTriggered = */ true);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user