A couple of small improvements of the new internal viewer

- Mention the internal viewer in the README, so that it's easier to find.

 - Implement a new `INTERNAL_VIEWER` define, such that it's easier to limit code to only the "internal-viewer" gulp target.

 - Only include the "GetRawData" message-handler when needed. Note that the `MessageHandler` [already throws](eb159abd6a/src/shared/message_handler.js (L121-L123)) for any missing handler.

 - Move the various new helper functions out of `src/core/document.js` and into their own file. The reasons for doing this are:
    - That file is already quite large and complex as-is, and these helper functions are slightly orthogonal to its main functionality.
    - Babel isn't able to remove all of the new code, and by moving this into a separate file we can guarantee that no extra code ends up in e.g. Firefox.
This commit is contained in:
Jonas Jenwald 2026-03-10 21:49:29 +01:00
parent eb159abd6a
commit 60d6abdf4f
7 changed files with 215 additions and 161 deletions

View File

@ -44,6 +44,10 @@ PDF.js is built into version 19+ of Firefox.
Chrome, go to `Tools > Extension` and load the (unpackaged) extension from the
directory `build/chromium`.
### PDF debugger
Browse the internal structure of a PDF document with https://mozilla.github.io/pdf.js/internal-viewer/web/pdf_internal_viewer.html
## Getting the Code
To get a local copy of the current code, clone it using git:

View File

@ -82,6 +82,7 @@ export default [
...globals.worker,
PDFJSDev: "readonly",
__raw_import__: "readonly",
__eager_import__: "readonly",
},
ecmaVersion: 2025,

View File

@ -190,6 +190,19 @@ function babelPluginPDFJSPreprocessor(babel, ctx) {
},
];
path.replaceWith(t.importExpression(source));
} else if (t.isIdentifier(node.callee, { name: "__eager_import__" })) {
if (node.arguments.length !== 1) {
throw new Error("Invalid `__eager_import__` usage.");
}
// Replace it with a standard `import`-call and inline the module.
const source = node.arguments[0];
source.leadingComments = [
{
type: "CommentBlock",
value: "webpackMode: 'eager'",
},
];
path.replaceWith(t.importExpression(source));
}
},
"BlockStatement|StaticBlock": {

View File

@ -119,6 +119,7 @@ const DEFINES = Object.freeze({
COMPONENTS: false,
LIB: false,
IMAGE_DECODERS: false,
INTERNAL_VIEWER: false,
});
function transform(charEncoding, transformFunction) {
@ -2410,7 +2411,7 @@ gulp.task(
"internal-viewer",
gulp.series(createBuildNumber, function createInternalViewer() {
console.log("\n### Creating internal viewer");
const defines = { ...DEFINES, GENERIC: true };
const defines = { ...DEFINES, GENERIC: true, INTERNAL_VIEWER: true };
return buildInternalViewer(defines, INTERNAL_VIEWER_DIR);
})
);

View File

@ -22,7 +22,6 @@ import {
isArrayEqual,
makeArr,
objectSize,
OPS,
PageActionEventType,
RenderingIntentFlag,
shadow,
@ -38,17 +37,6 @@ import {
PopupAnnotation,
WidgetAnnotation,
} from "./annotation.js";
import {
Cmd,
Dict,
EOF,
isName,
isRefsEqual,
Name,
Ref,
RefSet,
RefSetCache,
} from "./primitives.js";
import {
collectActions,
getInheritableProperty,
@ -63,9 +51,16 @@ import {
XRefEntryException,
XRefParseException,
} from "./core_utils.js";
import { EvaluatorPreprocessor, PartialEvaluator } from "./evaluator.js";
import {
Dict,
isName,
isRefsEqual,
Name,
Ref,
RefSet,
RefSetCache,
} from "./primitives.js";
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
import { Lexer, Linearization, Parser } from "./parser.js";
import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./calculate_md5.js";
@ -73,9 +68,11 @@ import { Catalog } from "./catalog.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Intersector } from "./intersector.js";
import { Linearization } from "./parser.js";
import { LocalColorSpaceCache } from "./image_utils.js";
import { ObjectLoader } from "./object_loader.js";
import { OperatorList } from "./operator_list.js";
import { PartialEvaluator } from "./evaluator.js";
import { PDFFunctionFactory } from "./function.js";
import { PDFImage } from "./image.js";
import { StreamsSequenceStream } from "./decode_stream.js";
@ -2038,9 +2035,16 @@ class PDFDocument {
}
async toJSObject(value, firstCall = true) {
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
if (
typeof PDFJSDev !== "undefined" &&
!PDFJSDev.test("TESTING || INTERNAL_VIEWER")
) {
throw new Error("Not implemented: toJSObject");
}
const { InternalViewerUtils } =
typeof PDFJSDev === "undefined"
? await import("./internal_viewer_utils.js")
: await __eager_import__("./internal_viewer_utils.js");
if (value === null && firstCall) {
return this.toJSObject(this.xref.trailer, false);
@ -2051,7 +2055,7 @@ class PDFDocument {
for (const [key, val] of value.getRawEntries()) {
obj[key] =
isPage && key === "Contents"
? _getContentTokens(val, this.xref)
? InternalViewerUtils.getContentTokens(val, this.xref)
: await this.toJSObject(val, false);
}
return obj;
@ -2109,9 +2113,10 @@ class PDFDocument {
if (isName(dict.get("Subtype"), "Form")) {
obj.bytes = value.getString();
value.reset();
const { instructions, cmdNames } = _groupIntoInstructions(
_tokenizeStream(value, this.xref)
);
const { instructions, cmdNames } =
InternalViewerUtils.groupIntoInstructions(
InternalViewerUtils.tokenizeStream(value, this.xref)
);
obj.contentStream = true;
obj.instructions = instructions;
obj.cmdNames = cmdNames;
@ -2125,130 +2130,4 @@ class PDFDocument {
}
}
/**
 * Parse `stream` object-by-object and return the JS descriptions of the
 * parsed objects, in order.
 *
 * Reading stops at `EOF` or at the first exception thrown by the parser;
 * everything collected up to that point is returned. Objects for which
 * `_tokenToJSObject` returns `null` are left out.
 *
 * @param {Object} stream - Passed to the `Lexer` constructor.
 * @param {Object} xref - Passed through to the `Parser`.
 * @returns {Array<Object>}
 */
function _tokenizeStream(stream, xref) {
  const lexer = new Lexer(stream);
  const parser = new Parser({ lexer, xref, allowStreams: false });
  const collected = [];

  for (;;) {
    let parsed;
    try {
      parsed = parser.getObj();
    } catch {
      // A parse failure ends tokenization; keep what was read so far.
      return collected;
    }
    if (parsed === EOF) {
      return collected;
    }
    const described = _tokenToJSObject(parsed);
    if (described === null) {
      continue;
    }
    collected.push(described);
  }
}
/**
 * Build the instruction listing for a "Contents" value.
 *
 * `contentsVal` may be a single value or an array of values. For every entry
 * that is a `Ref`, its `{ num, gen }` pair is recorded in `rawContents`. Each
 * entry is resolved with `xref.fetchIfRef`; entries that do not resolve to a
 * `BaseStream` contribute no tokens. The tokens of all streams are
 * concatenated before being grouped into instructions.
 *
 * @param {*} contentsVal
 * @param {Object} xref
 * @returns {{contentStream: boolean, instructions: Array, cmdNames: Object,
 *   rawContents: Array<{num: number, gen: number}>}}
 */
function _getContentTokens(contentsVal, xref) {
  const entries = Array.isArray(contentsVal) ? contentsVal : [contentsVal];
  const rawContents = [];
  const allTokens = [];

  entries.forEach(entry => {
    if (entry instanceof Ref) {
      const { num, gen } = entry;
      rawContents.push({ num, gen });
    }
    const resolved = xref.fetchIfRef(entry);
    if (resolved instanceof BaseStream) {
      allTokens.push(..._tokenizeStream(resolved, xref));
    }
  });

  const grouped = _groupIntoInstructions(allTokens);
  return {
    contentStream: true,
    instructions: grouped.instructions,
    cmdNames: grouped.cmdNames,
    rawContents,
  };
}
// Cache for the reverse lookup table (OPS numeric id → OPS property name);
// stays `null` until `_getOpsIdToName` is called for the first time.
let _opsIdToName = null;

/**
 * Return the reverse lookup table of `OPS`, building it on the first call
 * and handing back the same prototype-less object on every later call.
 *
 * @returns {Object}
 */
function _getOpsIdToName() {
  if (_opsIdToName === null) {
    const table = (_opsIdToName = Object.create(null));
    Object.entries(OPS).forEach(([name, id]) => {
      table[id] = name;
    });
  }
  return _opsIdToName;
}
/**
 * Turn a flat token list into `{ cmd, args }` instructions.
 *
 * Non-command tokens are buffered until the next command token. A command
 * found in `EvaluatorPreprocessor.opMap` without `variableArgs` takes the
 * last `numArgs` buffered tokens; any surplus tokens in front of those are
 * emitted one by one as `{ cmd: null, args: [token] }`. Unknown commands and
 * commands with `variableArgs` take the whole buffer. Tokens still buffered
 * at the end are emitted as `cmd: null` entries as well.
 *
 * @param {Array<Object>} tokens - Objects with a `type` property; command
 *   tokens have `type === "cmd"` and carry the operator in `value`.
 * @returns {{instructions: Array<Object>, cmdNames: Object}} `cmdNames` maps
 *   each known operator encountered to its name from `_getOpsIdToName()`.
 */
function _groupIntoInstructions(tokens) {
  const knownOps = EvaluatorPreprocessor.opMap;
  const idToName = _getOpsIdToName();
  const cmdNames = Object.create(null);
  const instructions = [];
  let pending = [];

  const emitOrphan = orphan => {
    instructions.push({ cmd: null, args: [orphan] });
  };

  for (const token of tokens) {
    if (token.type === "cmd") {
      const operator = token.value;
      const op = knownOps[operator];
      if (op && !(operator in cmdNames)) {
        cmdNames[operator] = idToName[op.id];
      }
      if (op && !op.variableArgs) {
        // Fixed arity: anything beyond `numArgs` becomes an orphan.
        const surplus = Math.max(0, pending.length - op.numArgs);
        pending.splice(0, surplus).forEach(emitOrphan);
      }
      instructions.push({ cmd: operator, args: pending });
      pending = [];
    } else {
      pending.push(token);
    }
  }
  pending.forEach(emitOrphan);

  return { instructions, cmdNames };
}
/**
 * Describe a parsed PDF object as a plain `{ type, ... }` JS object.
 *
 * Handles `Cmd`, `Name`, `Ref`, arrays and `Dict` (both converted
 * recursively), numbers, strings, booleans and `null`.
 *
 * @param {*} obj
 * @returns {Object|null} The description, or `null` for any other value.
 */
function _tokenToJSObject(obj) {
  if (obj === null) {
    return { type: "null" };
  }

  switch (typeof obj) {
    case "number":
      return { type: "number", value: obj };
    case "string":
      return { type: "string", value: obj };
    case "boolean":
      return { type: "boolean", value: obj };
  }

  if (obj instanceof Cmd) {
    return { type: "cmd", value: obj.cmd };
  }
  if (obj instanceof Name) {
    return { type: "name", value: obj.name };
  }
  if (obj instanceof Ref) {
    const { num, gen } = obj;
    return { type: "ref", num, gen };
  }
  if (Array.isArray(obj)) {
    const items = obj.map(item => _tokenToJSObject(item));
    return { type: "array", value: items };
  }
  if (obj instanceof Dict) {
    const entries = Object.create(null);
    for (const [key, val] of obj.getRawEntries()) {
      entries[key] = _tokenToJSObject(val);
    }
    return { type: "dict", value: entries };
  }
  return null;
}
export { Page, PDFDocument };

View File

@ -0,0 +1,152 @@
/* Copyright 2026 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Cmd, Dict, EOF, Name, Ref } from "./primitives.js";
import { Lexer, Parser } from "./parser.js";
import { OPS, shadow } from "../shared/util.js";
import { BaseStream } from "./base_stream.js";
import { EvaluatorPreprocessor } from "./evaluator.js";
// Build-time guard: when `PDFJSDev` is defined and neither the TESTING nor
// the INTERNAL_VIEWER define is set, evaluating this module throws.
if (
typeof PDFJSDev !== "undefined" &&
!PDFJSDev.test("TESTING || INTERNAL_VIEWER")
) {
throw new Error("Not implemented: InternalViewerUtils");
}
/**
 * Helpers that turn parsed PDF objects and content streams into plain
 * `{ type, ... }` JS objects and `{ cmd, args }` instruction lists.
 *
 * NOTE: The methods call each other through `this`, so they must be invoked
 * as `InternalViewerUtils.method(...)` and never passed around detached.
 */
const InternalViewerUtils = {
  /**
   * Parse `stream` object-by-object and return the JS descriptions of the
   * parsed objects, in order.
   *
   * Reading stops at `EOF` or at the first exception thrown by the parser;
   * everything collected up to that point is returned. Objects for which
   * `tokenToJSObject` returns `null` are left out.
   *
   * @param {Object} stream - Passed to the `Lexer` constructor.
   * @param {Object} xref - Passed through to the `Parser`.
   * @returns {Array<Object>}
   */
  tokenizeStream(stream, xref) {
    const tokens = [];
    const parser = new Parser({
      lexer: new Lexer(stream),
      xref,
      allowStreams: false,
    });
    while (true) {
      let obj;
      try {
        obj = parser.getObj();
      } catch {
        // A parse failure ends tokenization; keep what was read so far.
        break;
      }
      if (obj === EOF) {
        break;
      }
      const token = this.tokenToJSObject(obj);
      if (token !== null) {
        tokens.push(token);
      }
    }
    return tokens;
  },

  /**
   * Build the instruction listing for a "Contents" value.
   *
   * `contentsVal` may be a single value or an array of values. For every
   * entry that is a `Ref`, its `{ num, gen }` pair is recorded in
   * `rawContents`. Each entry is resolved with `xref.fetchIfRef`; entries
   * that do not resolve to a `BaseStream` contribute no tokens.
   *
   * @param {*} contentsVal
   * @param {Object} xref
   * @returns {{contentStream: boolean, instructions: Array,
   *   cmdNames: Object, rawContents: Array<{num: number, gen: number}>}}
   */
  getContentTokens(contentsVal, xref) {
    const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal];
    const rawContents = [];
    const tokens = [];
    for (const rawRef of refs) {
      if (rawRef instanceof Ref) {
        rawContents.push({ num: rawRef.num, gen: rawRef.gen });
      }
      const stream = xref.fetchIfRef(rawRef);
      if (!(stream instanceof BaseStream)) {
        continue;
      }
      // Append one by one: spreading a very large token list into `push`
      // passes every element as a call argument and can throw a RangeError.
      for (const token of this.tokenizeStream(stream, xref)) {
        tokens.push(token);
      }
    }
    const { instructions, cmdNames } = this.groupIntoInstructions(tokens);
    return { contentStream: true, instructions, cmdNames, rawContents };
  },

  /**
   * Reverse map: OPS numeric id → OPS property name.
   *
   * The result is handed to `shadow(this, "opsIdToName", ...)`, which
   * presumably replaces this getter with the computed value so that the map
   * is only built once (confirm against `shadow` in `shared/util.js`).
   */
  get opsIdToName() {
    const opsIdToName = Object.create(null);
    for (const [name, id] of Object.entries(OPS)) {
      opsIdToName[id] = name;
    }
    return shadow(this, "opsIdToName", opsIdToName);
  },

  /**
   * Turn a flat token list into `{ cmd, args }` instructions.
   *
   * Non-command tokens are buffered until the next command token. A command
   * found in `EvaluatorPreprocessor.opMap` without `variableArgs` takes the
   * last `numArgs` buffered tokens; surplus tokens in front of those are
   * emitted one by one as `{ cmd: null, args: [token] }`. Unknown commands
   * and commands with `variableArgs` take the whole buffer. Tokens still
   * buffered at the end are emitted as `cmd: null` entries as well.
   *
   * @param {Array<Object>} tokens - As produced by `tokenizeStream`.
   * @returns {{instructions: Array<Object>, cmdNames: Object}} `cmdNames`
   *   maps each known operator encountered to its `OPS` property name.
   */
  groupIntoInstructions(tokens) {
    const { opMap } = EvaluatorPreprocessor;
    const instructions = [];
    const cmdNames = Object.create(null);
    const argBuffer = [];
    for (const token of tokens) {
      if (token.type !== "cmd") {
        argBuffer.push(token);
        continue;
      }
      const op = opMap[token.value];
      if (op && !(token.value in cmdNames)) {
        cmdNames[token.value] = this.opsIdToName[op.id];
      }
      let args;
      if (!op || op.variableArgs) {
        // Unknown command or variable args: consume all pending args.
        args = argBuffer.splice(0);
      } else {
        // Fixed args: consume exactly numArgs, orphan the rest.
        const orphanCount = Math.max(0, argBuffer.length - op.numArgs);
        for (let i = 0; i < orphanCount; i++) {
          instructions.push({ cmd: null, args: [argBuffer.shift()] });
        }
        args = argBuffer.splice(0);
      }
      instructions.push({ cmd: token.value, args });
    }
    for (const t of argBuffer) {
      instructions.push({ cmd: null, args: [t] });
    }
    return { instructions, cmdNames };
  },

  /**
   * Describe a parsed PDF object as a plain `{ type, ... }` JS object.
   *
   * Handles `Cmd`, `Name`, `Ref`, arrays and `Dict` (both converted
   * recursively), numbers, strings, booleans and `null`.
   *
   * @param {*} obj
   * @returns {Object|null} The description, or `null` for any other value.
   */
  tokenToJSObject(obj) {
    if (obj instanceof Cmd) {
      return { type: "cmd", value: obj.cmd };
    }
    if (obj instanceof Name) {
      return { type: "name", value: obj.name };
    }
    if (obj instanceof Ref) {
      return { type: "ref", num: obj.num, gen: obj.gen };
    }
    if (Array.isArray(obj)) {
      // Use an arrow function so that `this` is preserved for the recursive
      // calls; passing `this.tokenToJSObject` directly would invoke it with
      // `this === undefined` and throw on nested arrays/dictionaries.
      return {
        type: "array",
        value: obj.map(item => this.tokenToJSObject(item)),
      };
    }
    if (obj instanceof Dict) {
      const result = Object.create(null);
      for (const [key, val] of obj.getRawEntries()) {
        result[key] = this.tokenToJSObject(val);
      }
      return { type: "dict", value: result };
    }
    if (typeof obj === "number") {
      return { type: "number", value: obj };
    }
    if (typeof obj === "string") {
      return { type: "string", value: obj };
    }
    if (typeof obj === "boolean") {
      return { type: "boolean", value: obj };
    }
    if (obj === null) {
      return { type: "null" };
    }
    return null;
  },
};
export { InternalViewerUtils };

View File

@ -952,21 +952,25 @@ class WorkerMessageHandler {
return pdfManager.fontFallback(data.id, handler);
});
handler.on("GetRawData", async function ({ ref, page }) {
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
throw new Error("Not implemented: GetRawData");
}
let value = null;
if (page >= 1) {
value = (await pdfManager.ensureCatalog("getPageDict", [page - 1]))[1];
} else if (ref) {
value =
typeof ref === "string"
? Ref.fromString(ref)
: Ref.get(ref.num, ref.gen);
}
return pdfManager.ensureDoc("toJSObject", [value]);
});
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("TESTING || INTERNAL_VIEWER")
) {
handler.on("GetRawData", async function ({ ref, page }) {
let value = null;
if (page >= 1) {
value = (
await pdfManager.ensureCatalog("getPageDict", [page - 1])
)[1];
} else if (ref) {
value =
typeof ref === "string"
? Ref.fromString(ref)
: Ref.get(ref.num, ref.gen);
}
return pdfManager.ensureDoc("toJSObject", [value]);
});
}
handler.on("Cleanup", function (data) {
return pdfManager.cleanup(/* manuallyTriggered = */ true);