/* Copyright 2026 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { getDocument, GlobalWorkerOptions, PasswordResponses } from "pdfjs-lib";
// Tell pdf.js where to load its worker script from. When the PDFJSDev global
// is not defined the worker under ../src is used, otherwise the one under
// ../build is used.
// NOTE(review): presumably PDFJSDev is only defined by the build tooling, so
// the first branch is the "running from source" case — confirm.
GlobalWorkerOptions.workerSrc =
typeof PDFJSDev === "undefined"
? "../src/pdf.worker.js"
: "../build/pdf.worker.mjs";
// Glyphs shown on an expand/collapse toggle while it is collapsed/expanded.
const ARROW_COLLAPSED = "▶";
const ARROW_EXPANDED = "▼";
// Matches indirect object references such as "10 0 R".
const REF_RE = /^\d+ \d+ R$/;
/**
 * Interpret the text typed into the goto field.
 *
 * A bare number is a page number; a number followed by "R" (any case) and an
 * optional generation number is an object reference. Surrounding whitespace
 * is ignored.
 *
 * @param {string} str
 * @returns {{page: number}|{ref: {num: number, gen: number}}|null}
 *   The parsed target, or null when the text matches neither form.
 */
function parseGoToInput(str) {
  const match = /^(\d+)(R(\d+)?)?$/i.exec(str.trim());
  if (match === null) {
    return null;
  }
  const [, digits, refSuffix, genDigits] = match;
  const num = Number.parseInt(digits, 10);
  if (!refSuffix) {
    return { page: num };
  }
  // A missing generation number means generation 0.
  const gen = genDigits === undefined ? 0 : Number.parseInt(genDigits, 10);
  return { ref: { num, gen } };
}
/**
 * Parse "num", "numR" or "numRgen" (the "R" in any case) into an object
 * reference. Unlike parseGoToInput, a bare number is treated as a reference
 * rather than a page; this is what the URL hash parameter parsing relies on.
 *
 * @param {string} str
 * @returns {{num: number, gen: number}|null} The reference (generation
 *   defaults to 0), or null when the text has a different shape.
 */
function parseRefInput(str) {
  const match = /^(\d+)(?:R(\d+)?)?$/i.exec(str.trim());
  if (match === null) {
    return null;
  }
  const [, numDigits, genDigits] = match;
  const gen = genDigits === undefined ? 0 : Number.parseInt(genDigits, 10);
  return { num: Number.parseInt(numDigits, 10), gen };
}
// The currently open document, or null when there is none. openDocument()
// destroys and nulls the previous value, then assigns the result of the new
// loading task.
let pdfDoc = null;
// Cache for getRawData results, keyed by "num:gen". Cleared on each new
// document. renderRef() stores the un-awaited return value of getRawData()
// here and awaits it on every read.
const refCache = new Map();
/**
 * Mirror the state of the "parse-content-stream" checkbox onto #tree: the
 * "parse-cs-active" class is present exactly while the box is checked.
 */
function updateParseCSClass() {
  const treeEl = document.getElementById("tree");
  const parseCheckbox = document.getElementById("parse-content-stream");
  treeEl.classList.toggle("parse-cs-active", parseCheckbox.checked);
}
/**
 * Fetch raw data from the open document, render it as the sole child of
 * #tree (replacing whatever was shown before) and expand the root when it
 * is expandable.
 *
 * @param {Object} data Request forwarded unchanged to `pdfDoc.getRawData()`.
 *   Callers in this file pass `{ ref }` (with `ref: null` for the trailer)
 *   or `{ page }`.
 * @param {string|null} [rootLabel] Key text shown in front of the root
 *   value, or null for no key.
 * @returns {Promise<void>} Rejects when `getRawData()` rejects.
 */
async function loadTree(data, rootLabel = null) {
  const treeEl = document.getElementById("tree");
  const rootNode = renderNode(rootLabel, await pdfDoc.getRawData(data), pdfDoc);
  treeEl.replaceChildren(rootNode);
  // Only expandable values get a toggle button. A root that is a primitive
  // or an empty dict/array has none, so there is simply nothing to expand;
  // calling click() unconditionally would throw on the null lookup result.
  rootNode.querySelector("[role='button']")?.click();
}
/**
 * Load a PDF and make it the active document.
 *
 * Shows a loading message in #status, empties the ref cache, destroys the
 * previously open document (if any), and then waits for the new loading
 * task. Password requests from the loading task are answered through the
 * #password-dialog modal. On success #pdf-info shows the name and page
 * count, #status is cleared and the goto field is enabled and emptied.
 *
 * @param {Object} source Properties spread into the `getDocument()` call;
 *   callers in this file pass `{ data }` or `{ url }`.
 * @param {string} name Display name used in the status and info texts.
 * @returns {Promise<void>} Rejects when the loading task's promise rejects.
 */
async function openDocument(source, name) {
  const byId = id => document.getElementById(id);
  const statusEl = byId("status");
  const pdfInfoEl = byId("pdf-info");
  const gotoInput = byId("goto-input");

  statusEl.textContent = `Loading ${name}…`;
  pdfInfoEl.textContent = "";
  refCache.clear();

  if (pdfDoc) {
    await pdfDoc.destroy();
    pdfDoc = null;
  }

  // Shows the modal password dialog and reports the outcome through
  // `updateCallback`: the typed text when the dialog closes, or an Error
  // when the cancel button is clicked.
  const promptForPassword = (updateCallback, reason) => {
    const dialog = byId("password-dialog");
    const title = byId("password-dialog-title");
    const input = byId("password-input");
    const cancelBtn = byId("password-cancel");

    const wrongPassword = reason === PasswordResponses.INCORRECT_PASSWORD;
    title.textContent = wrongPassword
      ? "Incorrect password. Please try again:"
      : "This PDF is password-protected. Please enter the password:";
    input.value = "";
    dialog.showModal();

    // Whichever handler runs first unhooks both, so the other one cannot
    // fire for this prompt (cancelling also closes the dialog).
    const detach = () => {
      dialog.removeEventListener("close", handleClose);
      cancelBtn.removeEventListener("click", handleCancel);
    };
    const handleClose = () => {
      detach();
      updateCallback(input.value);
    };
    const handleCancel = () => {
      detach();
      dialog.close();
      updateCallback(new Error("Password prompt cancelled."));
    };

    dialog.addEventListener("close", handleClose, { once: true });
    cancelBtn.addEventListener("click", handleCancel, { once: true });
  };

  const loadingTask = getDocument({ ...source, wasmUrl: "wasm/" });
  loadingTask.onPassword = promptForPassword;
  pdfDoc = await loadingTask.promise;

  const { numPages } = pdfDoc;
  const pluralSuffix = numPages === 1 ? "" : "s";
  pdfInfoEl.textContent = `${name} — ${numPages} page${pluralSuffix}`;
  statusEl.textContent = "";
  gotoInput.disabled = false;
  gotoInput.value = "";
}
/**
 * Report a failure to the user: #status gets "Error: <message>" and a
 * role="alert" element carrying the bare message is appended to #tree.
 *
 * @param {Error} err Only `err.message` is used.
 */
function showError(err) {
  const statusEl = document.getElementById("status");
  statusEl.textContent = "Error: " + err.message;

  const alertEl = Object.assign(document.createElement("div"), {
    textContent: err.message,
  });
  alertEl.setAttribute("role", "alert");

  const treeEl = document.getElementById("tree");
  treeEl.append(alertEl);
}
// Start with an empty file picker.
// NOTE(review): presumably this discards a selection the browser restored
// after a reload, since no document is open at this point — confirm.
document.getElementById("file-input").value = "";
// Keep the "parse-cs-active" class on #tree in sync with the checkbox: once
// on every change, and once right now for the initial state.
document
.getElementById("parse-content-stream")
.addEventListener("change", updateParseCSClass);
updateParseCSClass();
// When a file is picked, open it and show its trailer as the tree root.
// Any failure on the way is reported through showError().
document
  .getElementById("file-input")
  .addEventListener("change", async ({ target }) => {
    const pickedFile = target.files[0];
    if (pickedFile) {
      try {
        const data = await pickedFile.arrayBuffer();
        await openDocument({ data }, pickedFile.name);
        await loadTree({ ref: null }, "Trailer");
      } catch (err) {
        showError(err);
      }
    }
  });
// On startup, open the PDF named by the "file" query parameter (if any) and
// show what the URL hash asks for: a parsable "ref" wins over an in-range
// "page", and the trailer is the fallback. Failures go to showError().
(async () => {
  const fileUrl = new URLSearchParams(location.search).get("file");
  // Read the hash up front, before anything is awaited.
  const hashParams = new URLSearchParams(location.hash.slice(1));
  if (!fileUrl) {
    return;
  }

  // Echo the navigation target into the goto field.
  const showInGotoField = text => {
    document.getElementById("goto-input").value = text;
  };

  try {
    await openDocument({ url: fileUrl }, fileUrl.split("/").pop());

    const refStr = hashParams.get("ref");
    const ref = refStr ? parseRefInput(refStr) : null;
    if (ref) {
      showInGotoField(refStr);
      await loadTree({ ref });
      return;
    }

    const pageStr = hashParams.get("page");
    // No radix is passed, so parseInt keeps its default prefix handling.
    const page = pageStr ? parseInt(pageStr) : NaN;
    const pageInRange =
      Number.isInteger(page) && page >= 1 && page <= pdfDoc.numPages;
    if (pageInRange) {
      showInGotoField(pageStr);
      await loadTree({ page });
      return;
    }

    await loadTree({ ref: null }, "Trailer");
  } catch (err) {
    showError(err);
  }
})();
// Pressing Enter in the goto field navigates the tree:
//  - empty text      → the trailer,
//  - "num"           → that page (must lie within 1..numPages),
//  - "numR"/"numRgen" → that object reference.
// Unparsable text and out-of-range pages only flag the field through
// aria-invalid. A failing load is reported through showError(), like the
// other entry points in this file, instead of being lost as an unhandled
// promise rejection of this async listener.
document.getElementById("goto-input").addEventListener("keydown", async e => {
  if (e.key !== "Enter" || !pdfDoc) {
    return;
  }
  const input = e.target;
  const markInvalid = isInvalid =>
    input.setAttribute("aria-invalid", String(isInvalid));

  // Decide what to load (and validate it) before touching the document.
  let request;
  let rootLabel = null;
  if (input.value.trim() === "") {
    request = { ref: null };
    rootLabel = "Trailer";
  } else {
    const result = parseGoToInput(input.value);
    if (!result) {
      markInvalid(true);
      return;
    }
    const isPage = result.page !== undefined;
    if (isPage && (result.page < 1 || result.page > pdfDoc.numPages)) {
      markInvalid(true);
      return;
    }
    request = isPage ? { page: result.page } : { ref: result.ref };
  }
  markInvalid(false);

  try {
    await loadTree(request, rootLabel);
  } catch (err) {
    showError(err);
  }
});
// Emptying the goto field clears any earlier "invalid" marker.
document
  .getElementById("goto-input")
  .addEventListener("input", ({ target }) => {
    const isBlank = target.value.trim().length === 0;
    if (isBlank) {
      target.setAttribute("aria-invalid", "false");
    }
  });
/**
 * PDF Name objects arrive as { name: "..." } after structured clone.
 *
 * @param {*} val
 * @returns {boolean} True for a non-array object whose only own enumerable
 *   key is a string-valued `name`.
 */
function isPDFName(val) {
  if (val === null || typeof val !== "object" || Array.isArray(val)) {
    return false;
  }
  return typeof val.name === "string" && Object.keys(val).length === 1;
}
/**
 * Ref objects arrive as { num: N, gen: G } after structured clone.
 *
 * @param {*} val
 * @returns {boolean} True for a non-array object with exactly two own
 *   enumerable keys where `num` and `gen` are both numbers.
 */
function isRefObject(val) {
  if (val === null || typeof val !== "object" || Array.isArray(val)) {
    return false;
  }
  const hasNumericParts =
    typeof val.num === "number" && typeof val.gen === "number";
  return hasNumericParts && Object.keys(val).length === 2;
}
/**
 * Short display label for a ref object: "numR", with the generation
 * appended only when it is not 0 (e.g. "12R" or "12R3").
 *
 * @param {{num: number, gen: number}} ref
 * @returns {string}
 */
function refLabel(ref) {
  const { num, gen } = ref;
  if (gen === 0) {
    return `${num}R`;
  }
  return `${num}R${gen}`;
}
/**
 * Page content streams:
 * { contentStream: true, instructions, cmdNames, rawContents }.
 *
 * @param {*} val
 * @returns {boolean} True when `val` is an object flagged with
 *   `contentStream === true` whose `instructions` and `rawContents` are
 *   both arrays.
 */
function isContentStream(val) {
  if (val === null || typeof val !== "object") {
    return false;
  }
  const { contentStream, instructions, rawContents } = val;
  return (
    contentStream === true &&
    Array.isArray(instructions) &&
    Array.isArray(rawContents)
  );
}
/**
 * Streams: { dict, bytes }, { dict, imageData },
 * or { dict, contentStream: true, instructions, cmdNames } (Form XObject).
 *
 * @param {*} val
 * @returns {boolean} True for a non-array object that owns a `dict`
 *   property and additionally owns `bytes` or `imageData`, or has
 *   `contentStream === true`.
 */
function isStream(val) {
  if (val === null || typeof val !== "object" || Array.isArray(val)) {
    return false;
  }
  const owns = key => Object.prototype.hasOwnProperty.call(val, key);
  if (!owns("dict")) {
    return false;
  }
  return owns("bytes") || owns("imageData") || val.contentStream === true;
}
/**
 * True when `val` is a stream (see isStream) that owns an `imageData`
 * property.
 *
 * @param {*} val
 * @returns {boolean}
 */
function isImageStream(val) {
  if (!isStream(val)) {
    return false;
  }
  return Object.prototype.hasOwnProperty.call(val, "imageData");
}
/**
 * True when `val` is a stream (see isStream) flagged with
 * `contentStream === true`, i.e. a Form XObject whose content was parsed
 * into instructions.
 *
 * @param {*} val
 * @returns {boolean}
 */
function isFormXObjectStream(val) {
  if (!isStream(val)) {
    return false;
  }
  return val.contentStream === true;
}
/**
 * Render one key/value pair as a tree item: a div with class "node" and
 * role="treeitem" holding an optional key span, a ": " separator span and
 * the rendered value.
 *
 * @param {string|null} key Dict key, array index, or null for root (no key
 *   and no separator are emitted then).
 * @param {*} value
 * @param {PDFDocumentProxy} doc
 * @returns {HTMLDivElement}
 */
function renderNode(key, value, doc) {
  const nodeEl = document.createElement("div");
  nodeEl.className = "node";
  nodeEl.setAttribute("role", "treeitem");
  nodeEl.tabIndex = -1;

  if (key !== null) {
    const labelParts = [
      ["key", key],
      ["separator", ": "],
    ].map(([className, text]) => {
      const span = document.createElement("span");
      span.className = className;
      span.textContent = text;
      return span;
    });
    nodeEl.append(...labelParts);
  }

  nodeEl.append(renderValue(value, doc));
  return nodeEl;
}
/**
 * Fill `container` with the direct children of `value`.
 *
 * Shared by renderValue (inside expandables) and renderRef (straight into
 * the ref's children container, which avoids an extra toggle level).
 *
 *  - Stream: one node per dict entry, followed by an image preview, a
 *    parsed/raw content pair (Form XObject) or a raw byte dump.
 *  - Array: one node per element, keyed by its index.
 *  - Other object: one node per entry.
 *  - Anything else: a single key-less node.
 *
 * NOTE(review): the Form XObject branch reads `value.bytes`, so such
 * streams are assumed to carry `bytes` next to `instructions` — confirm
 * against the producer of this data.
 *
 * @param {*} value
 * @param {PDFDocumentProxy} doc
 * @param {Element} container
 */
function buildChildren(value, doc, container) {
  // Appends one rendered node per entry of `entries`.
  const appendEntries = entries => {
    for (const [entryKey, entryValue] of Object.entries(entries)) {
      container.append(renderNode(entryKey, entryValue, doc));
    }
  };
  // Builds the div.bytes-content element holding the formatted byte dump.
  const newBytesContent = bytes => {
    const el = document.createElement("div");
    el.className = "bytes-content";
    el.append(formatBytes(bytes));
    return el;
  };
  const rawBytesLabel = bytes => `<${bytes.length} raw bytes>`;

  if (!isStream(value)) {
    if (Array.isArray(value)) {
      value.forEach((item, index) => {
        container.append(renderNode(String(index), item, doc));
      });
    } else if (value !== null && typeof value === "object") {
      appendEntries(value);
    } else {
      container.append(renderNode(null, value, doc));
    }
    return;
  }

  // Every kind of stream lists its dictionary entries first.
  appendEntries(value.dict);

  if (isImageStream(value)) {
    container.append(renderImageData(value.imageData));
    return;
  }

  if (isFormXObjectStream(value)) {
    // A "content" tree item carrying both views; which one is visible is
    // decided elsewhere (the two spans only differ by class here).
    const contentNode = document.createElement("div");
    contentNode.className = "node";
    contentNode.setAttribute("role", "treeitem");
    contentNode.tabIndex = -1;
    contentNode.append(makeSpan("key", "content"), makeSpan("separator", ": "));

    const parsedView = document.createElement("span");
    parsedView.className = "content-stream-parsed";
    parsedView.append(
      renderExpandable(
        `[Content Stream, ${value.instructions.length} instructions]`,
        "stream-label",
        target => buildInstructionLines(value, target)
      )
    );

    const rawView = document.createElement("span");
    rawView.className = "content-stream-raw";
    rawView.append(
      makeSpan("stream-label", rawBytesLabel(value.bytes)),
      newBytesContent(value.bytes)
    );

    contentNode.append(parsedView, rawView);
    container.append(contentNode);
    return;
  }

  // Plain stream: a "bytes: <N raw bytes>" row followed by the byte dump.
  const bytesRow = document.createElement("div");
  bytesRow.className = "node";
  const rowParts = [
    ["key", "bytes"],
    ["separator", ": "],
    ["stream-label", rawBytesLabel(value.bytes)],
  ].map(([className, text]) => {
    const span = document.createElement("span");
    span.className = className;
    span.textContent = text;
    return span;
  });
  bytesRow.append(...rowParts);
  container.append(bytesRow);
  container.append(newBytesContent(value.bytes));
}
/**
 * Render a single content-stream token as a styled span.
 *
 * Scalar tokens become one span whose class depends on the token type;
 * "array" and "dict" tokens become a wrapper span with bracket spans around
 * their recursively rendered, space-separated items. A missing token is
 * rendered like a null token.
 *
 * @param {Object|null|undefined} token Object with a `type` and, depending
 *   on the type, `value` or `num`/`gen`.
 * @returns {HTMLSpanElement}
 */
function renderToken(token) {
  if (!token) {
    return makeSpan("token-null", "null");
  }

  const blank = () => document.createTextNode(" ");
  // Wrapper span: opening bracket, whatever `appendItems` adds, a space and
  // the closing bracket.
  const bracketed = (className, opening, closing, appendItems) => {
    const wrapper = document.createElement("span");
    wrapper.className = className;
    wrapper.append(makeSpan("bracket", opening));
    appendItems(wrapper);
    wrapper.append(blank(), makeSpan("bracket", closing));
    return wrapper;
  };

  const { type } = token;
  if (type === "array") {
    return bracketed("token-array", "[", "]", wrapper => {
      for (const item of token.value) {
        wrapper.append(blank(), renderToken(item));
      }
    });
  }
  if (type === "dict") {
    return bracketed("token-dict", "<<", ">>", wrapper => {
      for (const [entryName, entryToken] of Object.entries(token.value)) {
        wrapper.append(
          blank(),
          makeSpan("token-name", "/" + entryName),
          blank(),
          renderToken(entryToken)
        );
      }
    });
  }

  // Everything else is a single span: pick its class and text.
  let className;
  let text;
  switch (type) {
    case "cmd":
      className = "token-cmd";
      text = token.value;
      break;
    case "name":
      className = "token-name";
      text = "/" + token.value;
      break;
    case "ref":
      className = "token-ref";
      text = `${token.num} ${token.gen} R`;
      break;
    case "number":
      className = "token-num";
      text = String(token.value);
      break;
    case "string":
      className = "token-str";
      text = JSON.stringify(token.value);
      break;
    case "boolean":
      className = "token-bool";
      text = String(token.value);
      break;
    case "null":
      className = "token-null";
      text = "null";
      break;
    default:
      className = "token-unknown";
      text = String(token.value ?? token.type);
  }
  return makeSpan(className, text);
}
/**
 * Populate container with one .cs-instruction div per instruction, all
 * wrapped in a single div.content-stream.
 * Shared by Page content streams and Form XObject streams.
 *
 * Lines are indented by 1.5em per nesting level. A level is opened after
 * BT, q, BMC and BDC and closed before ET, Q and EMC; the depth never goes
 * below zero, so unbalanced streams still render.
 *
 * @param {Object} val Needs `instructions` (each `{ cmd, args }`, where
 *   `cmd` may be null) and `cmdNames`, which maps an operator to the
 *   descriptive name shown as the operator's tooltip.
 * @param {Element} container Element the div.content-stream is appended to.
 */
function buildInstructionLines(val, container) {
  // Both marked-content openers are listed: EMC closes BMC as well as BDC,
  // so indenting only after BDC would leave every BMC … EMC pair
  // unbalanced and under-indent whatever follows it.
  const opensLevel = new Set(["BT", "q", "BMC", "BDC"]);
  const closesLevel = new Set(["ET", "Q", "EMC"]);

  const pre = document.createElement("div");
  pre.className = "content-stream";
  let depth = 0;
  for (const instr of val.instructions) {
    // Closing operators line up with their opener, so dedent first.
    if (closesLevel.has(instr.cmd)) {
      depth = Math.max(0, depth - 1);
    }
    const line = document.createElement("div");
    line.className = "cs-instruction";
    if (depth > 0) {
      line.style.paddingInlineStart = `${depth * 1.5}em`;
    }
    for (const arg of instr.args) {
      line.append(renderToken(arg), document.createTextNode(" "));
    }
    if (instr.cmd !== null) {
      const cmdEl = makeSpan("token-cmd", instr.cmd);
      // Own-property lookup only: an operator that happens to be called
      // e.g. "constructor" must not pick up a member of Object.prototype.
      const opsName = Object.prototype.hasOwnProperty.call(
        val.cmdNames,
        instr.cmd
      )
        ? val.cmdNames[instr.cmd]
        : undefined;
      if (opsName) {
        cmdEl.title = opsName;
      }
      line.append(cmdEl);
    }
    pre.append(line);
    if (opensLevel.has(instr.cmd)) {
      depth++;
    }
  }
  container.append(pre);
}
/**
 * Render a Page content stream as two pre-built views toggled by CSS:
 *  - .content-stream-parsed: expandable colorized instruction widget
 *  - .content-stream-raw: the raw contents rendered as an ordinary value
 *    (a single entry is rendered directly, several as an array)
 * The active view is controlled by the "parse-cs-active" class on #tree.
 *
 * @param {Object} val Content stream value (see isContentStream).
 * @param {PDFDocumentProxy} doc
 * @returns {DocumentFragment} The parsed view followed by the raw view.
 */
function renderContentStream(val, doc) {
  // Span of the given class wrapping `content`.
  const makeView = (className, content) => {
    const viewEl = document.createElement("span");
    viewEl.className = className;
    viewEl.append(content);
    return viewEl;
  };

  const parsedView = makeView(
    "content-stream-parsed",
    renderExpandable(
      `[Content Stream, ${val.instructions.length} instructions]`,
      "stream-label",
      target => buildInstructionLines(val, target)
    )
  );

  const { rawContents } = val;
  const rawValue = rawContents.length === 1 ? rawContents[0] : rawContents;
  const rawView = makeView("content-stream-raw", renderValue(rawValue, doc));

  const fragment = document.createDocumentFragment();
  fragment.append(parsedView, rawView);
  return fragment;
}
/**
 * Render a value inline (primitive) or as an expandable widget.
 * Returns a Node or DocumentFragment suitable for appendChild().
 *
 * Strings shaped like "10 0 R" and { num, gen } objects become lazy ref
 * widgets, PDF names become "/Name", content streams get their two-view
 * rendering, and streams, non-empty dicts and non-empty arrays become
 * expandables that build their children on first open. Anything that is
 * not a string, number, boolean or object is shown as "null".
 *
 * @param {*} value
 * @param {PDFDocumentProxy} doc
 * @returns {Node|DocumentFragment}
 */
function renderValue(value, doc) {
  // Primitives first; only objects need the structural checks below.
  switch (typeof value) {
    case "string":
      // Ref string ("10 0 R") – lazy expandable via getRawData()
      return REF_RE.test(value)
        ? renderRef(value, doc)
        : makeSpan("str-value", JSON.stringify(value));
    case "number":
      return makeSpan("num-value", String(value));
    case "boolean":
      return makeSpan("bool-value", String(value));
    case "object":
      break;
    default:
      return makeSpan("null-value", "null");
  }
  if (value === null) {
    return makeSpan("null-value", "null");
  }

  // Ref object { num, gen } – lazy expandable via getRawData()
  if (isRefObject(value)) {
    return renderRef(value, doc);
  }
  // PDF Name → /Name
  if (isPDFName(value)) {
    return makeSpan("name-value", "/" + value.name);
  }
  // Content stream (Page Contents) → two pre-built views toggled by CSS
  if (isContentStream(value)) {
    return renderContentStream(value, doc);
  }

  // All remaining containers expand into their direct children.
  const expandable = (label, labelClass) =>
    renderExpandable(label, labelClass, container =>
      buildChildren(value, doc, container)
    );

  // Stream → dict entries + byte count or image preview
  if (isStream(value)) {
    return expandable("[Stream]", "stream-label");
  }

  // Array or plain object (dict): empty ones are shown inline.
  const isArray = Array.isArray(value);
  const size = isArray ? value.length : Object.keys(value).length;
  if (size === 0) {
    return makeSpan("bracket", isArray ? "[]" : "{}");
  }
  return expandable(isArray ? `[${size}]` : `{${size}}`, "bracket");
}
/**
 * Build a lazy-loading expand/collapse widget for a ref (string or object).
 * Results are cached in `refCache` keyed by "num:gen".
 *
 * The referenced data is requested on the first expansion only. While it is
 * pending a "Loading…" status is shown; afterwards the children container
 * holds either the rendered children or an alert with the error message.
 *
 * @param {string|{num: number, gen: number}} ref Either "10 0 R" or a ref
 *   object; it is passed on to `doc.getRawData({ ref })` as received.
 * @param {PDFDocumentProxy} doc
 * @returns {DocumentFragment} Toggle button, label and children container.
 */
function renderRef(ref, doc) {
  // String refs look like "10 0 R"; object refs are { num, gen }. Both
  // forms of the same ref share one cache key.
  const isStringRef = typeof ref === "string";
  const [numPart, genPart] = isStringRef ? ref.split(" ") : [ref.num, ref.gen];
  const cacheKey = `${numPart}:${genPart}`;
  const label = isStringRef ? ref : refLabel(ref);

  const setAttributes = (el, attributes) => {
    for (const [attrName, attrValue] of Object.entries(attributes)) {
      el.setAttribute(attrName, attrValue);
    }
    return el;
  };

  const toggleEl = setAttributes(document.createElement("span"), {
    role: "button",
    tabindex: "0",
    "aria-expanded": "false",
    "aria-label": `Expand reference ${label}`,
  });
  toggleEl.textContent = ARROW_COLLAPSED;

  const refEl = setAttributes(document.createElement("span"), {
    "aria-hidden": "true",
  });
  refEl.className = "ref";
  refEl.textContent = label;

  const childrenEl = setAttributes(document.createElement("div"), {
    role: "group",
    "aria-label": `Contents of reference ${label}`,
  });
  childrenEl.className = "hidden";

  let isOpen = false;
  let fetchStarted = false;
  const toggle = async () => {
    isOpen = !isOpen;
    toggleEl.textContent = isOpen ? ARROW_EXPANDED : ARROW_COLLAPSED;
    toggleEl.setAttribute("aria-expanded", String(isOpen));
    childrenEl.classList.toggle("hidden", !isOpen);
    if (!isOpen || fetchStarted) {
      return;
    }

    // First expansion: fetch (or reuse) the data and build the children.
    fetchStarted = true;
    const spinner = document.createElement("div");
    spinner.setAttribute("role", "status");
    spinner.textContent = "Loading…";
    childrenEl.append(spinner);
    try {
      if (!refCache.has(cacheKey)) {
        refCache.set(cacheKey, doc.getRawData({ ref }));
      }
      const rawData = await refCache.get(cacheKey);
      childrenEl.replaceChildren();
      buildChildren(rawData, doc, childrenEl);
    } catch (err) {
      const errorEl = document.createElement("div");
      errorEl.setAttribute("role", "alert");
      errorEl.textContent = "Error: " + err.message;
      childrenEl.replaceChildren(errorEl);
    }
  };

  // Mouse on the arrow or the label, Enter/Space on the focused arrow.
  toggleEl.addEventListener("click", toggle);
  refEl.addEventListener("click", toggle);
  toggleEl.addEventListener("keydown", event => {
    if (event.key === "Enter" || event.key === " ") {
      event.preventDefault();
      toggle();
    }
  });

  const fragment = document.createDocumentFragment();
  fragment.append(toggleEl, refEl, childrenEl);
  return fragment;
}
/**
 * Build a synchronous expand/collapse widget: a toggle button, a label and
 * an initially hidden children container. The children are built once, on
 * the first expansion; later toggles only show or hide them.
 *
 * @param {string} label Text shown on the collapsed line.
 * @param {string} labelClass CSS class for the label.
 * @param {function} buildFn Called with (containerEl) on first open.
 * @returns {DocumentFragment} Toggle, label and children container.
 */
function renderExpandable(label, labelClass, buildFn) {
  const setAttributes = (el, attributes) => {
    for (const [attrName, attrValue] of Object.entries(attributes)) {
      el.setAttribute(attrName, attrValue);
    }
    return el;
  };

  const toggleEl = setAttributes(document.createElement("span"), {
    role: "button",
    tabindex: "0",
    "aria-expanded": "false",
    "aria-label": `Expand ${label}`,
  });
  toggleEl.textContent = ARROW_COLLAPSED;

  const labelEl = setAttributes(document.createElement("span"), {
    "aria-hidden": "true",
  });
  labelEl.className = labelClass;
  labelEl.textContent = label;

  const childrenEl = setAttributes(document.createElement("div"), {
    role: "group",
    "aria-label": `Contents of ${label}`,
  });
  childrenEl.className = "hidden";

  let isOpen = false;
  let isBuilt = false;
  const toggle = () => {
    isOpen = !isOpen;
    toggleEl.textContent = isOpen ? ARROW_EXPANDED : ARROW_COLLAPSED;
    toggleEl.setAttribute("aria-expanded", String(isOpen));
    childrenEl.classList.toggle("hidden", !isOpen);
    if (!isOpen || isBuilt) {
      return;
    }
    isBuilt = true;
    buildFn(childrenEl);
  };

  // Mouse on the arrow or the label, Enter/Space on the focused arrow.
  toggleEl.addEventListener("click", toggle);
  labelEl.addEventListener("click", toggle);
  toggleEl.addEventListener("keydown", event => {
    if (event.key === "Enter" || event.key === " ") {
      event.preventDefault();
      toggle();
    }
  });

  const fragment = document.createDocumentFragment();
  fragment.append(toggleEl, labelEl, childrenEl);
  return fragment;
}
/**
 * Build a DocumentFragment for the byte string.
 * Printable ASCII (0x20–0x7e) runs become plain text nodes.
 * Consecutive non-printable bytes are grouped into a single element,
 * with each byte shown as uppercase XX, separated by a narrow space.
 */
/**
* Render image data (RGBA Uint8ClampedArray) into a