Merge pull request #21002 from calixteman/ps_eval

[api-minor] Rewrite the ps lexer & parser and add a small Wasm compiler
This commit is contained in:
calixteman 2026-03-30 09:23:13 +02:00 committed by GitHub
commit a40b91f0bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 4478 additions and 0 deletions

View File

@ -21,18 +21,30 @@ import {
MathClamp,
shadow,
unreachable,
warn,
} from "../shared/util.js";
import { PostScriptLexer, PostScriptParser } from "./ps_parser.js";
import { BaseStream } from "./base_stream.js";
import { buildPostScriptWasmFunction } from "./postscript/wasm_compiler.js";
import { isNumberArray } from "./core_utils.js";
import { LocalFunctionCache } from "./image_utils.js";
class PDFFunctionFactory {
static #useWasm = true;
static setOptions({ useWasm }) {
PDFFunctionFactory.#useWasm = useWasm;
}
constructor({ xref, isEvalSupported = true }) {
this.xref = xref;
this.isEvalSupported = isEvalSupported !== false;
}
get useWasm() {
return PDFFunctionFactory.#useWasm;
}
create(fn, parseArray = false) {
let fnRef, parsedFn;
@ -358,6 +370,24 @@ class PDFFunction {
throw new FormatError("No range.");
}
if (factory.useWasm) {
try {
const wasmFn = buildPostScriptWasmFunction(
fn.getString(),
domain,
range
);
if (wasmFn) {
return wasmFn; // (src, srcOffset, dest, destOffset) → void
}
} catch {
// Fall through to the existing interpreter-based path.
}
}
warn("Unable to compile PS function, using interpreter");
fn.reset();
const lexer = new PostScriptLexer(fn);
const parser = new PostScriptParser(lexer);
const code = parser.parse();

View File

@ -28,6 +28,7 @@ import { JpxImage } from "./jpx.js";
import { MissingDataException } from "./core_utils.js";
import { OperatorList } from "./operator_list.js";
import { PDFDocument } from "./document.js";
import { PDFFunctionFactory } from "./function.js";
import { Stream } from "./stream.js";
function parseDocBaseUrl(url) {
@ -97,6 +98,7 @@ class BasePdfManager {
IccColorSpace.setOptions(options);
CmykICCBasedCS.setOptions(options);
JBig2CCITTFaxWasmImage.setOptions(options);
PDFFunctionFactory.setOptions(options);
}
get docId() {

1307
src/core/postscript/ast.js Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,225 @@
/* Copyright 2026 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Numeric identifiers for every token kind handled by the PostScript lexer.
 *
 * The ids are consecutive integers and their ORDER is significant: the
 * `Lexer` treats every id in the inclusive range `TOKEN.true`..`TOKEN.ifelse`
 * as a keyword operator (its shared token carries the key name as `value`).
 * Entries outside that range (`number`, `lbrace`, `rbrace`, `eof`, `min`,
 * `max`) are never produced from an identifier in the input.
 * Keep any new keyword operator inside that range.
 */
const TOKEN = {
// Structural tokens — not keyword operators
number: 0,
lbrace: 1,
rbrace: 2,
// Boolean literals
true: 3,
false: 4,
// Arithmetic binary operators
add: 5,
sub: 6,
mul: 7,
div: 8,
idiv: 9,
mod: 10,
exp: 11,
// Comparison binary operators
eq: 12,
ne: 13,
gt: 14,
ge: 15,
lt: 16,
le: 17,
// Bitwise / boolean binary operators
and: 18,
or: 19,
xor: 20,
bitshift: 21,
// Unary arithmetic operators
abs: 22,
neg: 23,
ceiling: 24,
floor: 25,
round: 26,
truncate: 27,
// Unary boolean / bitwise operator
not: 28,
// Mathematical functions — unary
sqrt: 29,
sin: 30,
cos: 31,
ln: 32,
log: 33,
// Mathematical function — binary
atan: 34,
// Type conversion operators
cvi: 35,
cvr: 36,
// Stack operators
dup: 37,
exch: 38,
pop: 39,
copy: 40,
index: 41,
roll: 42,
// Control flow
if: 43,
ifelse: 44,
// End of input
eof: 45,
// Synthetic: produced by the optimizer, never emitted by the lexer.
// (They sit after `ifelse`, so the lexer does not register them as
// keyword operators.)
min: 46,
max: 47,
};
/**
 * A single lexical token: a numeric kind identifier plus an optional payload.
 */
class Token {
  /** Token kind; one of the `TOKEN` ids. */
  id;

  /** Payload attached to the token, or `null` when there is none. */
  value;

  /**
   * @param {number} id - Token kind identifier.
   * @param {*} [value] - Payload for the token; defaults to `null`.
   */
  constructor(id, value = null) {
    this.id = id;
    this.value = value;
  }
}
/**
 * Tokenizer for PostScript function source text.
 *
 * Construct it with the program as a string, then call `next()` repeatedly;
 * it yields one `Token` per call and the shared `eof` token once the input is
 * exhausted. Every call that returns a non-`eof` token consumes at least one
 * character, so a "read until eof" loop always terminates.
 *
 * Anything that cannot be tokenized (a stray sign or dot, a numeric literal
 * that overflows to a non-finite value, an unknown identifier, an unexpected
 * character) is reported as a `number` token with value 0.
 */
class Lexer {
  // Singletons for every non-number token, built lazily on first construction.
  // Keyword operator tokens carry their name as `value`; structural tokens
  // (lbrace, rbrace, eof) carry null.
  static #singletons = null;

  // Subset of `#singletons` that an identifier in the input may resolve to.
  static #operatorSingletons = null;

  static #initSingletons() {
    const singletons = Object.create(null);
    const operatorSingletons = Object.create(null);
    for (const [name, id] of Object.entries(TOKEN)) {
      if (name === "number") {
        // Number tokens carry a per-occurrence value, so they are not shared.
        continue;
      }
      const isOperator = id >= TOKEN.true && id <= TOKEN.ifelse;
      const token = new Token(id, isOperator ? name : null);
      singletons[name] = token;
      if (isOperator) {
        operatorSingletons[name] = token;
      }
    }
    Lexer.#singletons = singletons;
    Lexer.#operatorSingletons = operatorSingletons;
  }

  /**
   * @param {string} data - Source text to tokenize.
   */
  constructor(data) {
    if (!Lexer.#singletons) {
      Lexer.#initSingletons();
    }
    this.data = data;
    this.pos = 0;
    this.len = data.length;
    // Sticky regexes: set lastIndex before exec() to match at an exact offset.
    this._numberPattern = /[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/y;
    this._identifierPattern = /[a-z]+/y;
  }

  // Skip a % comment, advancing past the next \n or \r (or to EOF).
  _skipComment() {
    const lf = this.data.indexOf("\n", this.pos);
    const cr = this.data.indexOf("\r", this.pos);
    // Treat a missing EOL as this.len so Math.min picks the one that exists.
    const eol = Math.min(lf < 0 ? this.len : lf, cr < 0 ? this.len : cr);
    this.pos = Math.min(eol + 1, this.len);
  }

  /**
   * Read a numeric literal starting at `this.pos`.
   *
   * @returns {Token} A `number` token; its value is 0 when the text at
   *   `this.pos` is not a number or is not representable as a finite value.
   */
  _getNumber() {
    this._numberPattern.lastIndex = this.pos;
    const match = this._numberPattern.exec(this.data);
    if (!match) {
      // A lone "+", "-" or "." that does not start a number. Consume that
      // character; otherwise the next call would see it again and the caller
      // would never reach eof.
      this.pos = Math.min(this.pos + 1, this.len);
      return new Token(TOKEN.number, 0);
    }
    // Consume the literal *before* validating it, so that an overflowing
    // literal (e.g. "1e999") is skipped instead of being re-read forever.
    this.pos = this._numberPattern.lastIndex;
    const number = parseFloat(match[0]);
    return new Token(TOKEN.number, Number.isFinite(number) ? number : 0);
  }

  /**
   * Read a lowercase identifier starting at `this.pos` and map it to its
   * shared operator token.
   *
   * @returns {Token} The operator singleton, or a `number` token with value 0
   *   when the identifier is not a known operator.
   */
  _getOperator() {
    this._identifierPattern.lastIndex = this.pos;
    const match = this._identifierPattern.exec(this.data);
    if (!match) {
      // Not reachable from next(), which only dispatches here on [a-z].
      return new Token(TOKEN.number, 0);
    }
    this.pos = this._identifierPattern.lastIndex;
    return Lexer.#operatorSingletons[match[0]] ?? new Token(TOKEN.number, 0);
  }

  /**
   * Return the next token, or the shared `eof` token at end of input.
   * Calling it again after `eof` keeps returning `eof`.
   *
   * @returns {Token}
   */
  next() {
    while (this.pos < this.len) {
      const ch = this.data.charCodeAt(this.pos++);
      switch (ch) {
        // PostScript white-space characters (PDF32000 §7.2.2)
        case 0x00 /* NUL */:
        case 0x09 /* HT */:
        case 0x0a /* LF */:
        case 0x0c /* FF */:
        case 0x0d /* CR */:
        case 0x20 /* SP */:
          break;
        case 0x25 /* % — comment */:
          this._skipComment();
          break;
        case 0x7b /* { */:
          return Lexer.#singletons.lbrace;
        case 0x7d /* } */:
          return Lexer.#singletons.rbrace;
        case 0x2b /* + */:
        case 0x2d /* - */:
        case 0x2e /* . */:
          // Step back so the number pattern sees the sign / leading dot.
          this.pos--;
          return this._getNumber();
        default:
          if (ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */) {
            this.pos--;
            return this._getNumber();
          }
          if (ch >= 0x61 /* a */ && ch <= 0x7a /* z */) {
            this.pos--;
            return this._getOperator();
          }
          // Unexpected character: already consumed by the `pos++` above.
          return new Token(TOKEN.number, 0);
      }
    }
    return Lexer.#singletons.eof;
  }
}
export { Lexer, Token, TOKEN };

File diff suppressed because it is too large Load Diff

View File

@ -44,6 +44,7 @@
"pdf_spec.js",
"pdf_viewer.component_spec.js",
"pdf_viewer_spec.js",
"postscript_spec.js",
"primitives_spec.js",
"stream_spec.js",
"struct_tree_spec.js",

View File

@ -87,6 +87,7 @@ async function initializePDFJS(callback) {
"pdfjs-test/unit/pdf_spec.js",
"pdfjs-test/unit/pdf_viewer.component_spec.js",
"pdfjs-test/unit/pdf_viewer_spec.js",
"pdfjs-test/unit/postscript_spec.js",
"pdfjs-test/unit/primitives_spec.js",
"pdfjs-test/unit/scripting_spec.js",
"pdfjs-test/unit/stream_spec.js",

1847
test/unit/postscript_spec.js Normal file

File diff suppressed because it is too large Load Diff