Merge pull request #21002 from calixteman/ps_eval

[api-minor] Rewrite the ps lexer & parser and add a small Wasm compiler
2026-05-31 07:11:00 +02:00 · 2026-03-30 09:23:13 +02:00 · 2026-03-30 09:23:13 +02:00 · a40b91f0bb
commit a40b91f0bb
parent cb2640dc33 952952c905
8 changed files with 4478 additions and 0 deletions
--- a/src/core/function.js
+++ b/src/core/function.js
@ -21,18 +21,30 @@ import {
  MathClamp,
  shadow,
  unreachable,
+  warn,
 } from "../shared/util.js";
 import { PostScriptLexer, PostScriptParser } from "./ps_parser.js";
 import { BaseStream } from "./base_stream.js";
+import { buildPostScriptWasmFunction } from "./postscript/wasm_compiler.js";
 import { isNumberArray } from "./core_utils.js";
 import { LocalFunctionCache } from "./image_utils.js";

 class PDFFunctionFactory {
+  static #useWasm = true;
+
+  static setOptions({ useWasm }) {
+    PDFFunctionFactory.#useWasm = useWasm;
+  }
+
  constructor({ xref, isEvalSupported = true }) {
    this.xref = xref;
    this.isEvalSupported = isEvalSupported !== false;
  }

+  get useWasm() {
+    return PDFFunctionFactory.#useWasm;
+  }
+
  create(fn, parseArray = false) {
    let fnRef, parsedFn;

@ -358,6 +370,24 @@ class PDFFunction {
      throw new FormatError("No range.");
    }

+    if (factory.useWasm) {
+      try {
+        const wasmFn = buildPostScriptWasmFunction(
+          fn.getString(),
+          domain,
+          range
+        );
+        if (wasmFn) {
+          return wasmFn; // (src, srcOffset, dest, destOffset) → void
+        }
+      } catch {
+        // Fall through to the existing interpreter-based path.
+      }
+    }
+
+    warn("Unable to compile PS function, using interpreter");
+    fn.reset();
+
    const lexer = new PostScriptLexer(fn);
    const parser = new PostScriptParser(lexer);
    const code = parser.parse();
--- a/src/core/pdf_manager.js
+++ b/src/core/pdf_manager.js
@ -28,6 +28,7 @@ import { JpxImage } from "./jpx.js";
 import { MissingDataException } from "./core_utils.js";
 import { OperatorList } from "./operator_list.js";
 import { PDFDocument } from "./document.js";
+import { PDFFunctionFactory } from "./function.js";
 import { Stream } from "./stream.js";

 function parseDocBaseUrl(url) {
@ -97,6 +98,7 @@ class BasePdfManager {
    IccColorSpace.setOptions(options);
    CmykICCBasedCS.setOptions(options);
    JBig2CCITTFaxWasmImage.setOptions(options);
+    PDFFunctionFactory.setOptions(options);
  }

  get docId() {
--- a/src/core/postscript/ast.js
+++ b/src/core/postscript/ast.js
--- a/src/core/postscript/lexer.js
+++ b/src/core/postscript/lexer.js
@ -0,0 +1,225 @@
+/* Copyright 2026 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const TOKEN = {
+  // Structural tokens — not keyword operators
+  number: 0,
+  lbrace: 1,
+  rbrace: 2,
+
+  // Boolean literals
+  true: 3,
+  false: 4,
+
+  // Arithmetic binary operators
+  add: 5,
+  sub: 6,
+  mul: 7,
+  div: 8,
+  idiv: 9,
+  mod: 10,
+  exp: 11,
+
+  // Comparison binary operators
+  eq: 12,
+  ne: 13,
+  gt: 14,
+  ge: 15,
+  lt: 16,
+  le: 17,
+
+  // Bitwise / boolean binary operators
+  and: 18,
+  or: 19,
+  xor: 20,
+  bitshift: 21,
+
+  // Unary arithmetic operators
+  abs: 22,
+  neg: 23,
+  ceiling: 24,
+  floor: 25,
+  round: 26,
+  truncate: 27,
+
+  // Unary boolean / bitwise operator
+  not: 28,
+
+  // Mathematical functions — unary
+  sqrt: 29,
+  sin: 30,
+  cos: 31,
+  ln: 32,
+  log: 33,
+
+  // Mathematical function — binary
+  atan: 34,
+
+  // Type conversion operators
+  cvi: 35,
+  cvr: 36,
+
+  // Stack operators
+  dup: 37,
+  exch: 38,
+  pop: 39,
+  copy: 40,
+  index: 41,
+  roll: 42,
+
+  // Control flow
+  if: 43,
+  ifelse: 44,
+
+  // End of input
+  eof: 45,
+
+  // Synthetic: produced by the optimizer, never emitted by the lexer.
+  min: 46,
+  max: 47,
+};
+
+class Token {
+  constructor(id, value = null) {
+    this.id = id;
+    this.value = value;
+  }
+}
+
+class Lexer {
+  // Singletons for every non-number token, built lazily on first construction.
+  // Keyword operator tokens carry their name as `value`; structural tokens
+  // (lbrace, rbrace, eof) carry null.
+  static #singletons = null;
+
+  static #operatorSingletons = null;
+
+  static #initSingletons() {
+    const singletons = Object.create(null);
+    const operatorSingletons = Object.create(null);
+    for (const [name, id] of Object.entries(TOKEN)) {
+      if (name === "number") {
+        continue;
+      }
+      const isOperator = id >= TOKEN.true && id <= TOKEN.ifelse;
+      const token = new Token(id, isOperator ? name : null);
+      singletons[name] = token;
+      if (isOperator) {
+        operatorSingletons[name] = token;
+      }
+    }
+    Lexer.#singletons = singletons;
+    Lexer.#operatorSingletons = operatorSingletons;
+  }
+
+  constructor(data) {
+    if (!Lexer.#singletons) {
+      Lexer.#initSingletons();
+    }
+    this.data = data;
+    this.pos = 0;
+    this.len = data.length;
+    // Sticky regexes: set lastIndex before exec() to match at an exact offset.
+    this._numberPattern = /[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/y;
+    this._identifierPattern = /[a-z]+/y;
+  }
+
+  // Skip a % comment, advancing past the next \n or \r (or to EOF).
+  _skipComment() {
+    const lf = this.data.indexOf("\n", this.pos);
+    const cr = this.data.indexOf("\r", this.pos);
+    // Treat a missing EOL as this.len so Math.min picks the one that exists.
+    const eol = Math.min(lf < 0 ? this.len : lf, cr < 0 ? this.len : cr);
+    this.pos = Math.min(eol + 1, this.len);
+  }
+
+  _getNumber() {
+    this._numberPattern.lastIndex = this.pos;
+    const match = this._numberPattern.exec(this.data);
+    if (!match) {
+      return new Token(TOKEN.number, 0);
+    }
+    const number = parseFloat(match[0]);
+    if (!Number.isFinite(number)) {
+      return new Token(TOKEN.number, 0);
+    }
+    this.pos = this._numberPattern.lastIndex;
+    return new Token(TOKEN.number, number);
+  }
+
+  _getOperator() {
+    this._identifierPattern.lastIndex = this.pos;
+    const match = this._identifierPattern.exec(this.data);
+    if (!match) {
+      return new Token(TOKEN.number, 0);
+    }
+    this.pos = this._identifierPattern.lastIndex;
+    const op = match[0];
+    const token = Lexer.#operatorSingletons[op];
+    if (!token) {
+      return new Token(TOKEN.number, 0);
+    }
+    return token;
+  }
+
+  // Return the next token, or Lexer.#singletons.eof at end of input.
+  next() {
+    while (this.pos < this.len) {
+      const ch = this.data.charCodeAt(this.pos++);
+      switch (ch) {
+        // PostScript white-space characters (PDF32000 §7.2.2)
+        case 0x00 /* NUL */:
+        case 0x09 /* HT */:
+        case 0x0a /* LF */:
+        case 0x0c /* FF */:
+        case 0x0d /* CR */:
+        case 0x20 /* SP */:
+          break;
+
+        case 0x25 /* % — comment */:
+          this._skipComment();
+          break;
+
+        case 0x7b /* { */:
+          return Lexer.#singletons.lbrace;
+        case 0x7d /* } */:
+          return Lexer.#singletons.rbrace;
+
+        case 0x2b /* + */:
+        case 0x2d /* - */:
+          this.pos--;
+          return this._getNumber();
+
+        case 0x2e /* . */:
+          this.pos--;
+          return this._getNumber();
+
+        default:
+          if (ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */) {
+            this.pos--;
+            return this._getNumber();
+          }
+          if (ch >= 0x61 /* a */ && ch <= 0x7a /* z */) {
+            this.pos--;
+            return this._getOperator();
+          }
+          return new Token(TOKEN.number, 0);
+      }
+    }
+    return Lexer.#singletons.eof;
+  }
+}
+
+export { Lexer, Token, TOKEN };
--- a/src/core/postscript/wasm_compiler.js
+++ b/src/core/postscript/wasm_compiler.js
--- a/test/unit/clitests.json
+++ b/test/unit/clitests.json
@ -44,6 +44,7 @@
    "pdf_spec.js",
    "pdf_viewer.component_spec.js",
    "pdf_viewer_spec.js",
+    "postscript_spec.js",
    "primitives_spec.js",
    "stream_spec.js",
    "struct_tree_spec.js",
--- a/test/unit/jasmine-boot.js
+++ b/test/unit/jasmine-boot.js
@ -87,6 +87,7 @@ async function initializePDFJS(callback) {
      "pdfjs-test/unit/pdf_spec.js",
      "pdfjs-test/unit/pdf_viewer.component_spec.js",
      "pdfjs-test/unit/pdf_viewer_spec.js",
+      "pdfjs-test/unit/postscript_spec.js",
      "pdfjs-test/unit/primitives_spec.js",
      "pdfjs-test/unit/scripting_spec.js",
      "pdfjs-test/unit/stream_spec.js",
--- a/test/unit/postscript_spec.js
+++ b/test/unit/postscript_spec.js