mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-25 01:25:51 +02:00
Merge pull request #21002 from calixteman/ps_eval
[api-minor] Rewrite the ps lexer & parser and add a small Wasm compiler
This commit is contained in:
commit
a40b91f0bb
@ -21,18 +21,30 @@ import {
|
|||||||
MathClamp,
|
MathClamp,
|
||||||
shadow,
|
shadow,
|
||||||
unreachable,
|
unreachable,
|
||||||
|
warn,
|
||||||
} from "../shared/util.js";
|
} from "../shared/util.js";
|
||||||
import { PostScriptLexer, PostScriptParser } from "./ps_parser.js";
|
import { PostScriptLexer, PostScriptParser } from "./ps_parser.js";
|
||||||
import { BaseStream } from "./base_stream.js";
|
import { BaseStream } from "./base_stream.js";
|
||||||
|
import { buildPostScriptWasmFunction } from "./postscript/wasm_compiler.js";
|
||||||
import { isNumberArray } from "./core_utils.js";
|
import { isNumberArray } from "./core_utils.js";
|
||||||
import { LocalFunctionCache } from "./image_utils.js";
|
import { LocalFunctionCache } from "./image_utils.js";
|
||||||
|
|
||||||
class PDFFunctionFactory {
|
class PDFFunctionFactory {
|
||||||
|
static #useWasm = true;
|
||||||
|
|
||||||
|
static setOptions({ useWasm }) {
|
||||||
|
PDFFunctionFactory.#useWasm = useWasm;
|
||||||
|
}
|
||||||
|
|
||||||
constructor({ xref, isEvalSupported = true }) {
|
constructor({ xref, isEvalSupported = true }) {
|
||||||
this.xref = xref;
|
this.xref = xref;
|
||||||
this.isEvalSupported = isEvalSupported !== false;
|
this.isEvalSupported = isEvalSupported !== false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get useWasm() {
|
||||||
|
return PDFFunctionFactory.#useWasm;
|
||||||
|
}
|
||||||
|
|
||||||
create(fn, parseArray = false) {
|
create(fn, parseArray = false) {
|
||||||
let fnRef, parsedFn;
|
let fnRef, parsedFn;
|
||||||
|
|
||||||
@ -358,6 +370,24 @@ class PDFFunction {
|
|||||||
throw new FormatError("No range.");
|
throw new FormatError("No range.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (factory.useWasm) {
|
||||||
|
try {
|
||||||
|
const wasmFn = buildPostScriptWasmFunction(
|
||||||
|
fn.getString(),
|
||||||
|
domain,
|
||||||
|
range
|
||||||
|
);
|
||||||
|
if (wasmFn) {
|
||||||
|
return wasmFn; // (src, srcOffset, dest, destOffset) → void
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Fall through to the existing interpreter-based path.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
warn("Unable to compile PS function, using interpreter");
|
||||||
|
fn.reset();
|
||||||
|
|
||||||
const lexer = new PostScriptLexer(fn);
|
const lexer = new PostScriptLexer(fn);
|
||||||
const parser = new PostScriptParser(lexer);
|
const parser = new PostScriptParser(lexer);
|
||||||
const code = parser.parse();
|
const code = parser.parse();
|
||||||
|
|||||||
@ -28,6 +28,7 @@ import { JpxImage } from "./jpx.js";
|
|||||||
import { MissingDataException } from "./core_utils.js";
|
import { MissingDataException } from "./core_utils.js";
|
||||||
import { OperatorList } from "./operator_list.js";
|
import { OperatorList } from "./operator_list.js";
|
||||||
import { PDFDocument } from "./document.js";
|
import { PDFDocument } from "./document.js";
|
||||||
|
import { PDFFunctionFactory } from "./function.js";
|
||||||
import { Stream } from "./stream.js";
|
import { Stream } from "./stream.js";
|
||||||
|
|
||||||
function parseDocBaseUrl(url) {
|
function parseDocBaseUrl(url) {
|
||||||
@ -97,6 +98,7 @@ class BasePdfManager {
|
|||||||
IccColorSpace.setOptions(options);
|
IccColorSpace.setOptions(options);
|
||||||
CmykICCBasedCS.setOptions(options);
|
CmykICCBasedCS.setOptions(options);
|
||||||
JBig2CCITTFaxWasmImage.setOptions(options);
|
JBig2CCITTFaxWasmImage.setOptions(options);
|
||||||
|
PDFFunctionFactory.setOptions(options);
|
||||||
}
|
}
|
||||||
|
|
||||||
get docId() {
|
get docId() {
|
||||||
|
|||||||
1307
src/core/postscript/ast.js
Normal file
1307
src/core/postscript/ast.js
Normal file
File diff suppressed because it is too large
Load Diff
225
src/core/postscript/lexer.js
Normal file
225
src/core/postscript/lexer.js
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
/* Copyright 2026 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Numeric id of every PostScript token kind, keyed by token name.
// Ids are assigned sequentially from 0 in the order the names are listed
// below, so the position of a name in this list IS its id. The order matters:
// the Lexer classifies every id in the range [true, ifelse] as a keyword
// operator, so new operators must be inserted inside that range.
const TOKEN = Object.fromEntries(
  [
    // Structural tokens — not keyword operators
    "number", // 0
    "lbrace", // 1
    "rbrace", // 2

    // Boolean literals
    "true", // 3
    "false", // 4

    // Arithmetic binary operators
    "add", // 5
    "sub", // 6
    "mul", // 7
    "div", // 8
    "idiv", // 9
    "mod", // 10
    "exp", // 11

    // Comparison binary operators
    "eq", // 12
    "ne", // 13
    "gt", // 14
    "ge", // 15
    "lt", // 16
    "le", // 17

    // Bitwise / boolean binary operators
    "and", // 18
    "or", // 19
    "xor", // 20
    "bitshift", // 21

    // Unary arithmetic operators
    "abs", // 22
    "neg", // 23
    "ceiling", // 24
    "floor", // 25
    "round", // 26
    "truncate", // 27

    // Unary boolean / bitwise operator
    "not", // 28

    // Mathematical functions — unary
    "sqrt", // 29
    "sin", // 30
    "cos", // 31
    "ln", // 32
    "log", // 33

    // Mathematical function — binary
    "atan", // 34

    // Type conversion operators
    "cvi", // 35
    "cvr", // 36

    // Stack operators
    "dup", // 37
    "exch", // 38
    "pop", // 39
    "copy", // 40
    "index", // 41
    "roll", // 42

    // Control flow
    "if", // 43
    "ifelse", // 44

    // End of input
    "eof", // 45

    // Synthetic: produced by the optimizer, never emitted by the lexer.
    "min", // 46
    "max", // 47
  ].map((name, id) => [name, id])
);
|
||||||
|
|
||||||
|
/**
 * A lexical token: a numeric kind (`id`, one of the TOKEN values) plus an
 * optional payload (`value`).
 */
class Token {
  /**
   * @param {number} id - Token kind.
   * @param {*} [value] - Payload stored on the token; `null` when omitted.
   */
  constructor(id, value = null) {
    Object.assign(this, { id, value });
  }
}
|
||||||
|
|
||||||
|
/**
 * Tokenizer for PostScript calculator-function source text.
 *
 * Construct it with the program text as a string, then call `next()`
 * repeatedly; it returns one Token per call and the shared `eof` token once
 * the input is exhausted. Every call to `next()` consumes at least one
 * character of input, so the token stream always terminates.
 */
class Lexer {
  // Singletons for every non-number token, built lazily on first construction.
  // Keyword operator tokens carry their name as `value`; structural tokens
  // (lbrace, rbrace, eof) carry null.
  static #singletons = null;

  // Subset of `#singletons` holding only the keyword operators; used to
  // resolve identifiers read from the input.
  static #operatorSingletons = null;

  // Build one shared Token per TOKEN entry (except `number`, whose tokens
  // carry a per-occurrence value and therefore cannot be shared).
  static #initSingletons() {
    const singletons = Object.create(null);
    const operatorSingletons = Object.create(null);
    for (const [name, id] of Object.entries(TOKEN)) {
      if (name === "number") {
        continue;
      }
      // Ids in the range [true, ifelse] are the keywords that may appear in
      // the source text; everything else is structural or synthetic.
      const isOperator = id >= TOKEN.true && id <= TOKEN.ifelse;
      const token = new Token(id, isOperator ? name : null);
      singletons[name] = token;
      if (isOperator) {
        operatorSingletons[name] = token;
      }
    }
    Lexer.#singletons = singletons;
    Lexer.#operatorSingletons = operatorSingletons;
  }

  /**
   * @param {string} data - The PostScript source to tokenize.
   */
  constructor(data) {
    if (!Lexer.#singletons) {
      Lexer.#initSingletons();
    }
    this.data = data;
    this.pos = 0;
    this.len = data.length;
    // Sticky regexes: set lastIndex before exec() to match at an exact offset.
    this._numberPattern = /[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/y;
    this._identifierPattern = /[a-z]+/y;
  }

  // Skip a % comment, advancing past the next \n or \r (or to EOF).
  _skipComment() {
    const lf = this.data.indexOf("\n", this.pos);
    const cr = this.data.indexOf("\r", this.pos);
    // Treat a missing EOL as this.len so Math.min picks the one that exists.
    const eol = Math.min(lf < 0 ? this.len : lf, cr < 0 ? this.len : cr);
    this.pos = Math.min(eol + 1, this.len);
  }

  /**
   * Read a numeric literal starting at `this.pos`.
   *
   * Malformed input (a lone sign or dot) and literals whose value is not
   * finite both yield a `number` token with value 0. In every case the
   * offending text is consumed, otherwise the caller would see the same
   * character again on the next call and never reach EOF.
   *
   * @returns {Token} A `number` token.
   */
  _getNumber() {
    this._numberPattern.lastIndex = this.pos;
    const match = this._numberPattern.exec(this.data);
    if (!match) {
      // Not a number after all (e.g. "+", "-" or "." on its own): drop the
      // single character that brought us here so that progress is made.
      this.pos++;
      return new Token(TOKEN.number, 0);
    }
    // Always step over the literal, even when its value is rejected below.
    this.pos = this._numberPattern.lastIndex;
    const number = parseFloat(match[0]);
    if (!Number.isFinite(number)) {
      // E.g. "1e999" overflows to Infinity.
      return new Token(TOKEN.number, 0);
    }
    return new Token(TOKEN.number, number);
  }

  /**
   * Read a lowercase identifier starting at `this.pos` and map it to its
   * keyword-operator singleton.
   *
   * @returns {Token} The operator token, or a `number` token with value 0
   *   when the identifier is not a known operator.
   */
  _getOperator() {
    this._identifierPattern.lastIndex = this.pos;
    const match = this._identifierPattern.exec(this.data);
    if (!match) {
      // Only reachable if called at a non-letter; still consume one
      // character so the lexer cannot stall.
      this.pos++;
      return new Token(TOKEN.number, 0);
    }
    this.pos = this._identifierPattern.lastIndex;
    return Lexer.#operatorSingletons[match[0]] ?? new Token(TOKEN.number, 0);
  }

  /**
   * Return the next token, or the shared `eof` token at end of input.
   * White-space and % comments are skipped; any character that cannot start
   * a token is consumed and reported as a `number` token with value 0.
   *
   * @returns {Token}
   */
  next() {
    while (this.pos < this.len) {
      const ch = this.data.charCodeAt(this.pos++);
      switch (ch) {
        // PostScript white-space characters (PDF32000 §7.2.2)
        case 0x00 /* NUL */:
        case 0x09 /* HT */:
        case 0x0a /* LF */:
        case 0x0c /* FF */:
        case 0x0d /* CR */:
        case 0x20 /* SP */:
          break;

        case 0x25 /* % — comment */:
          this._skipComment();
          break;

        case 0x7b /* { */:
          return Lexer.#singletons.lbrace;
        case 0x7d /* } */:
          return Lexer.#singletons.rbrace;

        case 0x2b /* + */:
        case 0x2d /* - */:
        case 0x2e /* . */:
          // Rewind so the number pattern sees the sign / leading dot.
          this.pos--;
          return this._getNumber();

        default:
          if (ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */) {
            this.pos--;
            return this._getNumber();
          }
          if (ch >= 0x61 /* a */ && ch <= 0x7a /* z */) {
            this.pos--;
            return this._getOperator();
          }
          // Unrecognised character: already consumed above.
          return new Token(TOKEN.number, 0);
      }
    }
    return Lexer.#singletons.eof;
  }
}
|
||||||
|
|
||||||
|
export { Lexer, Token, TOKEN };
|
||||||
1065
src/core/postscript/wasm_compiler.js
Normal file
1065
src/core/postscript/wasm_compiler.js
Normal file
File diff suppressed because it is too large
Load Diff
@ -44,6 +44,7 @@
|
|||||||
"pdf_spec.js",
|
"pdf_spec.js",
|
||||||
"pdf_viewer.component_spec.js",
|
"pdf_viewer.component_spec.js",
|
||||||
"pdf_viewer_spec.js",
|
"pdf_viewer_spec.js",
|
||||||
|
"postscript_spec.js",
|
||||||
"primitives_spec.js",
|
"primitives_spec.js",
|
||||||
"stream_spec.js",
|
"stream_spec.js",
|
||||||
"struct_tree_spec.js",
|
"struct_tree_spec.js",
|
||||||
|
|||||||
@ -87,6 +87,7 @@ async function initializePDFJS(callback) {
|
|||||||
"pdfjs-test/unit/pdf_spec.js",
|
"pdfjs-test/unit/pdf_spec.js",
|
||||||
"pdfjs-test/unit/pdf_viewer.component_spec.js",
|
"pdfjs-test/unit/pdf_viewer.component_spec.js",
|
||||||
"pdfjs-test/unit/pdf_viewer_spec.js",
|
"pdfjs-test/unit/pdf_viewer_spec.js",
|
||||||
|
"pdfjs-test/unit/postscript_spec.js",
|
||||||
"pdfjs-test/unit/primitives_spec.js",
|
"pdfjs-test/unit/primitives_spec.js",
|
||||||
"pdfjs-test/unit/scripting_spec.js",
|
"pdfjs-test/unit/scripting_spec.js",
|
||||||
"pdfjs-test/unit/stream_spec.js",
|
"pdfjs-test/unit/stream_spec.js",
|
||||||
|
|||||||
1847
test/unit/postscript_spec.js
Normal file
1847
test/unit/postscript_spec.js
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user