Remove scientific notation parsing.

This behaviour dates back to the initial pdf.js commit, but it is incorrect
and doesn't match other PDF readers such as MuPDF or PDFium.

From the PDF specification, section 7.3.3 (Numeric Objects):

A PDF writer shall not use the PostScript language syntax for numbers with
non-decimal radices (such as 16#FFFE) or in exponential format (such as
6.02E23).
This commit is contained in:
Jeff Muizelaar 2026-02-25 13:34:06 -05:00
parent 3c434140ea
commit 8fa6ef36e4
5 changed files with 46 additions and 30 deletions

View File

@ -908,7 +908,6 @@ class Lexer {
getNumber() {
let ch = this.currentChar;
let eNotation = false;
let divideBy = 0; // Different from 0 if it's a floating point value.
let sign = 1;
@ -951,22 +950,15 @@ class Lexer {
}
let baseValue = ch - 0x30; // '0'
let powerValue = 0;
let powerValueSign = 1;
while ((ch = this.nextChar()) >= 0) {
if (ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39) {
const currentDigit = ch - 0x30; // '0'
if (eNotation) {
// We are after an 'e' or 'E'.
powerValue = powerValue * 10 + currentDigit;
} else {
if (divideBy !== 0) {
// We are after a point.
divideBy *= 10;
}
baseValue = baseValue * 10 + currentDigit;
if (divideBy !== 0) {
// We are after a point.
divideBy *= 10;
}
baseValue = baseValue * 10 + currentDigit;
} else if (ch === /* '.' = */ 0x2e) {
if (divideBy === 0) {
divideBy = 1;
@ -978,18 +970,6 @@ class Lexer {
// Ignore minus signs in the middle of numbers to match
// Adobe's behavior.
warn("Badly formatted number: minus sign in the middle");
} else if (ch === /* 'E' = */ 0x45 || ch === /* 'e' = */ 0x65) {
// 'E' can be either a scientific notation or the beginning of a new
// operator.
ch = this.peekChar();
if (ch === /* '+' = */ 0x2b || ch === /* '-' = */ 0x2d) {
powerValueSign = ch === 0x2d ? -1 : 1;
this.nextChar(); // Consume the sign character.
} else if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
// The 'E' must be the beginning of a new operator.
break;
}
eNotation = true;
} else {
// The last character doesn't belong to us.
break;
@ -999,9 +979,6 @@ class Lexer {
if (divideBy !== 0) {
baseValue /= divideBy;
}
if (eNotation) {
baseValue *= 10 ** (powerValueSign * powerValue);
}
return sign * baseValue;
}

View File

@ -874,3 +874,4 @@
!bug2013793.pdf
!bug2014080.pdf
!two_pages.pdf
!sci-notation.pdf

View File

@ -0,0 +1,33 @@
%PDF-1.0
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>
endobj
4 0 obj
<< /Length 106 >>
stream
BT /F1 24 Tf 50 500 Td (This line uses normal syntax) Tj ETBT /F1 1e2 Tf 50 600 Td (Hello from 1e2) Tj ET
endstream
endobj
5 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000241 00000 n
0000000398 00000 n
trailer
<< /Size 6 /Root 1 0 R >>
startxref
468
%%EOF

View File

@ -13964,5 +13964,12 @@
"firstPage": 171,
"lastPage": 171,
"type": "eq"
},
{
"id": "sci-notation",
"file": "pdfs/sci-notation.pdf",
"md5": "ead167e0328f1a1f4f8901cee501a9c4",
"rounds": 1,
"type": "eq"
}
]

View File

@ -94,13 +94,11 @@ describe("parser", function () {
expect(lexer.getNumber()).toEqual(11.234);
});
it("should parse PostScript numbers", function () {
it("should parse PDF numbers", function () {
const numbers = [
"-.002",
"34.5",
"-3.62",
"123.6e10",
"1E-5",
"-1.",
"0.0",
"123",