mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-25 01:25:51 +02:00
Remove scientific notation parsing.
This behaviour dates back to the initial pdf.js commit, but it is wrong and doesn't match other PDF readers such as MuPDF or PDFium. From the PDF specification, section 7.3.3: "A PDF writer shall not use the PostScript language syntax for numbers with non-decimal radices (such as 16#FFFE) or in exponential format (such as 6.02E23)."
This commit is contained in:
parent
3c434140ea
commit
8fa6ef36e4
@ -908,7 +908,6 @@ class Lexer {
|
|||||||
|
|
||||||
getNumber() {
|
getNumber() {
|
||||||
let ch = this.currentChar;
|
let ch = this.currentChar;
|
||||||
let eNotation = false;
|
|
||||||
let divideBy = 0; // Different from 0 if it's a floating point value.
|
let divideBy = 0; // Different from 0 if it's a floating point value.
|
||||||
let sign = 1;
|
let sign = 1;
|
||||||
|
|
||||||
@ -951,22 +950,15 @@ class Lexer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let baseValue = ch - 0x30; // '0'
|
let baseValue = ch - 0x30; // '0'
|
||||||
let powerValue = 0;
|
|
||||||
let powerValueSign = 1;
|
|
||||||
|
|
||||||
while ((ch = this.nextChar()) >= 0) {
|
while ((ch = this.nextChar()) >= 0) {
|
||||||
if (ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39) {
|
if (ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39) {
|
||||||
const currentDigit = ch - 0x30; // '0'
|
const currentDigit = ch - 0x30; // '0'
|
||||||
if (eNotation) {
|
if (divideBy !== 0) {
|
||||||
// We are after an 'e' or 'E'.
|
// We are after a point.
|
||||||
powerValue = powerValue * 10 + currentDigit;
|
divideBy *= 10;
|
||||||
} else {
|
|
||||||
if (divideBy !== 0) {
|
|
||||||
// We are after a point.
|
|
||||||
divideBy *= 10;
|
|
||||||
}
|
|
||||||
baseValue = baseValue * 10 + currentDigit;
|
|
||||||
}
|
}
|
||||||
|
baseValue = baseValue * 10 + currentDigit;
|
||||||
} else if (ch === /* '.' = */ 0x2e) {
|
} else if (ch === /* '.' = */ 0x2e) {
|
||||||
if (divideBy === 0) {
|
if (divideBy === 0) {
|
||||||
divideBy = 1;
|
divideBy = 1;
|
||||||
@ -978,18 +970,6 @@ class Lexer {
|
|||||||
// Ignore minus signs in the middle of numbers to match
|
// Ignore minus signs in the middle of numbers to match
|
||||||
// Adobe's behavior.
|
// Adobe's behavior.
|
||||||
warn("Badly formatted number: minus sign in the middle");
|
warn("Badly formatted number: minus sign in the middle");
|
||||||
} else if (ch === /* 'E' = */ 0x45 || ch === /* 'e' = */ 0x65) {
|
|
||||||
// 'E' can be either a scientific notation or the beginning of a new
|
|
||||||
// operator.
|
|
||||||
ch = this.peekChar();
|
|
||||||
if (ch === /* '+' = */ 0x2b || ch === /* '-' = */ 0x2d) {
|
|
||||||
powerValueSign = ch === 0x2d ? -1 : 1;
|
|
||||||
this.nextChar(); // Consume the sign character.
|
|
||||||
} else if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
|
|
||||||
// The 'E' must be the beginning of a new operator.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
eNotation = true;
|
|
||||||
} else {
|
} else {
|
||||||
// The last character doesn't belong to us.
|
// The last character doesn't belong to us.
|
||||||
break;
|
break;
|
||||||
@ -999,9 +979,6 @@ class Lexer {
|
|||||||
if (divideBy !== 0) {
|
if (divideBy !== 0) {
|
||||||
baseValue /= divideBy;
|
baseValue /= divideBy;
|
||||||
}
|
}
|
||||||
if (eNotation) {
|
|
||||||
baseValue *= 10 ** (powerValueSign * powerValue);
|
|
||||||
}
|
|
||||||
return sign * baseValue;
|
return sign * baseValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -874,3 +874,4 @@
|
|||||||
!bug2013793.pdf
|
!bug2013793.pdf
|
||||||
!bug2014080.pdf
|
!bug2014080.pdf
|
||||||
!two_pages.pdf
|
!two_pages.pdf
|
||||||
|
!sci-notation.pdf
|
||||||
|
|||||||
33
test/pdfs/sci-notation.pdf
Normal file
33
test/pdfs/sci-notation.pdf
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
%PDF-1.0
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Length 106 >>
|
||||||
|
stream
|
||||||
|
BT /F1 24 Tf 50 500 Td (This line uses normal syntax) Tj ETBT /F1 1e2 Tf 50 600 Td (Hello from 1e2) Tj ET
|
||||||
|
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000058 00000 n
|
||||||
|
0000000115 00000 n
|
||||||
|
0000000241 00000 n
|
||||||
|
0000000398 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R >>
|
||||||
|
startxref
|
||||||
|
468
|
||||||
|
%%EOF
|
||||||
@ -13964,5 +13964,12 @@
|
|||||||
"firstPage": 171,
|
"firstPage": 171,
|
||||||
"lastPage": 171,
|
"lastPage": 171,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "sci-notation",
|
||||||
|
"file": "pdfs/sci-notation.pdf",
|
||||||
|
"md5": "ead167e0328f1a1f4f8901cee501a9c4",
|
||||||
|
"rounds": 1,
|
||||||
|
"type": "eq"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -94,13 +94,11 @@ describe("parser", function () {
|
|||||||
expect(lexer.getNumber()).toEqual(11.234);
|
expect(lexer.getNumber()).toEqual(11.234);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should parse PostScript numbers", function () {
|
it("should parse PDF numbers", function () {
|
||||||
const numbers = [
|
const numbers = [
|
||||||
"-.002",
|
"-.002",
|
||||||
"34.5",
|
"34.5",
|
||||||
"-3.62",
|
"-3.62",
|
||||||
"123.6e10",
|
|
||||||
"1E-5",
|
|
||||||
"-1.",
|
"-1.",
|
||||||
"0.0",
|
"0.0",
|
||||||
"123",
|
"123",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user