mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-22 16:05:56 +02:00
Don't add an EOL after a superscript
This commit is contained in:
parent
869f25a489
commit
ec24053ddf
@ -2458,6 +2458,7 @@ class PartialEvaluator {
|
|||||||
height: 0,
|
height: 0,
|
||||||
vertical: false,
|
vertical: false,
|
||||||
prevTransform: null,
|
prevTransform: null,
|
||||||
|
prevTextRise: 0,
|
||||||
textAdvanceScale: 0,
|
textAdvanceScale: 0,
|
||||||
spaceInFlowMin: 0,
|
spaceInFlowMin: 0,
|
||||||
spaceInFlowMax: 0,
|
spaceInFlowMax: 0,
|
||||||
@ -2906,7 +2907,19 @@ class PartialEvaluator {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Math.abs(advanceY) > textContentItem.height) {
|
// Compensate for a textRise change (e.g. superscript/subscript dropping
|
||||||
|
// back to baseline): textRise is baked into posY/lastPosY via tsm[5] in
|
||||||
|
// getCurrentTextTransform(), scaled by the Y component of the CTM×TM
|
||||||
|
// product, which equals currentTransform[3] / textState.fontSize.
|
||||||
|
// Without this correction a superscript whose textRise exceeds the line
|
||||||
|
// height triggers a spurious EOL when the rise returns to 0.
|
||||||
|
const textRiseDelta = textState.textRise - textContentItem.prevTextRise;
|
||||||
|
const advanceYCorrected =
|
||||||
|
textRiseDelta === 0
|
||||||
|
? advanceY
|
||||||
|
: advanceY -
|
||||||
|
(currentTransform[3] / textState.fontSize) * textRiseDelta;
|
||||||
|
if (Math.abs(advanceYCorrected) > textContentItem.height) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -3068,6 +3081,7 @@ class PartialEvaluator {
|
|||||||
if (scaledDim) {
|
if (scaledDim) {
|
||||||
// Save the position of the last visible character.
|
// Save the position of the last visible character.
|
||||||
textChunk.prevTransform = getCurrentTextTransform();
|
textChunk.prevTransform = getCurrentTextTransform();
|
||||||
|
textChunk.prevTextRise = textState.textRise;
|
||||||
}
|
}
|
||||||
|
|
||||||
const glyphUnicode = glyph.unicode;
|
const glyphUnicode = glyph.unicode;
|
||||||
|
|||||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -890,3 +890,4 @@
|
|||||||
!acroform_calculation_order.pdf
|
!acroform_calculation_order.pdf
|
||||||
!extractPages_null_in_array.pdf
|
!extractPages_null_in_array.pdf
|
||||||
!issue20930.pdf
|
!issue20930.pdf
|
||||||
|
!text_rise_eol_bug.pdf
|
||||||
|
|||||||
46
test/pdfs/text_rise_eol_bug.pdf
Normal file
46
test/pdfs/text_rise_eol_bug.pdf
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
%âãÏÓ
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792]
|
||||||
|
/Contents 4 0 R
|
||||||
|
/Resources << /Font << /F1 5 0 R >> >>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Length 113 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 12 Tf
|
||||||
|
100 700 Td
|
||||||
|
(E = mc) Tj
|
||||||
|
12 Ts
|
||||||
|
/F1 8 Tf
|
||||||
|
(2) Tj
|
||||||
|
0 Ts
|
||||||
|
/F1 12 Tf
|
||||||
|
( is the mass-energy equivalence.) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000015 00000 n
|
||||||
|
0000000064 00000 n
|
||||||
|
0000000121 00000 n
|
||||||
|
0000000253 00000 n
|
||||||
|
0000000417 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R >>
|
||||||
|
startxref
|
||||||
|
487
|
||||||
|
%%EOF
|
||||||
@ -3985,6 +3985,27 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets text content without spurious EOL after a superscript (text_rise_eol_bug.pdf)", async function () {
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("text_rise_eol_bug.pdf")
|
||||||
|
);
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent({
|
||||||
|
disableNormalization: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
// No item should carry a hasEOL flag between the superscript and the
|
||||||
|
// text that follows it.
|
||||||
|
expect(items.every(i => !i.hasEOL)).toEqual(true);
|
||||||
|
|
||||||
|
// Full sentence must be reconstructable without a newline.
|
||||||
|
const text = mergeText(items);
|
||||||
|
expect(text).toEqual("E = mc2 is the mass-energy equivalence.");
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets text content with a specific view box", async function () {
|
it("gets text content with a specific view box", async function () {
|
||||||
const loadingTask = getDocument(buildGetDocumentParams("issue16316.pdf"));
|
const loadingTask = getDocument(buildGetDocumentParams("issue16316.pdf"));
|
||||||
const pdfDoc = await loadingTask.promise;
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user