Don't add an EOL after a superscript

This commit is contained in:
calixteman 2026-03-22 14:18:26 +01:00
parent 869f25a489
commit ec24053ddf
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F
4 changed files with 83 additions and 1 deletion

View File

@ -2458,6 +2458,7 @@ class PartialEvaluator {
height: 0,
vertical: false,
prevTransform: null,
prevTextRise: 0,
textAdvanceScale: 0,
spaceInFlowMin: 0,
spaceInFlowMax: 0,
@ -2906,7 +2907,19 @@ class PartialEvaluator {
return true;
}
if (Math.abs(advanceY) > textContentItem.height) {
// Compensate for a textRise change (e.g. superscript/subscript dropping
// back to baseline): textRise is baked into posY/lastPosY via tsm[5] in
// getCurrentTextTransform(), scaled by the Y component of the CTM×TM
// product, which equals currentTransform[3] / textState.fontSize.
// Without this correction a superscript whose textRise exceeds the line
// height triggers a spurious EOL when the rise returns to 0.
const textRiseDelta = textState.textRise - textContentItem.prevTextRise;
const advanceYCorrected =
textRiseDelta === 0
? advanceY
: advanceY -
(currentTransform[3] / textState.fontSize) * textRiseDelta;
if (Math.abs(advanceYCorrected) > textContentItem.height) {
appendEOL();
return true;
}
@ -3068,6 +3081,7 @@ class PartialEvaluator {
if (scaledDim) {
// Save the position of the last visible character.
textChunk.prevTransform = getCurrentTextTransform();
textChunk.prevTextRise = textState.textRise;
}
const glyphUnicode = glyph.unicode;

View File

@ -890,3 +890,4 @@
!acroform_calculation_order.pdf
!extractPages_null_in_array.pdf
!issue20930.pdf
!text_rise_eol_bug.pdf

View File

@ -0,0 +1,46 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources << /Font << /F1 5 0 R >> >>
>>
endobj
4 0 obj
<< /Length 113 >>
stream
BT
/F1 12 Tf
100 700 Td
(E = mc) Tj
12 Ts
/F1 8 Tf
(2) Tj
0 Ts
/F1 12 Tf
( is the mass-energy equivalence.) Tj
ET
endstream
endobj
5 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000121 00000 n
0000000253 00000 n
0000000417 00000 n
trailer
<< /Size 6 /Root 1 0 R >>
startxref
487
%%EOF

View File

@ -3985,6 +3985,27 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
it("gets text content without spurious EOL after a superscript (text_rise_eol_bug.pdf)", async function () {
  // Fixture: "E = mc", then a "2" drawn with a text rise of 12 at a smaller
  // font size, then the rest of the sentence back at a text rise of 0.
  const docParams = buildGetDocumentParams("text_rise_eol_bug.pdf");
  const loadingTask = getDocument(docParams);
  const pdfDoc = await loadingTask.promise;
  const pdfPage = await pdfDoc.getPage(1);
  const textContent = await pdfPage.getTextContent({
    disableNormalization: true,
  });
  const { items } = textContent;

  // The whole sentence sits on a single line, so none of the extracted
  // items may be flagged as ending a line.
  const noItemHasEOL = items.every(item => !item.hasEOL);
  expect(noItemHasEOL).toEqual(true);

  // Merging the items must give back the sentence with no line break in it.
  const mergedText = mergeText(items);
  expect(mergedText).toEqual("E = mc2 is the mass-energy equivalence.");

  await loadingTask.destroy();
});
it("gets text content with a specific view box", async function () {
const loadingTask = getDocument(buildGetDocumentParams("issue16316.pdf"));
const pdfDoc = await loadingTask.promise;