Merge pull request #20624 from calixteman/bug2013793

Flush the text content chunk only on real font changes (bug 2013793)
This commit is contained in:
Tim van der Meij 2026-02-05 21:14:49 +01:00 committed by GitHub
commit a0f3528053
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 46 additions and 14 deletions

View File

@@ -2538,7 +2538,7 @@ class PartialEvaluator {
const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
let textState;
let textState, currentTextState;
function pushWhitespace({
width = 0,
@@ -2800,7 +2800,9 @@ class PartialEvaluator {
// When the total height of the current chunk is negative
// then we're writing from bottom to top.
const textOrientation = Math.sign(textContentItem.height);
const textOrientation = Math.sign(
textContentItem.height || textContentItem.totalHeight
);
if (advanceY < textOrientation * textContentItem.negativeSpaceMax) {
if (
Math.abs(advanceX) >
@@ -2864,7 +2866,9 @@ class PartialEvaluator {
// When the total width of the current chunk is negative
// then we're writing from right to left.
const textOrientation = Math.sign(textContentItem.width);
const textOrientation = Math.sign(
textContentItem.width || textContentItem.totalWidth
);
if (advanceX < textOrientation * textContentItem.negativeSpaceMax) {
if (
Math.abs(advanceY) >
@@ -2922,6 +2926,15 @@ class PartialEvaluator {
}
function buildTextContentItem({ chars, extraSpacing }) {
if (
currentTextState !== textState &&
(currentTextState.fontName !== textState.fontName ||
currentTextState.fontSize !== textState.fontSize)
) {
flushTextContentItem();
currentTextState = textState.clone();
}
const font = textState.font;
if (!chars) {
// Just move according to the space we have.
@@ -3177,8 +3190,8 @@ class PartialEvaluator {
break;
}
const previousState = textState;
textState = stateManager.state;
currentTextState ||= textState.clone();
const fn = operation.fn;
args = operation.args;
@@ -3195,7 +3208,6 @@ class PartialEvaluator {
break;
}
flushTextContentItem();
textState.fontName = fontNameArg;
textState.fontSize = fontSizeArg;
next(handleSetFont(fontNameArg, null));
@@ -3552,14 +3564,10 @@ class PartialEvaluator {
}
break;
case OPS.restore:
if (
previousState &&
(previousState.font !== textState.font ||
previousState.fontSize !== textState.fontSize ||
previousState.fontName !== textState.fontName)
) {
flushTextContentItem();
}
stateManager.restore();
break;
case OPS.save:
stateManager.save();
break;
} // switch
if (textContent.items.length >= (sink?.desiredSize ?? 1)) {
@@ -5083,7 +5091,7 @@ class TextState {
}
clone() {
const clone = Object.create(this);
const clone = Object.assign(Object.create(this), this);
clone.textMatrix = this.textMatrix.slice();
clone.textLineMatrix = this.textLineMatrix.slice();
clone.fontMatrix = this.fontMatrix.slice();

View File

@@ -871,3 +871,4 @@
!page_with_number.pdf
!page_with_number_and_link.pdf
!Brotli-Prototype-FileA.pdf
!bug2013793.pdf

BIN
test/pdfs/bug2013793.pdf Normal file

Binary file not shown.

View File

@@ -4069,6 +4069,29 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
// Regression test for bug 2013793: extracts the text content of page 1 of
// the `bug2013793.pdf` fixture, merges the returned items into one string and
// compares it against the exact expected text below.
it("gets text content with some fake font changes (bug 2013793)", async function () {
const loadingTask = getDocument(buildGetDocumentParams("bug2013793.pdf"));
const pdfDoc = await loadingTask.promise;
const pdfPage = await pdfDoc.getPage(1);
// The text is requested with `disableNormalization: true`.
const { items } = await pdfPage.getTextContent({
disableNormalization: true,
});
const text = mergeText(items);
// NOTE(review): the expected string is matched verbatim, including its line
// breaks and the "heres" spelling — presumably it mirrors the fixture's
// actual content, so confirm against the PDF before "correcting" it.
expect(text)
.toEqual(`This is a great deal of nothing. The purpose is to help in identifying a bug when the PDF
is read by Firefox. I want to know whether any of the two words in this paragraph run
together. If they do, I will file a bug report. The problem seems to occur somewhere
between the 240th and 260th character in the paragraph. I should have written that much
by now. So, heres to squashing bugs.
This is a great deal of nothing. The purpose is to help in identifying a bug when the
PDF is read by Firefox. I want to know whether any of the two words in this
paragraph run together. If they do, I will file a bug report. The problem seems to
occur somewhere between the 240th and 260th character in the paragraph. I should
have written that much by now. So, heres to squashing bugs.`);
// Destroy the loading task once the assertion has run.
await loadingTask.destroy();
});
it("gets empty structure tree", async function () {
const tree = await page.getStructTree();