RED-10365: InvisibleElementRemovalService crashes for specific file

This commit is contained in:
Kilian Schuettler 2024-11-05 12:18:29 +01:00
parent ff9fd7bd44
commit e86e6fba2a
4 changed files with 20 additions and 4 deletions

View File

@ -27,7 +27,7 @@ repositories {
dependencies {
api("org.projectlombok:lombok:1.18.30")
api("com.google.guava:guava:33.0.0-jre")
api("com.pdftron:PDFNet:10.11.0")
api("com.pdftron:PDFNet:11.0.0")
testImplementation("net.sourceforge.lept4j:lept4j:1.19.1")
testImplementation("org.junit.jupiter:junit-jupiter:5.10.2")
testImplementation("org.assertj:assertj-core:3.24.2")

View File

@ -290,7 +290,8 @@ public class InvisibleElementRemovalService {
Therefore, the position of a following Tj is affected by not writing the first Element.
This is why we write only the Tm command:
*/
writer.writeGStateChanges(textElement);
textElement.setTextData(new byte[]{});
writer.writeElement(textElement);
}
} else {
if (!inClippingPath) {
@ -431,7 +432,7 @@ public class InvisibleElementRemovalService {
context.reader().end();
if (!context.overlappedElements().isEmpty()) {
log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed");
log.debug(context.overlappedElements().size() + " overlapped elements have not been found or removed");
}
}
@ -490,7 +491,8 @@ public class InvisibleElementRemovalService {
Therefore, the position of a following Tj is affected by not writing the first Element.
This is why we write only the Tm command:
*/
writer.writeGStateChanges(element);
element.setTextData(new byte[]{});
writer.writeElement(element);
}
} else {
writer.writeElement(element);

View File

@ -3,6 +3,7 @@ package com.iqser.red.pdftronlogic.commons;
import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
@ -64,6 +65,19 @@ class InvisibleElementRemovalServiceTest {
}
/**
 * Regression test for RED-10365: runs invisible-element removal on {@code files/Page32.pdf}.
 * <p>
 * The test has no output assertions; it passes as long as
 * {@code removeInvisibleElements} completes without throwing.
 */
@Test
@SneakyThrows
void page32DoesNotCrash() {

    String fileName = "files/Page32.pdf";
    try (var in = this.getClass().getClassLoader().getResourceAsStream(fileName); var out = new ByteArrayOutputStream()) {
        // getResourceAsStream returns null for a missing resource and try-with-resources
        // accepts a null resource silently, so fail here with a clear message instead of
        // letting the service fail on a null stream.
        if (in == null) {
            throw new IllegalStateException("Test resource not found on classpath: " + fileName);
        }
        invisibleElementRemovalService.removeInvisibleElements(in, out, false);
    }
}
@Test
@SneakyThrows
void removeInvisibleTextClippedByFormObjects() {

Binary file not shown.