diff --git a/build.gradle.kts b/build.gradle.kts index d4db1f9..43e3ca5 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -27,7 +27,7 @@ repositories { dependencies { api("org.projectlombok:lombok:1.18.30") api("com.google.guava:guava:33.0.0-jre") - api("com.pdftron:PDFNet:10.11.0") + api("com.pdftron:PDFNet:11.0.0") testImplementation("net.sourceforge.lept4j:lept4j:1.19.1") testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") testImplementation("org.assertj:assertj-core:3.24.2") diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java index 1ba1dcc..aeb2c71 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java @@ -290,7 +290,8 @@ public class InvisibleElementRemovalService { Therefore, the position of a following Tj is affected by not writing the first Element. This is why, we write only the Tm command: */ - writer.writeGStateChanges(textElement); + textElement.setTextData(new byte[]{}); + writer.writeElement(textElement); } } else { if (!inClippingPath) { @@ -431,7 +432,7 @@ public class InvisibleElementRemovalService { context.reader().end(); if (!context.overlappedElements().isEmpty()) { - log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed"); + log.debug(context.overlappedElements().size() + " overlapped elements have not been found or removed"); } } @@ -490,7 +491,8 @@ public class InvisibleElementRemovalService { Therefore, the position of a following Tj is affected by not writing the first Element. This is why, we write only the Tm command: */ - writer.writeGStateChanges(element); + element.setTextData(new byte[]{}); + writer.writeElement(element); } } else { writer.writeElement(element); diff --git a/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java b/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java index 20f86b6..3d86e27 100644 --- a/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java +++ b/src/test/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalServiceTest.java @@ -3,6 +3,7 @@ package com.iqser.red.pdftronlogic.commons; import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument; import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import java.io.ByteArrayOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -64,6 +65,19 @@ class InvisibleElementRemovalServiceTest { } + @Test + @SneakyThrows + void page32DoesNotCrash() { + + String fileName = "files/Page32.pdf"; + + try (var in = this.getClass().getClassLoader().getResourceAsStream(fileName); var out = new ByteArrayOutputStream()) { + invisibleElementRemovalService.removeInvisibleElements(in, out, false); + } + + } + + @Test @SneakyThrows void removeInvisibleTextClippedByFormObjects() { diff --git a/src/test/resources/files/Page32.pdf b/src/test/resources/files/Page32.pdf new file mode 100644 index 0000000..8495893 Binary files /dev/null and b/src/test/resources/files/Page32.pdf differ