From 01d1b352202249d050b77052ccdcc2da01e27c8d Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 15 Aug 2024 21:10:38 +0200 Subject: [PATCH] RED-9746: Document hardly editable * revert quadtree lookup, since the lib does not seem to work reliably and gave no significant speed boost * check each individual glyph instead of only a text run and remember past overlaps in glyph * add logic to extract all glyphs exactly * check for optional content or transparency in form objects and marked content --- .../InvisibleElementRemovalService.java | 19 +++++-- .../commons/MarkedContentStack.java | 53 ++++++++++++++++++- .../commons/VisualEqualityTest.java | 2 +- 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java index a9e19e3..c066338 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java @@ -38,6 +38,7 @@ import com.pdftron.pdf.Page; import com.pdftron.pdf.PageIterator; import com.pdftron.pdf.PathData; import com.pdftron.pdf.Rect; +import com.pdftron.pdf.ocg.Group; import com.pdftron.sdf.Obj; import com.pdftron.sdf.SDFDoc; @@ -172,7 +173,7 @@ public class InvisibleElementRemovalService { InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder() .reader(reader) .clippingPathStack(new ClippingPathStack(page.getMediaBox())) - .markedContentStack(new MarkedContentStack()) + .markedContentStack(new MarkedContentStack(pdfDoc)) .removePaths(removePaths) .delta(delta) .overlappedElements(new ElementFeatureLookup()) @@ -251,10 +252,9 @@ public class InvisibleElementRemovalService { } boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight()); - if (inClippingPath) { 
ImageFeatures imageFeatures = ElementFeatureFactory.buildImage(imageElement); - if (!(imageFeatures.isTransparent() || imageFeatures.isImageMask() || imageFeatures.isSoftMask())) { + if (!(context.markedContentStack.contextHasTransparency() || imageFeatures.isTransparent() || imageFeatures.isImageMask() || imageFeatures.isSoftMask())) { calculateOverlaps(context, imageFeatures); } context.visibleElements().add(imageFeatures); @@ -328,9 +328,11 @@ public class InvisibleElementRemovalService { if (!context.visitedXObjIds().contains(formObj.getObjNum())) { context.visitedXObjIds().add(formObj.getObjNum()); + // writer needs to be newly initialized when entering a new content stream // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest) try (ElementWriter formWriter = new ElementWriter()) { + context.markedContentStack.enterForm(formElement); context.clippingPathStack().enterNewGState(); try (var formElementBBOX = formElement.getBBox()) { context.clippingPathStack().intersectClippingPath(Converter.toRectangle2D(formElementBBOX)); @@ -344,6 +346,7 @@ public class InvisibleElementRemovalService { formWriter.end(); context.reader().end(); context.clippingPathStack().leaveGState(); + context.markedContentStack.leaveForm(); } } } @@ -386,7 +389,7 @@ public class InvisibleElementRemovalService { } if (inClippingPath) { - if (isFilledAndNonTransparent(pathElement)) { + if (!context.markedContentStack.contextHasTransparency() && isFilledAndNonTransparent(pathElement)) { calculateOverlaps(context, pathFeatures); } context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement)); @@ -473,8 +476,16 @@ public class InvisibleElementRemovalService { private static void removeOverlappedElement(ElementWriter writer, InvisibleElementRemovalContext context, Element element) throws PDFNetException { + try (Rect bbox = element.getBBox()) { + if (bbox == null) { + writer.writeElement(element); + return; + } + } + 
Optional optionalElementMatch = context.overlappedElements() .anyMatch(ElementFeatureFactory.extractFeatures(element)); + if (optionalElementMatch.isPresent()) { context.overlappedElements().remove(optionalElementMatch.get()); if (element.getType() == 3 && element.hasTextMatrix()) { diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/MarkedContentStack.java b/src/main/java/com/iqser/red/pdftronlogic/commons/MarkedContentStack.java index d38caa2..03bc969 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/MarkedContentStack.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/MarkedContentStack.java @@ -3,16 +3,45 @@ package com.iqser.red.pdftronlogic.commons; import java.util.Deque; import java.util.Iterator; import java.util.LinkedList; +import java.util.Optional; import java.util.Set; +import com.pdftron.pdf.Element; +import com.pdftron.pdf.PDFDoc; +import com.pdftron.pdf.ocg.Group; +import com.pdftron.sdf.Obj; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; + +@RequiredArgsConstructor public class MarkedContentStack { + private final PDFDoc pdfDoc; Deque stack = new LinkedList<>(); + Deque
formStack = new LinkedList<>(); public void enterMarkedContent(String name) { - stack.push(new MarkedContent(name)); + stack.push(new MarkedContent(name, name.startsWith("OC"))); + } + + + @SneakyThrows + public void enterForm(Element formElement) { + + Obj oc = formElement.getXObject().findObj("OC"); + Obj group = formElement.getXObject().findObj("Group"); + boolean transparency = false; + if (group != null) { + Obj groupSubType = group.findObj("S"); + if (groupSubType != null && groupSubType.isName() && groupSubType.getName().equals("Transparency")) { + transparency = true; + } + } + + formStack.push(new Form(formElement.getXObject().getObjNum(), oc != null, transparency)); } @@ -66,7 +95,27 @@ public class MarkedContentStack { } - private record MarkedContent(String name) { + public boolean contextHasTransparency() { + + return formStack.stream() + .anyMatch(form -> form.optionalContent || form.transparency) // + || stack.stream() + .anyMatch(MarkedContent::optionalContent); + } + + + public void leaveForm() { + + formStack.pop(); + + } + + + private record MarkedContent(String name, boolean optionalContent) { + + } + + private record Form(long ref, boolean optionalContent, boolean transparency) { } diff --git a/src/test/java/com/iqser/red/pdftronlogic/commons/VisualEqualityTest.java b/src/test/java/com/iqser/red/pdftronlogic/commons/VisualEqualityTest.java index ae53e6a..7a66ee0 100644 --- a/src/test/java/com/iqser/red/pdftronlogic/commons/VisualEqualityTest.java +++ b/src/test/java/com/iqser/red/pdftronlogic/commons/VisualEqualityTest.java @@ -54,7 +54,7 @@ public class VisualEqualityTest { @SneakyThrows public void assertVisualEqualityOfProcessedFile() { - Path file = Path.of("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/ITEM 19_A15149AC - Primary Skin Irritation Rabbit.pdf"); + Path file = Path.of("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/SOLICITA_VICTRATO-GOLD-II_Item 20_Sensibilizacao_02.pdf"); Context context = new 
Context(TEST_OUTPUT_DIR, new HashMap<>()); runForFile(file, context);