From 6b6417ed802691fb14faaa72ad35e7b560549bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Tue, 23 Jan 2024 10:50:09 +0100 Subject: [PATCH] RED-8212: fix tables for ocred documents --- .../InvisibleElementRemovalService.java | 87 ++++++++++++------- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java index 6d99256..9f8a69b 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java @@ -8,13 +8,9 @@ import java.awt.geom.Rectangle2D; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; -import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.TreeSet; -import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; import com.pdftron.common.PDFNetException; import com.pdftron.pdf.ColorPt; @@ -55,16 +51,17 @@ public class InvisibleElementRemovalService { * -Any Text set to clipping with its many interactions with other elements * * @param pdfFile The PDF file to process + * @param removePaths If this flag is set, invisible path elements will be removed * @param delta If this flag is set only the removed Elements will be written to the output file. * The Elements are red if they are removed by clipping path, blue for transparency, and a green bounding box for overlap. 
* @param out OutputStream to write the resulting file to **/ @SneakyThrows - public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) { + public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta, boolean removePaths) { PDFDoc pdfDoc = new PDFDoc(pdfFile); - execute(pdfDoc, delta); + execute(pdfDoc, delta, removePaths); try { pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null); @@ -79,17 +76,36 @@ public class InvisibleElementRemovalService { /** - * This method is similar to {@link #removeInvisibleElements(InputStream, OutputStream, boolean)}, just with a PDFDoc. + * Convenience overload: delegates to {@link #removeInvisibleElements(InputStream, OutputStream, boolean, boolean)} with {@code removePaths == true}. + */ + @SneakyThrows + public void removeInvisibleElements(InputStream pdfFile, OutputStream out, boolean delta) { + + removeInvisibleElements(pdfFile, out, delta, true); + + } + + /** + * Same processing as {@link #removeInvisibleElements(InputStream, OutputStream, boolean, boolean)}, but applied to a caller-supplied PDFDoc and without the save step. Note the argument order: removePaths comes BEFORE delta here, the reverse of the stream-based overload. + */ + @SneakyThrows + public void removeInvisibleElements(PDFDoc pdfDoc, boolean removePaths, boolean delta) { + + execute(pdfDoc, delta, removePaths); + } + + /** + * Convenience overload: equivalent to {@link #removeInvisibleElements(PDFDoc, boolean, boolean)} with {@code removePaths == true}. 
*/ @SneakyThrows public void removeInvisibleElements(PDFDoc pdfDoc, boolean delta) { - execute(pdfDoc, delta); + execute(pdfDoc, delta, true); } @SneakyThrows - private void execute(PDFDoc pdfDoc, boolean delta) { + private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths) { log.info("Start removing invisible Elements"); ElementWriter writer = new ElementWriter(); @@ -105,6 +121,7 @@ public class InvisibleElementRemovalService { InvisibleElementRemovalContext context = InvisibleElementRemovalContext.builder() .reader(reader) .clippingPathStack(new ClippingPathStack(page.getMediaBox())) + .removePaths(removePaths) .delta(delta) .overlappedElements(new ArrayList<>()) .visibleElements(new ArrayList<>()) @@ -297,11 +314,11 @@ public class InvisibleElementRemovalService { context.visibleElements().removeAll(currentOverlappedElements); } context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement)); - if (!context.delta()) { + if (!context.delta() || !context.removePaths()) { writer.writeElement(pathElement); } } - if (context.delta() && !inClippingPath) { + if (context.delta() && !inClippingPath && context.removePaths()) { pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB()); pathElement.getGState().setFillColor(new ColorPt(1, 0, 0)); pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB()); @@ -336,25 +353,12 @@ public class InvisibleElementRemovalService { for (Element element = context.reader().next(); element != null; element = context.reader().next()) { switch (element.getType()) { case Element.e_form -> processFormOverlappedElements(writer, element, context); - case Element.e_path, Element.e_image, Element.e_inline_image, Element.e_text -> { - boolean anyMatch = false; - for (ElementFeatures elementToRemove : context.overlappedElements()) { - if (elementToRemove.almostMatches(element)) { - context.overlappedElements().remove(elementToRemove); - anyMatch = true; - break; - } - } - if (!anyMatch) 
{ + case Element.e_image, Element.e_inline_image, Element.e_text -> removeOverlappedElement(writer, context, element); + case Element.e_path -> { + if (context.removePaths()) { + removeOverlappedElement(writer, context, element); + } else { writer.writeElement(element); - } else if (element.getType() == 3 && element.hasTextMatrix()) { - /* - PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element. - hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands. - Therefore, the position of a following Tj is affected by not writing the first Element. - This is why, we write only the Tm command: - */ - writer.writeGStateChanges(element); } } default -> writer.writeElement(element); @@ -363,6 +367,30 @@ public class InvisibleElementRemovalService { } + private static void removeOverlappedElement(ElementWriter writer, InvisibleElementRemovalContext context, Element element) throws PDFNetException { + + boolean anyMatch = false; + for (ElementFeatures elementToRemove : context.overlappedElements()) { + if (elementToRemove.almostMatches(element)) { + context.overlappedElements().remove(elementToRemove); + anyMatch = true; + break; + } + } + if (!anyMatch) { + writer.writeElement(element); + } else if (element.getType() == 3 && element.hasTextMatrix()) { + /* + PDFTron Element with type "text" (the literal 3 in the condition above; presumably Element.e_text - TODO confirm) refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element. + hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands. + Therefore, the position of a following Tj is affected by not writing the first Element. 
+ This is why we write only the Tm command: + */ + writer.writeGStateChanges(element); + } + } + + private void processFormOverlappedElements(ElementWriter writer, Element formElement, InvisibleElementRemovalContext context) throws PDFNetException { writer.writeElement(formElement); @@ -490,6 +518,7 @@ public class InvisibleElementRemovalService { @Builder private record InvisibleElementRemovalContext( + boolean removePaths, boolean delta, ElementReader reader, ClippingPathStack clippingPathStack,