From f9607c979324741a76c6600685dfadf7f3da5c42 Mon Sep 17 00:00:00 2001
From: Kilian Schuettler
Date: Tue, 11 Jul 2023 15:23:24 +0200
Subject: [PATCH] DOC-301: implement invisible element removal for same color
 as background

---
Review notes (text between "---" and the diffstat is ignored by git am):
 - fillIsVisible/strokeIsVisible returned true for opacity == 0, i.e. the
   opposite of their names, and isTextRenderedVisibly negated them as if they
   meant "invisible". Text painted in the background color was therefore
   never detected, and fully transparent text in the background color was no
   longer removed. The helpers now mean "opacity != 0 and color differs from
   the background"; the caller treats text as invisible when the parts that
   are painted in the current render mode (fill, stroke, or both) are not
   visible.
 - Generic type arguments (Iterator<Double>, Iterable<Byte>,
   List<ElementFeatures>) had been stripped from this copy and are restored.
 - All changes replace added lines one-for-one, so hunk headers and diffstat
   are unchanged. Commit and blob ids in the headers still refer to the
   original commit.
 - NOTE(review): extractFeatures builds Path.linePath from the raw path data
   without applying the element's CTM, whereas processPath transforms its
   path to initial user space first. Confirm that Path.isBackground compares
   the path and the text bounding box in the same coordinate space.
 - NOTE(review): with no covering path the background is taken to be
   Color.WHITE, i.e. an unpainted page is presumed to render white - confirm.

 .../red/pdftronlogic/commons/Converter.java   | 67 +++++++++++++++++
 .../pdftronlogic/commons/ElementFeatures.java | 48 +++++++++---
 .../InvisibleElementRemovalService.java       | 73 +++++++++----------
 3 files changed, 139 insertions(+), 49 deletions(-)
 create mode 100644 src/main/java/com/iqser/red/pdftronlogic/commons/Converter.java

diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/Converter.java b/src/main/java/com/iqser/red/pdftronlogic/commons/Converter.java
new file mode 100644
index 0000000..9a5c5c1
--- /dev/null
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/Converter.java
@@ -0,0 +1,67 @@
+package com.iqser.red.pdftronlogic.commons;
+
+import java.awt.Color;
+import java.awt.geom.GeneralPath;
+import java.awt.geom.Rectangle2D;
+import java.util.Iterator;
+
+import com.google.common.primitives.Bytes;
+import com.google.common.primitives.Doubles;
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.ColorPt;
+import com.pdftron.pdf.ColorSpace;
+import com.pdftron.pdf.PathData;
+import com.pdftron.pdf.Rect;
+
+import lombok.SneakyThrows;
+import lombok.experimental.UtilityClass;
+
+@UtilityClass
+public class Converter {
+    // Replays the path operators and their points into an AWT GeneralPath; an unknown operator raises PDFNetException.
+    public GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
+
+        GeneralPath linePath = new GeneralPath();
+        Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
+        Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
+        for (var operator : operators) {
+            switch (operator) {
+                case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
+                case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
+                case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
+                case PathData.e_closepath -> linePath.closePath();
+                case PathData.e_rect -> {
+                    double x = points.next();
+                    double y = points.next();
+                    double w = points.next();
+                    double h = points.next();
+                    linePath.moveTo(x, y);
+                    linePath.lineTo(x + w, y);
+                    linePath.lineTo(x + w, y + h);
+                    linePath.lineTo(x, y + h);
+                    linePath.closePath();
+                }
+                default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
+            }
+        }
+        return linePath;
+    }
+
+    // Converts a color of the given color space to an AWT Color via its RGB representation.
+    @SneakyThrows
+    public static Color convertColor(ColorSpace colorSpace, ColorPt colorPt) {
+
+        ColorPt rgbColor = colorSpace.convert2RGB(colorPt);
+        Color color = new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
+        rgbColor.destroy();
+        return color;
+    }
+
+    // Builds an AWT rectangle from the Rect's x1/y1 corner and its width and height.
+    @SneakyThrows
+    public static Rectangle2D toRectangle2D(Rect rect) {
+
+        return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
+    }
+
+}
diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java b/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java
index 4114d96..b4bef18 100644
--- a/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java
@@ -2,6 +2,8 @@ package com.iqser.red.pdftronlogic.commons;
 
 import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
 
+import java.awt.Color;
+import java.awt.geom.GeneralPath;
 import java.awt.geom.Rectangle2D;
 
 import com.pdftron.common.PDFNetException;
@@ -38,6 +40,12 @@ public class ElementFeatures {
     }
 
 
+    public boolean isBackground(Rect area) {
+
+        return false;
+    }
+
+
     @SneakyThrows
     private boolean rectsAlmostMatch(Rect bBox) {
         // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
@@ -49,6 +57,12 @@ public class ElementFeatures {
     }
 
 
+    public boolean matchesFillColor(Color color) {
+
+        return false;
+    }
+
+
     @EqualsAndHashCode(callSuper = true)
     @Getter
     @SuperBuilder
@@ -82,6 +96,9 @@ public class ElementFeatures {
         boolean isStroked;
         boolean isFilled;
         boolean isWindingFill;
+        Color strokeColor;
+        Color fillColor;
+        GeneralPath linePath;
 
 
         @Override
@@ -96,6 +113,22 @@ public class ElementFeatures {
         }
 
 
+        // True if the given color equals this path's fill color.
+        @Override
+        public boolean matchesFillColor(Color color) {
+
+            return color.equals(fillColor);
+        }
+
+        // True if this path is filled, its bounding box intersects the area and its outline entirely contains the area.
+        @SneakyThrows
+        public boolean isBackground(Rect area) {
+
+            return isFilled && //
+                    getBoundingBox().intersects(area.getX1(), area.getY1(), area.getWidth(), area.getHeight()) && //
+                    linePath.contains(area.getX1(), area.getY1(), area.getWidth(), area.getHeight());
+        }
+
     }
 
     @EqualsAndHashCode(callSuper = true)
@@ -132,23 +165,26 @@ public class ElementFeatures {
         return switch (element.getType()) {
             case Element.e_path -> Path.builder()
                     .elementType(element.getType())
-                    .boundingBox(toRectangle2D(element.getBBox()))
+                    .boundingBox(Converter.toRectangle2D(element.getBBox()))
                     .isClippingPath(element.isClippingPath())
                     .isClipWindingFill(element.isClipWindingFill())
                     .isStroked(element.isStroked())
                     .isFilled(element.isFilled())
                     .isWindingFill(element.isWindingFill())
+                    .fillColor(Converter.convertColor(element.getGState().getFillColorSpace(), element.getGState().getFillColor()))
+                    .strokeColor(Converter.convertColor(element.getGState().getStrokeColorSpace(), element.getGState().getStrokeColor()))
+                    .linePath(Converter.convertToGeneralPath(element.getPathData()))
                     .build();
             case Element.e_text -> Text.builder()
                     .elementType(element.getType())
-                    .boundingBox(toRectangle2D(element.getBBox()))
+                    .boundingBox(Converter.toRectangle2D(element.getBBox()))
                     .text(element.getTextString())
                     .font(element.getGState().getFont().getType())
                     .fontsize(element.getGState().getFontSize())
                     .build();
             case Element.e_image, Element.e_inline_image -> Image.builder()
                     .elementType(element.getType())
-                    .boundingBox(toRectangle2D(element.getBBox()))
+                    .boundingBox(Converter.toRectangle2D(element.getBBox()))
                     .dataSize(element.getImageDataSize())
                     .height(element.getImageHeight())
                     .width(element.getImageWidth())
@@ -161,10 +197,4 @@ public class ElementFeatures {
         };
     }
 
-
-    private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {
-
-        return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
-    }
-
 }
diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java
index 83601e6..cacb66f 100644
--- a/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/InvisibleElementRemovalService.java
@@ -1,5 +1,6 @@
 package com.iqser.red.pdftronlogic.commons;
 
+import java.awt.Color;
 import java.awt.Shape;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.GeneralPath;
@@ -7,13 +8,10 @@ import java.awt.geom.Rectangle2D;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
 
-import com.google.common.primitives.Bytes;
-import com.google.common.primitives.Doubles;
 import com.pdftron.common.Matrix2D;
 import com.pdftron.common.PDFNetException;
 import com.pdftron.pdf.ColorPt;
@@ -47,9 +45,9 @@ public class InvisibleElementRemovalService {
      * -Text which is transparent or is set to not render
      * -Elements outside of clipping path
      * -Elements that have been painted over by visible and filled Paths
+     * -Elements with the same color as background
      * unhandled cases:
      * -Elements covered by widely stroked path
-     * -Elements with the same color as background
      * -Any Text set to clipping with its many interactions with other elements
      *
      * @param pdfFile The PDF file to process
@@ -178,18 +176,18 @@ public class InvisibleElementRemovalService {
 
     private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
 
-        Rect rect = textElement.getBBox();
+        Rect textBBox = textElement.getBBox();
 
-        if (rect == null) {
+        if (textBBox == null) {
             writer.writeElement(textElement);
             return;
         }
 
         GState gState = textElement.getGState();
 
-        boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
+        boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
 
-        boolean isTextVisible = isTextRenderedVisibly(gState);
+        boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
 
         if (inClippingPath && isTextVisible) {
             context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
@@ -215,7 +213,7 @@ public class InvisibleElementRemovalService {
         }
         if (!isTextVisible) {
             gState.setFillColorSpace(ColorSpace.createDeviceRGB());
-            // blue for elements removed due to transparency or not rendered
+            // blue for elements removed due to transparency or not rendered or same color as background
             gState.setFillColor(new ColorPt(0, 0, 1));
             gState.setTextRenderMode(GState.e_fill_text);
             gState.setFillOpacity(1);
@@ -254,11 +252,11 @@ public class InvisibleElementRemovalService {
         PathData pathData = pathElement.getPathData();
 
         if (pathData.getOperators().length == 0 && pathData.getPoints().length == 0 || pathElement.getBBox() == null) {
-            writer.writeGStateChanges(pathElement);
+            writer.writeElement(pathElement);
             return;
         }
 
-        GeneralPath linePath = convertToGeneralPath(pathData);
+        GeneralPath linePath = Converter.convertToGeneralPath(pathData);
 
         //transform path to initial user space
         var ctm = pathElement.getCTM();
@@ -325,7 +323,7 @@ public class InvisibleElementRemovalService {
         writer.end();
         context.reader().end();
 
-        if (context.overlappedElements().size() > 0) {
+        if (!context.overlappedElements().isEmpty()) {
             log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed");
         }
     }
@@ -387,41 +385,36 @@ public class InvisibleElementRemovalService {
     }
 
 
-    private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {
+    private boolean isTextRenderedVisibly(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
 
         return gState.getTextRenderMode() != GState.e_invisible_text && //
-                !(gState.getTextRenderMode() == GState.e_fill_text && gState.getFillOpacity() == 0) && //
-                !(gState.getTextRenderMode() == GState.e_stroke_text && gState.getStrokeOpacity() == 0) && //
-                !(gState.getTextRenderMode() == GState.e_fill_stroke_text && gState.getFillOpacity() == 0 && gState.getStrokeOpacity() == 0);
+                !(gState.getTextRenderMode() == GState.e_fill_text && !fillIsVisible(gState, textBBox, context)) && //
+                !(gState.getTextRenderMode() == GState.e_stroke_text && !strokeIsVisible(gState, textBBox, context)) && //
+                !(gState.getTextRenderMode() == GState.e_fill_stroke_text && !fillIsVisible(gState, textBBox, context) && !strokeIsVisible(gState, textBBox, context));
     }
 
 
-    private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
+    private boolean strokeIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
 
-        GeneralPath linePath = new GeneralPath();
-        Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
-        Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
-        for (var operator : operators) {
-            switch (operator) {
-                case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
-                case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
-                case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
-                case PathData.e_closepath -> linePath.closePath();
-                case PathData.e_rect -> {
-                    double x = points.next();
-                    double y = points.next();
-                    double w = points.next();
-                    double h = points.next();
-                    linePath.moveTo(x, y);
-                    linePath.lineTo(x + w, y);
-                    linePath.lineTo(x + w, y + h);
-                    linePath.lineTo(x, y + h);
-                    linePath.closePath();
-                }
-                default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
-            }
+        // The stroke is visible only when it is not fully transparent and its color differs from the background.
+        return gState.getStrokeOpacity() != 0 && //
+                differentColorThanBackgroundColor(Converter.convertColor(gState.getStrokeColorSpace(), gState.getStrokeColor()), textBBox, context);
+    }
+
+    // The fill is visible only when it is not fully transparent and its color differs from the background.
+    private boolean fillIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
+
+        return gState.getFillOpacity() != 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getFillColorSpace(), gState.getFillColor()), textBBox, context);
+    }
+
+    // True if the color differs from the fill of any visible element covering the text box, or from white if none covers it.
+    private boolean differentColorThanBackgroundColor(Color fillColor, Rect textBBox, InvisibleElementRemovalContext context) {
+
+        List<ElementFeatures> backgroundElements = context.visibleElements().stream().filter(element -> element.isBackground(textBBox)).toList();
+        if (backgroundElements.isEmpty()) {
+            return !fillColor.equals(Color.WHITE);
         }
-        return linePath;
+        return backgroundElements.stream().anyMatch(element -> !element.matchesFillColor(fillColor));
     }