Merge branch 'documine' into 'master'

DOC-301: implement invisible element removal for same color as background

See merge request redactmanager/commons/pdftron-logic-commons!2
This commit is contained in:
Kilian Schüttler 2023-07-11 15:24:44 +02:00
commit c88d0cf186
3 changed files with 139 additions and 49 deletions

View File

@ -0,0 +1,67 @@
package com.iqser.red.pdftronlogic.commons;
import java.awt.Color;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import java.util.Iterator;
import com.google.common.primitives.Bytes;
import com.google.common.primitives.Doubles;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace;
import com.pdftron.pdf.PathData;
import com.pdftron.pdf.Rect;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
/**
 * Stateless helpers that translate PDFTron path, colour and rectangle objects into their
 * {@code java.awt} counterparts.
 */
@UtilityClass
public class Converter {

    /**
     * Replays the operators of a PDFTron {@link PathData} onto a new {@link GeneralPath}.
     * <p>
     * Operands are consumed from {@code pathData.getPoints()} in order: two values for move-to and line-to,
     * six for a cubic curve, four (x, y, width, height) for a rectangle and none for close-path.
     * A rectangle operator is expanded into a closed four-segment sub-path.
     * No transformation is applied; the coordinates are used exactly as stored in {@code pathData}.
     *
     * @param pathData operators and operands of a path element
     * @return the equivalent path
     * @throws PDFNetException if an operator other than the five handled ones is encountered
     * @throws java.util.NoSuchElementException if the point list ends before all operands of an operator were read
     */
    public static GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {

        GeneralPath linePath = new GeneralPath();
        Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
        Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
        for (var operator : operators) {
            switch (operator) {
                case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
                case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
                case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
                case PathData.e_closepath -> linePath.closePath();
                case PathData.e_rect -> {
                    double x = points.next();
                    double y = points.next();
                    double w = points.next();
                    double h = points.next();
                    linePath.moveTo(x, y);
                    linePath.lineTo(x + w, y);
                    linePath.lineTo(x + w, y + h);
                    linePath.lineTo(x, y + h);
                    linePath.closePath();
                }
                default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
            }
        }
        return linePath;
    }


    /**
     * Converts a colour given in an arbitrary PDF colour space into an opaque {@link Color}.
     * <p>
     * The intermediate RGB {@link ColorPt} is destroyed in a {@code finally} block so it is released even when
     * reading a component or constructing the {@link Color} fails.
     *
     * @param colorSpace colour space that {@code colorPt} is expressed in
     * @param colorPt    colour value to convert
     * @return the colour built from the first three components of the RGB conversion
     * @throws IllegalArgumentException if a converted component lies outside {@code [0.0, 1.0]}
     *                                  (raised by the {@link Color} constructor)
     */
    @SneakyThrows
    public static Color convertColor(ColorSpace colorSpace, ColorPt colorPt) {

        ColorPt rgbColor = colorSpace.convert2RGB(colorPt);
        try {
            return new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
        } finally {
            rgbColor.destroy();
        }
    }


    /**
     * Copies a PDFTron {@link Rect} into a {@link Rectangle2D}, using {@code x1}/{@code y1} as origin
     * together with the width and height reported by the rectangle.
     *
     * @param rect rectangle to copy
     * @return a new {@link Rectangle2D.Double} with the same origin and extent
     */
    @SneakyThrows
    public static Rectangle2D toRectangle2D(Rect rect) {

        return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
    }

}

View File

@ -2,6 +2,8 @@ package com.iqser.red.pdftronlogic.commons;
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
import java.awt.Color;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import com.pdftron.common.PDFNetException;
@ -38,6 +40,12 @@ public class ElementFeatures {
}
/**
 * Tells whether this element acts as a background for the given area.
 * This default implementation always answers {@code false}; subclasses may override it.
 *
 * @param area the area to test (unused here)
 * @return always {@code false}
 */
public boolean isBackground(Rect area) {
return false;
}
@SneakyThrows
private boolean rectsAlmostMatch(Rect bBox) {
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
@ -49,6 +57,12 @@ public class ElementFeatures {
}
/**
 * Tells whether this element is filled with the given colour.
 * This default implementation always answers {@code false}; subclasses may override it.
 *
 * @param color the colour to compare against (unused here)
 * @return always {@code false}
 */
public boolean matchesFillColor(Color color) {
return false;
}
@EqualsAndHashCode(callSuper = true)
@Getter
@SuperBuilder
@ -82,6 +96,9 @@ public class ElementFeatures {
boolean isStroked;
boolean isFilled;
boolean isWindingFill;
Color strokeColor;
Color fillColor;
GeneralPath linePath;
@Override
@ -96,6 +113,22 @@ public class ElementFeatures {
}
/**
 * Compares the given colour with this path's fill colour using {@link Color#equals(Object)}.
 *
 * @param color the colour to compare; must not be {@code null} because {@code equals} is invoked on it
 * @return {@code true} if {@code color} equals {@code fillColor}
 */
@Override
public boolean matchesFillColor(Color color) {
return color.equals(fillColor);
}
/**
 * Tells whether this path forms a background for the given area: the path must be filled, its bounding
 * box must intersect the area and its outline must contain the whole area.
 * <p>
 * NOTE(review): {@code linePath} is compared against {@code area} as-is; confirm both are expressed in the
 * same coordinate space.
 *
 * @param area the area that should be covered by this path
 * @return {@code true} if this filled path completely contains {@code area}
 */
@Override
@SneakyThrows
public boolean isBackground(Rect area) {
    if (!isFilled) {
        return false;
    }
    // Read each coordinate once instead of once per geometric test.
    double x = area.getX1();
    double y = area.getY1();
    double width = area.getWidth();
    double height = area.getHeight();
    return getBoundingBox().intersects(x, y, width, height) && linePath.contains(x, y, width, height);
}
}
@EqualsAndHashCode(callSuper = true)
@ -132,23 +165,26 @@ public class ElementFeatures {
return switch (element.getType()) {
case Element.e_path -> Path.builder()
.elementType(element.getType())
.boundingBox(toRectangle2D(element.getBBox()))
.boundingBox(Converter.toRectangle2D(element.getBBox()))
.isClippingPath(element.isClippingPath())
.isClipWindingFill(element.isClipWindingFill())
.isStroked(element.isStroked())
.isFilled(element.isFilled())
.isWindingFill(element.isWindingFill())
.fillColor(Converter.convertColor(element.getGState().getFillColorSpace(), element.getGState().getFillColor()))
.strokeColor(Converter.convertColor(element.getGState().getStrokeColorSpace(), element.getGState().getStrokeColor()))
.linePath(Converter.convertToGeneralPath(element.getPathData()))
.build();
case Element.e_text -> Text.builder()
.elementType(element.getType())
.boundingBox(toRectangle2D(element.getBBox()))
.boundingBox(Converter.toRectangle2D(element.getBBox()))
.text(element.getTextString())
.font(element.getGState().getFont().getType())
.fontsize(element.getGState().getFontSize())
.build();
case Element.e_image, Element.e_inline_image -> Image.builder()
.elementType(element.getType())
.boundingBox(toRectangle2D(element.getBBox()))
.boundingBox(Converter.toRectangle2D(element.getBBox()))
.dataSize(element.getImageDataSize())
.height(element.getImageHeight())
.width(element.getImageWidth())
@ -161,10 +197,4 @@ public class ElementFeatures {
};
}
private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {
return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.pdftronlogic.commons;
import java.awt.Color;
import java.awt.Shape;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
@ -7,13 +8,10 @@ import java.awt.geom.Rectangle2D;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import com.google.common.primitives.Bytes;
import com.google.common.primitives.Doubles;
import com.pdftron.common.Matrix2D;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.ColorPt;
@ -47,9 +45,9 @@ public class InvisibleElementRemovalService {
* -Text which is transparent or is set to not render
* -Elements outside of clipping path
* -Elements that have been painted over by visible and filled Paths
* -Elements with the same color as background
* unhandled cases:
* -Elements covered by widely stroked path
* -Elements with the same color as background
* -Any Text set to clipping with its many interactions with other elements
*
* @param pdfFile The PDF file to process
@ -178,18 +176,18 @@ public class InvisibleElementRemovalService {
private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
Rect rect = textElement.getBBox();
Rect textBBox = textElement.getBBox();
if (rect == null) {
if (textBBox == null) {
writer.writeElement(textElement);
return;
}
GState gState = textElement.getGState();
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
boolean isTextVisible = isTextRenderedVisibly(gState);
boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
if (inClippingPath && isTextVisible) {
context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
@ -215,7 +213,7 @@ public class InvisibleElementRemovalService {
}
if (!isTextVisible) {
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
// blue for elements removed due to transparency or not rendered
// blue for elements removed due to transparency or not rendered or same color as background
gState.setFillColor(new ColorPt(0, 0, 1));
gState.setTextRenderMode(GState.e_fill_text);
gState.setFillOpacity(1);
@ -254,11 +252,11 @@ public class InvisibleElementRemovalService {
PathData pathData = pathElement.getPathData();
if (pathData.getOperators().length == 0 && pathData.getPoints().length == 0 || pathElement.getBBox() == null) {
writer.writeGStateChanges(pathElement);
writer.writeElement(pathElement);
return;
}
GeneralPath linePath = convertToGeneralPath(pathData);
GeneralPath linePath = Converter.convertToGeneralPath(pathData);
//transform path to initial user space
var ctm = pathElement.getCTM();
@ -325,7 +323,7 @@ public class InvisibleElementRemovalService {
writer.end();
context.reader().end();
if (context.overlappedElements().size() > 0) {
if (!context.overlappedElements().isEmpty()) {
log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed");
}
}
@ -387,41 +385,36 @@ public class InvisibleElementRemovalService {
}
private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {
private boolean isTextRenderedVisibly(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
return gState.getTextRenderMode() != GState.e_invisible_text && //
!(gState.getTextRenderMode() == GState.e_fill_text && gState.getFillOpacity() == 0) && //
!(gState.getTextRenderMode() == GState.e_stroke_text && gState.getStrokeOpacity() == 0) && //
!(gState.getTextRenderMode() == GState.e_fill_stroke_text && gState.getFillOpacity() == 0 && gState.getStrokeOpacity() == 0);
!(gState.getTextRenderMode() == GState.e_fill_text && fillIsVisible(gState, textBBox, context)) && //
!(gState.getTextRenderMode() == GState.e_stroke_text && strokeIsVisible(gState, textBBox, context)) && //
!(gState.getTextRenderMode() == GState.e_fill_stroke_text && (fillIsVisible(gState, textBBox, context) || strokeIsVisible(gState, textBBox, context)));
}
private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
private boolean strokeIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
GeneralPath linePath = new GeneralPath();
Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
for (var operator : operators) {
switch (operator) {
case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
case PathData.e_closepath -> linePath.closePath();
case PathData.e_rect -> {
double x = points.next();
double y = points.next();
double w = points.next();
double h = points.next();
linePath.moveTo(x, y);
linePath.lineTo(x + w, y);
linePath.lineTo(x + w, y + h);
linePath.lineTo(x, y + h);
linePath.closePath();
}
default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
}
return gState.getStrokeOpacity() == 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getStrokeColorSpace(), gState.getStrokeColor()),
textBBox,
context);
}
private boolean fillIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
return gState.getFillOpacity() == 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getFillColorSpace(), gState.getFillColor()), textBBox, context);
}
private boolean differentColorThanBackgroundColor(Color fillColor, Rect textBBox, InvisibleElementRemovalContext context) {
List<ElementFeatures> backgroundElements = context.visibleElements().stream().filter(element -> element.isBackground(textBBox)).toList();
if (backgroundElements.isEmpty()) {
return !fillColor.equals(Color.WHITE);
}
return linePath;
return backgroundElements.stream().anyMatch(element -> !element.matchesFillColor(fillColor));
}