Merge branch 'documine' into 'master'
DOC-301: Implement removal of invisible elements that have the same color as the background. See merge request redactmanager/commons/pdftron-logic-commons!2
This commit is contained in:
commit
c88d0cf186
@ -0,0 +1,67 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.google.common.primitives.Bytes;
|
||||
import com.google.common.primitives.Doubles;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.ColorPt;
|
||||
import com.pdftron.pdf.ColorSpace;
|
||||
import com.pdftron.pdf.PathData;
|
||||
import com.pdftron.pdf.Rect;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class Converter {
|
||||
|
||||
public GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
|
||||
|
||||
GeneralPath linePath = new GeneralPath();
|
||||
Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
|
||||
Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
|
||||
for (var operator : operators) {
|
||||
switch (operator) {
|
||||
case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
|
||||
case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
|
||||
case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
|
||||
case PathData.e_closepath -> linePath.closePath();
|
||||
case PathData.e_rect -> {
|
||||
double x = points.next();
|
||||
double y = points.next();
|
||||
double w = points.next();
|
||||
double h = points.next();
|
||||
linePath.moveTo(x, y);
|
||||
linePath.lineTo(x + w, y);
|
||||
linePath.lineTo(x + w, y + h);
|
||||
linePath.lineTo(x, y + h);
|
||||
linePath.closePath();
|
||||
}
|
||||
default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
|
||||
}
|
||||
}
|
||||
return linePath;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static Color convertColor(ColorSpace colorSpace, ColorPt colorPt) {
|
||||
|
||||
ColorPt rgbColor = colorSpace.convert2RGB(colorPt);
|
||||
Color color = new Color((float) rgbColor.get(0), (float) rgbColor.get(1), (float) rgbColor.get(2));
|
||||
rgbColor.destroy();
|
||||
return color;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static Rectangle2D toRectangle2D(Rect rect) {
|
||||
|
||||
return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
@ -2,6 +2,8 @@ package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
@ -38,6 +40,12 @@ public class ElementFeatures {
|
||||
}
|
||||
|
||||
|
||||
public boolean isBackground(Rect area) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean rectsAlmostMatch(Rect bBox) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
@ -49,6 +57,12 @@ public class ElementFeatures {
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesFillColor(Color color) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@ -82,6 +96,9 @@ public class ElementFeatures {
|
||||
boolean isStroked;
|
||||
boolean isFilled;
|
||||
boolean isWindingFill;
|
||||
Color strokeColor;
|
||||
Color fillColor;
|
||||
GeneralPath linePath;
|
||||
|
||||
|
||||
@Override
|
||||
@ -96,6 +113,22 @@ public class ElementFeatures {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean matchesFillColor(Color color) {
|
||||
|
||||
return color.equals(fillColor);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public boolean isBackground(Rect area) {
|
||||
|
||||
return isFilled && //
|
||||
getBoundingBox().intersects(area.getX1(), area.getY1(), area.getWidth(), area.getHeight()) && //
|
||||
linePath.contains(area.getX1(), area.getY1(), area.getWidth(), area.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@ -132,23 +165,26 @@ public class ElementFeatures {
|
||||
return switch (element.getType()) {
|
||||
case Element.e_path -> Path.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(toRectangle2D(element.getBBox()))
|
||||
.boundingBox(Converter.toRectangle2D(element.getBBox()))
|
||||
.isClippingPath(element.isClippingPath())
|
||||
.isClipWindingFill(element.isClipWindingFill())
|
||||
.isStroked(element.isStroked())
|
||||
.isFilled(element.isFilled())
|
||||
.isWindingFill(element.isWindingFill())
|
||||
.fillColor(Converter.convertColor(element.getGState().getFillColorSpace(), element.getGState().getFillColor()))
|
||||
.strokeColor(Converter.convertColor(element.getGState().getStrokeColorSpace(), element.getGState().getStrokeColor()))
|
||||
.linePath(Converter.convertToGeneralPath(element.getPathData()))
|
||||
.build();
|
||||
case Element.e_text -> Text.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(toRectangle2D(element.getBBox()))
|
||||
.boundingBox(Converter.toRectangle2D(element.getBBox()))
|
||||
.text(element.getTextString())
|
||||
.font(element.getGState().getFont().getType())
|
||||
.fontsize(element.getGState().getFontSize())
|
||||
.build();
|
||||
case Element.e_image, Element.e_inline_image -> Image.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(toRectangle2D(element.getBBox()))
|
||||
.boundingBox(Converter.toRectangle2D(element.getBBox()))
|
||||
.dataSize(element.getImageDataSize())
|
||||
.height(element.getImageHeight())
|
||||
.width(element.getImageWidth())
|
||||
@ -161,10 +197,4 @@ public class ElementFeatures {
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
private static Rectangle2D toRectangle2D(Rect rect) throws PDFNetException {
|
||||
|
||||
return new Rectangle2D.Double(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.Shape;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.GeneralPath;
|
||||
@ -7,13 +8,10 @@ import java.awt.geom.Rectangle2D;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.google.common.primitives.Bytes;
|
||||
import com.google.common.primitives.Doubles;
|
||||
import com.pdftron.common.Matrix2D;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.ColorPt;
|
||||
@ -47,9 +45,9 @@ public class InvisibleElementRemovalService {
|
||||
* -Text which is transparent or is set to not render
|
||||
* -Elements outside of clipping path
|
||||
* -Elements that have been painted over by visible and filled Paths
|
||||
* -Elements with the same color as background
|
||||
* unhandled cases:
|
||||
* -Elements covered by widely stroked path
|
||||
* -Elements with the same color as background
|
||||
* -Any Text set to clipping with its many interactions with other elements
|
||||
*
|
||||
* @param pdfFile The PDF file to process
|
||||
@ -178,18 +176,18 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
private void processText(Element textElement, ElementWriter writer, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
Rect rect = textElement.getBBox();
|
||||
Rect textBBox = textElement.getBBox();
|
||||
|
||||
if (rect == null) {
|
||||
if (textBBox == null) {
|
||||
writer.writeElement(textElement);
|
||||
return;
|
||||
}
|
||||
|
||||
GState gState = textElement.getGState();
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(textBBox.getX1(), textBBox.getY1(), textBBox.getWidth(), textBBox.getHeight());
|
||||
|
||||
boolean isTextVisible = isTextRenderedVisibly(gState);
|
||||
boolean isTextVisible = isTextRenderedVisibly(gState, textBBox, context);
|
||||
|
||||
if (inClippingPath && isTextVisible) {
|
||||
context.visibleElements().add(ElementFeatures.extractFeatures(textElement));
|
||||
@ -215,7 +213,7 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
if (!isTextVisible) {
|
||||
gState.setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
// blue for elements removed due to transparency or not rendered
|
||||
// blue for elements removed due to transparency or not rendered or same color as background
|
||||
gState.setFillColor(new ColorPt(0, 0, 1));
|
||||
gState.setTextRenderMode(GState.e_fill_text);
|
||||
gState.setFillOpacity(1);
|
||||
@ -254,11 +252,11 @@ public class InvisibleElementRemovalService {
|
||||
PathData pathData = pathElement.getPathData();
|
||||
|
||||
if (pathData.getOperators().length == 0 && pathData.getPoints().length == 0 || pathElement.getBBox() == null) {
|
||||
writer.writeGStateChanges(pathElement);
|
||||
writer.writeElement(pathElement);
|
||||
return;
|
||||
}
|
||||
|
||||
GeneralPath linePath = convertToGeneralPath(pathData);
|
||||
GeneralPath linePath = Converter.convertToGeneralPath(pathData);
|
||||
|
||||
//transform path to initial user space
|
||||
var ctm = pathElement.getCTM();
|
||||
@ -325,7 +323,7 @@ public class InvisibleElementRemovalService {
|
||||
writer.end();
|
||||
context.reader().end();
|
||||
|
||||
if (context.overlappedElements().size() > 0) {
|
||||
if (!context.overlappedElements().isEmpty()) {
|
||||
log.warn(context.overlappedElements().size() + " overlapped elements have not been found or removed");
|
||||
}
|
||||
}
|
||||
@ -387,41 +385,36 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private boolean isTextRenderedVisibly(GState gState) throws PDFNetException {
|
||||
private boolean isTextRenderedVisibly(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
return gState.getTextRenderMode() != GState.e_invisible_text && //
|
||||
!(gState.getTextRenderMode() == GState.e_fill_text && gState.getFillOpacity() == 0) && //
|
||||
!(gState.getTextRenderMode() == GState.e_stroke_text && gState.getStrokeOpacity() == 0) && //
|
||||
!(gState.getTextRenderMode() == GState.e_fill_stroke_text && gState.getFillOpacity() == 0 && gState.getStrokeOpacity() == 0);
|
||||
!(gState.getTextRenderMode() == GState.e_fill_text && fillIsVisible(gState, textBBox, context)) && //
|
||||
!(gState.getTextRenderMode() == GState.e_stroke_text && strokeIsVisible(gState, textBBox, context)) && //
|
||||
!(gState.getTextRenderMode() == GState.e_fill_stroke_text && (fillIsVisible(gState, textBBox, context) || strokeIsVisible(gState, textBBox, context)));
|
||||
}
|
||||
|
||||
|
||||
private GeneralPath convertToGeneralPath(PathData pathData) throws PDFNetException {
|
||||
private boolean strokeIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
GeneralPath linePath = new GeneralPath();
|
||||
Iterator<Double> points = Doubles.asList(pathData.getPoints()).iterator();
|
||||
Iterable<Byte> operators = Bytes.asList(pathData.getOperators());
|
||||
for (var operator : operators) {
|
||||
switch (operator) {
|
||||
case PathData.e_moveto -> linePath.moveTo(points.next(), points.next());
|
||||
case PathData.e_lineto -> linePath.lineTo(points.next(), points.next());
|
||||
case PathData.e_cubicto -> linePath.curveTo(points.next(), points.next(), points.next(), points.next(), points.next(), points.next());
|
||||
case PathData.e_closepath -> linePath.closePath();
|
||||
case PathData.e_rect -> {
|
||||
double x = points.next();
|
||||
double y = points.next();
|
||||
double w = points.next();
|
||||
double h = points.next();
|
||||
linePath.moveTo(x, y);
|
||||
linePath.lineTo(x + w, y);
|
||||
linePath.lineTo(x + w, y + h);
|
||||
linePath.lineTo(x, y + h);
|
||||
linePath.closePath();
|
||||
}
|
||||
default -> throw new PDFNetException("Invalid Element Type", 0, "", "", "");
|
||||
}
|
||||
return gState.getStrokeOpacity() == 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getStrokeColorSpace(), gState.getStrokeColor()),
|
||||
textBBox,
|
||||
context);
|
||||
}
|
||||
|
||||
|
||||
private boolean fillIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
return gState.getFillOpacity() == 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getFillColorSpace(), gState.getFillColor()), textBBox, context);
|
||||
}
|
||||
|
||||
|
||||
private boolean differentColorThanBackgroundColor(Color fillColor, Rect textBBox, InvisibleElementRemovalContext context) {
|
||||
|
||||
List<ElementFeatures> backgroundElements = context.visibleElements().stream().filter(element -> element.isBackground(textBBox)).toList();
|
||||
if (backgroundElements.isEmpty()) {
|
||||
return !fillColor.equals(Color.WHITE);
|
||||
}
|
||||
return linePath;
|
||||
return backgroundElements.stream().anyMatch(element -> !element.matchesFillColor(fillColor));
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user