RED-9746: use quadtree for visible and overlapped elements
This commit is contained in:
parent
0b19f2d04c
commit
d5c506d080
@ -27,7 +27,9 @@ repositories {
|
||||
dependencies {
|
||||
api("org.projectlombok:lombok:1.18.30")
|
||||
api("com.google.guava:guava:33.0.0-jre")
|
||||
api("com.pdftron:PDFNet:10.3.0")
|
||||
api("com.pdftron:PDFNet:10.11.0")
|
||||
implementation("org.locationtech.jts:jts-core:1.19.0")
|
||||
implementation("net.sourceforge.lept4j:lept4j:1.19.1")
|
||||
testImplementation("org.junit.jupiter:junit-jupiter:5.10.2")
|
||||
testImplementation("org.assertj:assertj-core:3.24.2")
|
||||
testImplementation("org.mockito:mockito-core:5.2.0")
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class ComparisonUtils {
|
||||
public static Rectangle2D getPaddedRectangle(ElementFeatures elementFeatures) {
|
||||
|
||||
Rectangle2D inner = elementFeatures.getBoundingBox();
|
||||
|
||||
//To address inconsistencies in the calculation of the bounding box we slightly shrink the inner rectangle
|
||||
double x_with_tolerance = inner.getX() >= 0 ? inner.getX() + TOLERANCE : inner.getX() - TOLERANCE;
|
||||
double y_with_tolerance = inner.getY() >= 0 ? inner.getY() + TOLERANCE : inner.getY() - TOLERANCE;
|
||||
double height_with_tolerance = inner.getHeight() - (2 * TOLERANCE);
|
||||
double width_with_tolerance = inner.getWidth() - (2 * TOLERANCE);
|
||||
return new Rectangle2D.Double(x_with_tolerance, y_with_tolerance, width_with_tolerance, height_with_tolerance);
|
||||
}
|
||||
|
||||
public static boolean almostEqual(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < TOLERANCE;
|
||||
}
|
||||
}
|
||||
@ -1,280 +0,0 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.Rect;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ElementFeatures {
|
||||
|
||||
final private static double RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR = 0.2; // specify how much the x and y value are allowed to differ
|
||||
final private static double RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR = 0.1; // the scale the images are allowed to differ
|
||||
final private static double HAMMING_DISTANCE_THRESHOLD = 4; // defines the similarity of the hash of images
|
||||
int elementType;
|
||||
Rectangle2D boundingBox;
|
||||
|
||||
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
try (var bbox = element.getBBox()) {
|
||||
return element.getType() == elementType && //
|
||||
bbox != null && //
|
||||
rectsAlmostMatch(bbox);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean rectsAlmostMatch(Rect bBox) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return almostEqual(bBox.getX1(), boundingBox.getX()) && //
|
||||
almostEqual(bBox.getY1(), boundingBox.getY()) && //
|
||||
almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
|
||||
almostEqual(bBox.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
protected boolean almostEqual(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < TOLERANCE;
|
||||
}
|
||||
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && rectsAlmostMatch(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean rectsAlmostMatch(Rectangle2D bBox) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return almostEqual(bBox.getX(), boundingBox.getX()) && //
|
||||
almostEqual(bBox.getY(), boundingBox.getY()) && //
|
||||
almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
|
||||
almostEqual(bBox.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
public boolean isSimilarTo(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && areRectsSimilar(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
private boolean areRectsSimilar(Rectangle2D rectangle2D) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return isPositionSimilar(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
|
||||
isPositionSimilar(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
|
||||
isSizeSimilar(rectangle2D.getWidth(), boundingBox.getWidth()) && //
|
||||
isSizeSimilar(rectangle2D.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
protected boolean isPositionSimilar(double a, double b, double boxSize) {
|
||||
|
||||
return Math.abs(a - b) < boxSize * RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
protected boolean isSizeSimilar(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < a * RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
@SuppressWarnings("PMD")
|
||||
public static class Text extends ElementFeatures {
|
||||
|
||||
String text;
|
||||
int font;
|
||||
double fontsize;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
return super.almostMatches(element) && //
|
||||
text.equals(element.getTextString()) && //
|
||||
font == element.getGState().getFont().getType() && //
|
||||
almostEqual(fontsize, element.getGState().getFontSize());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public static class Path extends ElementFeatures {
|
||||
|
||||
boolean isClippingPath;
|
||||
boolean isClipWindingFill;
|
||||
boolean isStroked;
|
||||
boolean isFilled;
|
||||
boolean isWindingFill;
|
||||
Color strokeColor;
|
||||
Color fillColor;
|
||||
GeneralPath linePath;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
return super.almostMatches(element) && //
|
||||
isClippingPath == element.isClippingPath() && //
|
||||
isClipWindingFill == element.isClipWindingFill() && //
|
||||
isStroked == element.isStroked() && //
|
||||
isFilled == element.isFilled() && //
|
||||
isWindingFill == element.isWindingFill();
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesFillColor(Color color) {
|
||||
|
||||
return color.equals(fillColor);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public boolean isBackground(Rect area) {
|
||||
|
||||
return isFilled && //
|
||||
getBoundingBox().intersects(area.getX1(), area.getY1(), area.getWidth(), area.getHeight());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public static class Image extends ElementFeatures {
|
||||
|
||||
int dataSize;
|
||||
int height;
|
||||
int width;
|
||||
int renderingIntent;
|
||||
int componentNum;
|
||||
int bitsPerComponent;
|
||||
String hashOfImage;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
return super.almostMatches(element) && //
|
||||
dataSize == element.getImageDataSize() && //
|
||||
height == element.getImageHeight() && //
|
||||
width == element.getImageWidth() && //
|
||||
renderingIntent == element.getImageRenderingIntent() && //
|
||||
componentNum == element.getComponentNum() && //
|
||||
bitsPerComponent == element.getBitsPerComponent();
|
||||
}
|
||||
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
if (elementFeatures.getClass() != this.getClass()) {
|
||||
return false;
|
||||
}
|
||||
return super.almostMatches(elementFeatures) && this.dataSize == ((Image) elementFeatures).getDataSize() && this.height == ((Image) elementFeatures).getHeight() &&
|
||||
this.width == ((Image) elementFeatures).getWidth() && this.renderingIntent == ((Image) elementFeatures).getRenderingIntent() &&
|
||||
this.componentNum == ((Image) elementFeatures).getComponentNum() && this.bitsPerComponent == ((Image) elementFeatures).getBitsPerComponent() &&
|
||||
calculateHammingDistance(
|
||||
((Image) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
public boolean isSimilarTo(ElementFeatures elementFeatures) {
|
||||
|
||||
return super.isSimilarTo(elementFeatures) && //
|
||||
calculateHammingDistance(((Image) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
// Helper method to calculate the Hamming distance between two hexadecimal strings
|
||||
private int calculateHammingDistance(String hash2) {
|
||||
|
||||
int distance = 0;
|
||||
int maxLength = Math.max(this.hashOfImage.length(), hash2.length());
|
||||
for (int i = 0; i < maxLength; i++) {
|
||||
char char1 = i < this.hashOfImage.length() ? this.hashOfImage.charAt(i) : '0';
|
||||
char char2 = i < hash2.length() ? hash2.charAt(i) : '0';
|
||||
if (char1 != char2) {
|
||||
distance++;
|
||||
}
|
||||
}
|
||||
return distance;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public static class Form extends ElementFeatures {
|
||||
|
||||
int xObjectType;
|
||||
long dictOrArrayOrStreamLength;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
try (var bbox = element.getBBox()) {
|
||||
return element.getType() == getElementType() && //
|
||||
bbox != null && //
|
||||
(super.rectsAlmostMatch(bbox) || almostRotateMatches(bbox.getRectangle())) && xObjectType == element.getXObject()
|
||||
.getType() && dictOrArrayOrStreamLength == element.getXObject().getDecodedStream().size();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
if (elementFeatures.getClass() != this.getClass()) {
|
||||
return false;
|
||||
}
|
||||
return elementFeatures.getElementType() == getElementType() && elementFeatures.getBoundingBox() != null &&
|
||||
(super.rectsAlmostMatch(elementFeatures.getBoundingBox()) || almostRotateMatches(
|
||||
elementFeatures.getBoundingBox()
|
||||
.getBounds2D())) && xObjectType == ((Form) elementFeatures).getXObjectType() &&
|
||||
dictOrArrayOrStreamLength == ((Form) elementFeatures).getDictOrArrayOrStreamLength();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private boolean almostRotateMatches(Rectangle2D bBox) {
|
||||
|
||||
return almostEqual(bBox.getWidth(), getBoundingBox().getHeight()) && //
|
||||
almostEqual(bBox.getHeight(), getBoundingBox().getWidth());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -13,6 +13,11 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatureFactory;
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
|
||||
import com.iqser.red.pdftronlogic.commons.features.ImageFeatures;
|
||||
import com.iqser.red.pdftronlogic.commons.features.PathFeatures;
|
||||
import com.iqser.red.pdftronlogic.commons.lookup.ElementFeatureLookup;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.ColorPt;
|
||||
import com.pdftron.pdf.ColorSpace;
|
||||
@ -148,9 +153,9 @@ public class InvisibleElementRemovalService {
|
||||
private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths, Set<String> markedContentToIgnore) {
|
||||
|
||||
log.info("Start removing invisible Elements");
|
||||
try (ElementWriter writer = new ElementWriter();
|
||||
ElementReader reader = new ElementReader()) {
|
||||
try (ElementWriter writer = new ElementWriter(); ElementReader reader = new ElementReader()) {
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
int pageIndex = 1;
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
@ -164,8 +169,8 @@ public class InvisibleElementRemovalService {
|
||||
.markedContentStack(new MarkedContentStack())
|
||||
.removePaths(removePaths)
|
||||
.delta(delta)
|
||||
.overlappedElements(new ArrayList<>())
|
||||
.visibleElements(new ArrayList<>())
|
||||
.overlappedElements(new ElementFeatureLookup())
|
||||
.visibleElements(new ElementFeatureLookup())
|
||||
.visitedXObjIds(visitedXObjIds)
|
||||
.markedContentToIgnore(markedContentToIgnore)
|
||||
.build();
|
||||
@ -176,6 +181,7 @@ public class InvisibleElementRemovalService {
|
||||
context.markedContentStack().clear();
|
||||
|
||||
removeOverlappedElements(page, writer, context);
|
||||
pageIndex++;
|
||||
}
|
||||
}
|
||||
log.info("Finished removing invisible Elements");
|
||||
@ -240,9 +246,17 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
|
||||
boolean nonTransparent = imageElement.getGState().getBlendMode() == GState.e_bl_normal
|
||||
&& imageElement.getGState().getFillOpacity() == 1
|
||||
&& imageElement.getGState().getStrokeOpacity() == 1
|
||||
&& imageElement.getGState().getSoftMask() == null;
|
||||
|
||||
if (!context.delta() && inClippingPath) {
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(imageElement));
|
||||
if (inClippingPath) {
|
||||
ImageFeatures image = ElementFeatureFactory.buildImage(imageElement);
|
||||
if (nonTransparent) {
|
||||
calculateOverlaps(context, image, writer);
|
||||
}
|
||||
context.visibleElements().add(image);
|
||||
}
|
||||
|
||||
if (context.delta() ^ inClippingPath) {
|
||||
@ -345,60 +359,59 @@ public class InvisibleElementRemovalService {
|
||||
return;
|
||||
}
|
||||
|
||||
try (var ctm = pathElement.getCTM()) {
|
||||
GeneralPath linePath = Converter.convertToGeneralPathAndTransformToInitialUserSpace(pathData, ctm);
|
||||
PathFeatures pathFeatures = ElementFeatureFactory.buildPath(pathElement);
|
||||
GeneralPath linePath = pathFeatures.getLinePath();
|
||||
|
||||
var rect = linePath.getBounds2D();
|
||||
var rect = linePath.getBounds2D();
|
||||
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX(), rect.getY(), rect.getWidth(), rect.getHeight());
|
||||
|
||||
if (pathElement.isClippingPath()) {
|
||||
if (pathElement.isClipWindingFill()) {
|
||||
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
|
||||
} else {
|
||||
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
|
||||
}
|
||||
|
||||
context.clippingPathStack().intersectClippingPath(linePath);
|
||||
pathElement.setPathClip(!context.delta());
|
||||
writer.writeElement(pathElement);
|
||||
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX(), rect.getY(), rect.getWidth(), rect.getHeight());
|
||||
|
||||
if (pathElement.isClippingPath()) {
|
||||
if (pathElement.isClipWindingFill()) {
|
||||
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
|
||||
} else {
|
||||
if (pathElement.isWindingFill()) {
|
||||
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
|
||||
} else {
|
||||
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
|
||||
}
|
||||
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
|
||||
}
|
||||
|
||||
if (inClippingPath) {
|
||||
if (isFilledAndNonTransparent(pathElement)) {
|
||||
calculateOverlapsForLinePath(context, linePath);
|
||||
}
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement));
|
||||
}
|
||||
context.clippingPathStack().intersectClippingPath(linePath);
|
||||
pathElement.setPathClip(!context.delta());
|
||||
writer.writeElement(pathElement);
|
||||
|
||||
if (!context.delta() && (inClippingPath || !context.removePaths())) {
|
||||
} else {
|
||||
if (pathElement.isWindingFill()) {
|
||||
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
|
||||
} else {
|
||||
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
|
||||
}
|
||||
|
||||
if (inClippingPath) {
|
||||
if (isFilledAndNonTransparent(pathElement)) {
|
||||
calculateOverlaps(context, pathFeatures, writer);
|
||||
}
|
||||
context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement));
|
||||
}
|
||||
|
||||
if (!context.delta() && (inClippingPath || !context.removePaths())) {
|
||||
writer.writeElement(pathElement);
|
||||
}
|
||||
|
||||
if (context.delta() && !inClippingPath && context.removePaths()) {
|
||||
try (var color = new ColorPt(1, 0, 0)) {
|
||||
pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
pathElement.getGState().setFillColor(color);
|
||||
pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
pathElement.getGState().setStrokeColor(color);
|
||||
writer.writeElement(pathElement);
|
||||
}
|
||||
|
||||
if (context.delta() && !inClippingPath && context.removePaths()) {
|
||||
try (var color = new ColorPt(1, 0, 0)) {
|
||||
pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
|
||||
pathElement.getGState().setFillColor(color);
|
||||
pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
pathElement.getGState().setStrokeColor(color);
|
||||
writer.writeElement(pathElement);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void calculateOverlapsForLinePath(InvisibleElementRemovalContext context, GeneralPath linePath) {
|
||||
private void calculateOverlaps(InvisibleElementRemovalContext context, ElementFeatures elementFeatures, ElementWriter writer) {
|
||||
|
||||
List<ElementFeatures> currentOverlappedElements = context.visibleElements().stream().filter(features -> almostContains(linePath, features.getBoundingBox())).toList();
|
||||
List<ElementFeatures> currentOverlappedElements = context.visibleElements().findAlmostContained(elementFeatures);
|
||||
context.overlappedElements().addAll(currentOverlappedElements);
|
||||
context.visibleElements().removeAll(currentOverlappedElements);
|
||||
}
|
||||
@ -410,7 +423,8 @@ public class InvisibleElementRemovalService {
|
||||
writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
|
||||
if (context.delta()) {
|
||||
// green for element removed due to overlapping
|
||||
context.overlappedElements().forEach(feature -> drawBBox(writer, feature.getBoundingBox(), "#00FF00"));
|
||||
context.overlappedElements()
|
||||
.forEach(feature -> drawBBox(writer, feature.getBoundingBox(), "#00FF00"));
|
||||
context.overlappedElements().clear();
|
||||
}
|
||||
processOverlappedElements(writer, context);
|
||||
@ -458,24 +472,16 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
private static void removeOverlappedElement(ElementWriter writer, InvisibleElementRemovalContext context, Element element) throws PDFNetException {
|
||||
|
||||
boolean anyMatch = false;
|
||||
for (ElementFeatures elementToRemove : context.overlappedElements()) {
|
||||
if (elementToRemove.almostMatches(element)) {
|
||||
context.overlappedElements().remove(elementToRemove);
|
||||
anyMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!anyMatch) {
|
||||
if (context.overlappedElements.matchesAny(ElementFeatureFactory.extractFeatures(element))) {
|
||||
/*
|
||||
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
|
||||
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
|
||||
Therefore, the position of a following Tj is affected by not writing the first Element.
|
||||
This is why, we write only the Tm command:
|
||||
*/
|
||||
writer.writeGStateChanges(element);
|
||||
} else {
|
||||
writer.writeElement(element);
|
||||
} else if (element.getType() == 3 && element.hasTextMatrix()) {
|
||||
/*
|
||||
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
|
||||
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
|
||||
Therefore, the position of a following Tj is affected by not writing the first Element.
|
||||
This is why, we write only the Tm command:
|
||||
*/
|
||||
writer.writeGStateChanges(element);
|
||||
}
|
||||
}
|
||||
|
||||
@ -518,8 +524,8 @@ public class InvisibleElementRemovalService {
|
||||
private boolean strokeIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
|
||||
|
||||
return gState.getStrokeOpacity() != 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getStrokeColorSpace(), gState.getStrokeColor()),
|
||||
textBBox,
|
||||
context);
|
||||
textBBox,
|
||||
context);
|
||||
}
|
||||
|
||||
|
||||
@ -534,13 +540,15 @@ public class InvisibleElementRemovalService {
|
||||
@SneakyThrows
|
||||
private boolean differentColorThanBackgroundColor(Color fillColor, Rect textBBox, InvisibleElementRemovalContext context) {
|
||||
|
||||
List<ElementFeatures.Path> backgroundElements = findVisiblePathElementsThatIntersect(textBBox, context);
|
||||
List<PathFeatures> backgroundElements = findVisiblePathElementsThatIntersect(textBBox, context);
|
||||
|
||||
if (backgroundElements.isEmpty()) {
|
||||
return !fillColor.equals(Color.WHITE);
|
||||
}
|
||||
|
||||
List<ElementFeatures.Path> pathElementsByColor = backgroundElements.stream().filter(path -> path.getFillColor().equals(fillColor)).toList();
|
||||
List<PathFeatures> pathElementsByColor = backgroundElements.stream()
|
||||
.filter(path -> path.getFillColor().equals(fillColor))
|
||||
.toList();
|
||||
if (pathElementsByColor.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
@ -550,25 +558,30 @@ public class InvisibleElementRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private static List<ElementFeatures.Path> findVisiblePathElementsThatIntersect(Rect textBBox, InvisibleElementRemovalContext context) {
|
||||
private static List<PathFeatures> findVisiblePathElementsThatIntersect(Rect textBBox, InvisibleElementRemovalContext context) {
|
||||
|
||||
var result = new ArrayList<ElementFeatures.Path>();
|
||||
for (var element : context.visibleElements()) {
|
||||
if (element.getElementType() == Element.e_path
|
||||
&& !((ElementFeatures.Path) element).getFillColor().equals(Color.WHITE)
|
||||
&& ((ElementFeatures.Path) element).isBackground(textBBox)) {
|
||||
result.add((ElementFeatures.Path) element);
|
||||
}
|
||||
}
|
||||
var result = new ArrayList<PathFeatures>();
|
||||
context.visibleElements().findIntersecting(textBBox)
|
||||
.forEach(element -> {
|
||||
if (element instanceof PathFeatures pathFeatures
|
||||
&& pathFeatures.isBackground(textBBox)
|
||||
&& !pathFeatures.getFillColor().equals(Color.WHITE)
|
||||
&& pathFeatures.isFilled()) {
|
||||
result.add(pathFeatures);
|
||||
}
|
||||
});
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static Area mergeLinePathsToArea(List<ElementFeatures.Path> pathElementsWithSameColor) {
|
||||
private static Area mergeLinePathsToArea(List<PathFeatures> pathElementsWithSameColor) {
|
||||
|
||||
Area backgroundArea = new Area();
|
||||
pathElementsWithSameColor.stream().map(ElementFeatures.Path::getLinePath).map(Area::new).forEach(backgroundArea::add);
|
||||
pathElementsWithSameColor.stream()
|
||||
.map(PathFeatures::getLinePath)
|
||||
.map(Area::new)
|
||||
.forEach(backgroundArea::add);
|
||||
return backgroundArea;
|
||||
}
|
||||
|
||||
@ -596,9 +609,8 @@ public class InvisibleElementRemovalService {
|
||||
private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {
|
||||
|
||||
try (ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
|
||||
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
|
||||
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d);
|
||||
ElementBuilder eb = new ElementBuilder()) {
|
||||
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
|
||||
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d); ElementBuilder eb = new ElementBuilder()) {
|
||||
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
|
||||
rect.setPathStroke(true);
|
||||
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
|
||||
@ -615,10 +627,11 @@ public class InvisibleElementRemovalService {
|
||||
ElementReader reader,
|
||||
ClippingPathStack clippingPathStack,
|
||||
MarkedContentStack markedContentStack,
|
||||
List<ElementFeatures> overlappedElements,
|
||||
List<ElementFeatures> visibleElements,
|
||||
ElementFeatureLookup overlappedElements,
|
||||
ElementFeatureLookup visibleElements,
|
||||
Set<Long> visitedXObjIds,
|
||||
Set<String> markedContentToIgnore) {
|
||||
Set<String> markedContentToIgnore
|
||||
) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -10,6 +10,8 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatureFactory;
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.ElementReader;
|
||||
@ -165,8 +167,7 @@ public class WatermarkRemovalService {
|
||||
private boolean isTextRotated(Element element) {
|
||||
|
||||
try (var ctm = element.getCTM()) {
|
||||
return Math.abs(ctm.getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(ctm
|
||||
.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD));
|
||||
return Math.abs(ctm.getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(ctm.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD));
|
||||
}
|
||||
}
|
||||
|
||||
@ -197,7 +198,7 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
String hashOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.buildImageWithHash(element, hashOfImage);
|
||||
elementFeaturesLinkedList.add(elementFeatures);
|
||||
}
|
||||
}
|
||||
@ -208,16 +209,10 @@ public class WatermarkRemovalService {
|
||||
private boolean isLocatedNearBorder(Element element, Page page) {
|
||||
|
||||
try (var bbox = element.getBBox(); var contentBox = page.getVisibleContentBox();) {
|
||||
return bbox.getY1() < contentBox.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || bbox
|
||||
.getY2() >
|
||||
contentBox.getY2() -
|
||||
page.getPageHeight() *
|
||||
IMAGE_POSITION_HEIGHT_THRESHOLD ||
|
||||
bbox.getX1() < contentBox
|
||||
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || bbox.getX2() > contentBox
|
||||
.getX2() -
|
||||
page.getPageWidth() *
|
||||
IMAGE_POSITION_WIDTH_THRESHOLD;
|
||||
return bbox.getY1() < contentBox.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD
|
||||
|| bbox.getY2() > contentBox.getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD
|
||||
|| bbox.getX1() < contentBox.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD
|
||||
|| bbox.getX2() > contentBox.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
|
||||
}
|
||||
}
|
||||
|
||||
@ -264,9 +259,9 @@ public class WatermarkRemovalService {
|
||||
.filter(elementFeature -> formObjectsPerPage.values()
|
||||
.stream()
|
||||
.filter(elementFeaturesOnPage -> elementFeaturesOnPage.stream()
|
||||
.anyMatch(
|
||||
elementFeature.getElementType() == Element.e_image || elementFeature.getElementType() == Element.e_inline_image ?
|
||||
elementFeature::isSimilarTo : elementFeature::almostMatches))
|
||||
.anyMatch(elementFeature.getElementType() == Element.e_image
|
||||
|| elementFeature.getElementType()
|
||||
== Element.e_inline_image ? elementFeature::similar : elementFeature::matches))
|
||||
.count() >= minPagesFilter)
|
||||
.toList();
|
||||
}
|
||||
@ -275,8 +270,7 @@ public class WatermarkRemovalService {
|
||||
@SneakyThrows
|
||||
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
try (ElementReader reader = new ElementReader();
|
||||
ElementWriter writer = new ElementWriter()) {
|
||||
try (ElementReader reader = new ElementReader(); ElementWriter writer = new ElementWriter()) {
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
@ -322,12 +316,9 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
}
|
||||
try (var bbox = element.getBBox()) {
|
||||
if (bbox.getHeight() * bbox.getWidth() < minAreaCoveringFromPage && isLocatedNearBorder(element, page) && bbox
|
||||
.getHeight() *
|
||||
bbox
|
||||
.getWidth() <
|
||||
minAreaCoveringFromPage ||
|
||||
element.getXObject() == null) {
|
||||
if (bbox.getHeight() * bbox.getWidth() < minAreaCoveringFromPage
|
||||
&& isLocatedNearBorder(element, page)
|
||||
&& bbox.getHeight() * bbox.getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
|
||||
|
||||
writer.writeElement(element);
|
||||
continue;
|
||||
@ -353,7 +344,7 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
|
||||
if (elementFeatures.almostMatches(element)) {
|
||||
if (elementFeatures.matches(ElementFeatureFactory.extractFeatures(element))) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -385,9 +376,9 @@ public class WatermarkRemovalService {
|
||||
private void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
String hashValueOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures imageFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashValueOfImage);
|
||||
ElementFeatures imageFeatures = ElementFeatureFactory.buildImageWithHash(element, hashValueOfImage);
|
||||
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
|
||||
if (elementFeatures.isSimilarTo(imageFeatures)) {
|
||||
if (elementFeatures.similar(imageFeatures)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -404,7 +395,7 @@ public class WatermarkRemovalService {
|
||||
Set<Long> visitedXObjIds) throws PDFNetException {
|
||||
|
||||
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
|
||||
if (elementFeatures.almostMatches(element)) {
|
||||
if (elementFeatures.matches(ElementFeatureFactory.extractFeatures(element))) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
package com.iqser.red.pdftronlogic.commons.features;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.Converter;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Element;
|
||||
|
||||
@ -10,7 +11,7 @@ public class ElementFeatureFactory {
|
||||
return switch (element.getType()) {
|
||||
case Element.e_path -> buildPath(element);
|
||||
case Element.e_text -> buildText(element);
|
||||
case Element.e_image, Element.e_inline_image -> buildImage(element).build();
|
||||
case Element.e_image, Element.e_inline_image -> buildImage(element);
|
||||
case Element.e_form -> buildForm(element);
|
||||
// This technically should never happen, it's a safetynet
|
||||
default -> throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType());
|
||||
@ -18,18 +19,22 @@ public class ElementFeatureFactory {
|
||||
}
|
||||
|
||||
|
||||
public static ElementFeatures extractFeaturesWithHash(Element element, String hashObject) throws PDFNetException {
|
||||
public static ImageFeatures buildImageWithHash(Element element, String hashObject) throws PDFNetException {
|
||||
|
||||
return buildImage(element)
|
||||
.hashOfImage(hashObject)
|
||||
.build();
|
||||
return buildImageBase(element).hashOfImage(hashObject).build();
|
||||
}
|
||||
|
||||
|
||||
private static ElementFeatures.Form buildForm(Element element) throws PDFNetException {
|
||||
public static ImageFeatures buildImage(Element element) throws PDFNetException {
|
||||
|
||||
return buildImageBase(element).build();
|
||||
}
|
||||
|
||||
|
||||
public static FormFeatures buildForm(Element element) throws PDFNetException {
|
||||
|
||||
try (var bbox = element.getBBox();) {
|
||||
return ElementFeatures.Form.builder()
|
||||
return FormFeatures.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(Converter.toRectangle2D(bbox))
|
||||
.xObjectType(element.getXObject().getType())
|
||||
@ -39,10 +44,10 @@ public class ElementFeatureFactory {
|
||||
}
|
||||
|
||||
|
||||
private static ElementFeatures.Image.ImageBuilder<?, ?> buildImage(Element element) throws PDFNetException {
|
||||
private static ImageFeatures.ImageFeaturesBuilder<?, ?> buildImageBase(Element element) throws PDFNetException {
|
||||
|
||||
try (var bbox = element.getBBox();) {
|
||||
return ElementFeatures.Image.builder()
|
||||
return ImageFeatures.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(Converter.toRectangle2D(bbox))
|
||||
.dataSize(element.getImageDataSize())
|
||||
@ -55,10 +60,10 @@ public class ElementFeatureFactory {
|
||||
}
|
||||
|
||||
|
||||
private static ElementFeatures.Text buildText(Element element) throws PDFNetException {
|
||||
public static TextFeatures buildText(Element element) throws PDFNetException {
|
||||
|
||||
try (var bbox = element.getBBox();) {
|
||||
return ElementFeatures.Text.builder()
|
||||
try (var bbox = element.getBBox()) {
|
||||
return TextFeatures.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(Converter.toRectangle2D(bbox))
|
||||
.text(element.getTextString())
|
||||
@ -69,19 +74,17 @@ public class ElementFeatureFactory {
|
||||
}
|
||||
|
||||
|
||||
private static ElementFeatures.Path buildPath(Element element) throws PDFNetException {
|
||||
public static PathFeatures buildPath(Element element) throws PDFNetException {
|
||||
|
||||
try (var bbox = element.getBBox(); var ctm = element.getCTM();
|
||||
var fillColor = element.getGState().getFillColor();
|
||||
var strokeColor = element.getGState().getStrokeColor()) {
|
||||
return ElementFeatures.Path.builder()
|
||||
try (var bbox = element.getBBox(); var ctm = element.getCTM(); var fillColor = element.getGState().getFillColor(); var strokeColor = element.getGState().getStrokeColor()) {
|
||||
return PathFeatures.builder()
|
||||
.elementType(element.getType())
|
||||
.boundingBox(Converter.toRectangle2D(bbox))
|
||||
.isClippingPath(element.isClippingPath())
|
||||
.isClipWindingFill(element.isClipWindingFill())
|
||||
.isStroked(element.isStroked())
|
||||
.isFilled(element.isFilled())
|
||||
.isWindingFill(element.isWindingFill())
|
||||
.clippingPath(element.isClippingPath())
|
||||
.clipWindingFill(element.isClipWindingFill())
|
||||
.stroked(element.isStroked())
|
||||
.filled(element.isFilled())
|
||||
.windingFill(element.isWindingFill())
|
||||
.fillColor(Converter.convertColor(element.getGState().getFillColorSpace(), fillColor))
|
||||
.strokeColor(Converter.convertColor(element.getGState().getStrokeColorSpace(), strokeColor))
|
||||
.linePath(Converter.convertToGeneralPathAndTransformToInitialUserSpace(element.getPathData(), ctm))
|
||||
@ -0,0 +1,76 @@
|
||||
package com.iqser.red.pdftronlogic.commons.features;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ElementFeatures {
|
||||
|
||||
final private static double RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR = 0.2; // specify how much the x and y value are allowed to differ
|
||||
final private static double RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR = 0.1; // the scale the images are allowed to differ
|
||||
|
||||
int elementType;
|
||||
Rectangle2D boundingBox;
|
||||
|
||||
|
||||
public boolean matches(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && rectsAlmostMatch(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected boolean rectsAlmostMatch(Rectangle2D bBox) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return almostEqual(bBox.getX(), boundingBox.getX()) && //
|
||||
almostEqual(bBox.getY(), boundingBox.getY()) && //
|
||||
almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
|
||||
almostEqual(bBox.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
public boolean similar(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && areRectsSimilar(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
protected boolean areRectsSimilar(Rectangle2D rectangle2D) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return isPositionSimilar(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
|
||||
isPositionSimilar(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
|
||||
isSizeSimilar(rectangle2D.getWidth(), boundingBox.getWidth()) && //
|
||||
isSizeSimilar(rectangle2D.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
protected boolean isPositionSimilar(double a, double b, double boxSize) {
|
||||
|
||||
return Math.abs(a - b) < boxSize * RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
protected boolean isSizeSimilar(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < a * RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
public boolean almostContains(ElementFeatures features) {
|
||||
|
||||
Rectangle2D inner = features.getBoundingBox();
|
||||
return boundingBox.contains(inner);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,45 @@
|
||||
package com.iqser.red.pdftronlogic.commons.features;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class FormFeatures extends ElementFeatures {
|
||||
|
||||
int xObjectType;
|
||||
long dictOrArrayOrStreamLength;
|
||||
|
||||
|
||||
public boolean matches(ElementFeatures elementFeatures) {
|
||||
|
||||
if (elementFeatures.getClass() != this.getClass()) {
|
||||
return false;
|
||||
}
|
||||
return elementFeatures.getElementType() == getElementType()
|
||||
&& elementFeatures.getBoundingBox() != null
|
||||
&& (super.rectsAlmostMatch(elementFeatures.getBoundingBox())
|
||||
|| almostRotateMatches(elementFeatures.getBoundingBox()
|
||||
.getBounds2D()))
|
||||
&& xObjectType == ((FormFeatures) elementFeatures).getXObjectType()
|
||||
&& dictOrArrayOrStreamLength == ((FormFeatures) elementFeatures).getDictOrArrayOrStreamLength();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private boolean almostRotateMatches(Rectangle2D bBox) {
|
||||
|
||||
return almostEqual(bBox.getWidth(), getBoundingBox().getHeight()) && //
|
||||
almostEqual(bBox.getHeight(), getBoundingBox().getWidth());
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,68 @@
|
||||
package com.iqser.red.pdftronlogic.commons.features;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ImageFeatures extends ElementFeatures {
|
||||
|
||||
final private static double HAMMING_DISTANCE_THRESHOLD = 4; // defines the similarity of the hash of images
|
||||
int dataSize;
|
||||
int height;
|
||||
int width;
|
||||
int renderingIntent;
|
||||
int componentNum;
|
||||
int bitsPerComponent;
|
||||
String hashOfImage;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean matches(ElementFeatures elementFeatures) {
|
||||
|
||||
if (elementFeatures instanceof ImageFeatures imageFeatures) {
|
||||
return super.matches(elementFeatures)
|
||||
&& this.dataSize == imageFeatures.getDataSize()
|
||||
&& this.height == imageFeatures.getHeight()
|
||||
&& this.width == imageFeatures.getWidth()
|
||||
&& this.renderingIntent == imageFeatures.getRenderingIntent()
|
||||
&& this.componentNum == imageFeatures.getComponentNum()
|
||||
&& this.bitsPerComponent == imageFeatures.getBitsPerComponent()
|
||||
&& calculateHammingDistance(imageFeatures.getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public boolean similar(ElementFeatures elementFeatures) {
|
||||
|
||||
return super.similar(elementFeatures) && //
|
||||
calculateHammingDistance(((ImageFeatures) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
// Helper method to calculate the Hamming distance between two hexadecimal strings
|
||||
private int calculateHammingDistance(String hash2) {
|
||||
|
||||
if (hash2 == null) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int distance = 0;
|
||||
int maxLength = Math.max(this.hashOfImage.length(), hash2.length());
|
||||
for (int i = 0; i < maxLength; i++) {
|
||||
char char1 = i < this.hashOfImage.length() ? this.hashOfImage.charAt(i) : '0';
|
||||
char char2 = i < hash2.length() ? hash2.charAt(i) : '0';
|
||||
if (char1 != char2) {
|
||||
distance++;
|
||||
}
|
||||
}
|
||||
return distance;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,72 @@
|
||||
package com.iqser.red.pdftronlogic.commons.features;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.getPaddedRectangle;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.pdftron.pdf.Rect;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class PathFeatures extends ElementFeatures {
|
||||
|
||||
boolean clippingPath;
|
||||
boolean clipWindingFill;
|
||||
boolean stroked;
|
||||
boolean filled;
|
||||
boolean windingFill;
|
||||
Color strokeColor;
|
||||
Color fillColor;
|
||||
GeneralPath linePath;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean matches(ElementFeatures element) {
|
||||
|
||||
if (element instanceof PathFeatures pathFeaturesElement) {
|
||||
return super.matches(element)
|
||||
&& clippingPath == pathFeaturesElement.isClippingPath()
|
||||
&& clipWindingFill == pathFeaturesElement.isClipWindingFill()
|
||||
&& stroked == pathFeaturesElement.isStroked()
|
||||
&& filled == pathFeaturesElement.isFilled()
|
||||
&& windingFill == pathFeaturesElement.isWindingFill();
|
||||
}
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesFillColor(Color color) {
|
||||
|
||||
return color.equals(fillColor);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public boolean isBackground(Rect area) {
|
||||
|
||||
return filled && //
|
||||
getBoundingBox().intersects(area.getX1(), area.getY1(), area.getWidth(), area.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean almostContains(ElementFeatures elementFeatures) {
|
||||
|
||||
Rectangle2D innerRect = getPaddedRectangle(elementFeatures);
|
||||
|
||||
return linePath.contains(innerRect);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,36 @@
|
||||
package com.iqser.red.pdftronlogic.commons.features;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
@SuppressWarnings("PMD")
|
||||
public class TextFeatures extends ElementFeatures {
|
||||
|
||||
String text;
|
||||
int font;
|
||||
double fontsize;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean matches(ElementFeatures element) {
|
||||
|
||||
if (element instanceof TextFeatures textFeaturesElement) {
|
||||
|
||||
return super.matches(textFeaturesElement) //
|
||||
&& text.equals(textFeaturesElement.getText()) //
|
||||
&& font == textFeaturesElement.getFont() //
|
||||
&& almostEqual(fontsize, textFeaturesElement.getFontsize());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,37 @@
|
||||
package com.iqser.red.pdftronlogic.commons.lookup;
|
||||
|
||||
import org.locationtech.jts.index.ItemVisitor;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class AnyMatchVisitor implements ItemVisitor {
|
||||
|
||||
private final ElementFeatures queryFeatures;
|
||||
private boolean anyMatch = false;
|
||||
|
||||
|
||||
public boolean hasAnyMatch() {
|
||||
|
||||
return anyMatch;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visitItem(Object o) {
|
||||
|
||||
if (anyMatch) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (o instanceof ElementFeatures features) {
|
||||
if (queryFeatures.matches(features)) {
|
||||
anyMatch = true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,136 @@
|
||||
package com.iqser.red.pdftronlogic.commons.lookup;
|
||||
|
||||
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.locationtech.jts.geom.Envelope;
|
||||
import org.locationtech.jts.index.quadtree.Quadtree;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.Converter;
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
|
||||
import com.pdftron.pdf.Rect;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ElementFeatureLookup {
|
||||
|
||||
Quadtree quadTree = new Quadtree();
|
||||
|
||||
|
||||
public void add(ElementFeatures elementFeatures) {
|
||||
|
||||
quadTree.insert(envelop(elementFeatures), elementFeatures);
|
||||
}
|
||||
|
||||
|
||||
public void remove(ElementFeatures elementFeatures) {
|
||||
|
||||
quadTree.remove(envelop(elementFeatures), elementFeatures);
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesAny(ElementFeatures elementFeatures) {
|
||||
|
||||
AnyMatchVisitor visitor = new AnyMatchVisitor(elementFeatures);
|
||||
quadTree.query(queryEnvelop(elementFeatures), visitor);
|
||||
return visitor.hasAnyMatch();
|
||||
}
|
||||
|
||||
|
||||
public void forEach(Consumer<ElementFeatures> consumer) {
|
||||
|
||||
quadTree.queryAll()
|
||||
.forEach(consumer);
|
||||
}
|
||||
|
||||
|
||||
public void clear() {
|
||||
|
||||
forEach(this::remove);
|
||||
}
|
||||
|
||||
|
||||
public List<ElementFeatures> findAlmostContained(ElementFeatures elementFeatures) {
|
||||
|
||||
PredicateItemVisitor visitor = new PredicateItemVisitor(elementFeatures::almostContains);
|
||||
quadTree.query(queryEnvelop(elementFeatures), visitor);
|
||||
return visitor.getMatchingFeatures();
|
||||
}
|
||||
|
||||
|
||||
public List<ElementFeatures> query(ElementFeatures elementFeatures, Predicate<ElementFeatures> predicate) {
|
||||
|
||||
PredicateItemVisitor visitor = new PredicateItemVisitor(predicate);
|
||||
quadTree.query(queryEnvelop(elementFeatures), visitor);
|
||||
return visitor.getMatchingFeatures();
|
||||
}
|
||||
|
||||
|
||||
private static Envelope envelop(ElementFeatures elementFeatures) {
|
||||
|
||||
Rectangle2D r = elementFeatures.getBoundingBox();
|
||||
return new Envelope(r.getX(), r.getY(), r.getWidth(), r.getHeight());
|
||||
}
|
||||
|
||||
|
||||
private static Envelope queryEnvelop(ElementFeatures elementFeatures) {
|
||||
|
||||
Rectangle2D r = elementFeatures.getBoundingBox();
|
||||
return new Envelope(r.getX() - TOLERANCE, r.getY() - TOLERANCE, r.getWidth() + 2 * TOLERANCE, r.getHeight() + 2 * TOLERANCE);
|
||||
}
|
||||
|
||||
|
||||
public boolean isEmpty() {
|
||||
|
||||
return quadTree.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
public int size() {
|
||||
|
||||
return quadTree.size();
|
||||
}
|
||||
|
||||
|
||||
public void addAll(List<ElementFeatures> currentOverlappedElements) {
|
||||
|
||||
currentOverlappedElements.forEach(this::add);
|
||||
}
|
||||
|
||||
|
||||
public void removeAll(List<ElementFeatures> currentOverlappedElements) {
|
||||
|
||||
currentOverlappedElements.forEach(this::remove);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public List<ElementFeatures> query(Rect bbox, Predicate<ElementFeatures> predicate) {
|
||||
|
||||
PredicateItemVisitor visitor = new PredicateItemVisitor(predicate);
|
||||
quadTree.query(new Envelope(bbox.getX1(), bbox.getY1(), bbox.getWidth(), bbox.getHeight()), visitor);
|
||||
return visitor.getMatchingFeatures();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public List<ElementFeatures> findIntersecting(Rect bbox) {
|
||||
|
||||
Rectangle2D r = Converter.toRectangle2D(bbox);
|
||||
PredicateItemVisitor visitor = new PredicateItemVisitor(elementFeatures -> elementFeatures.getBoundingBox().intersects(r));
|
||||
quadTree.query(new Envelope(r.getX(), r.getY(), r.getWidth(), r.getHeight()), visitor);
|
||||
return visitor.getMatchingFeatures();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,33 @@
|
||||
package com.iqser.red.pdftronlogic.commons.lookup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.locationtech.jts.index.ItemVisitor;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class PredicateItemVisitor implements ItemVisitor {
|
||||
|
||||
private final Predicate<ElementFeatures> predicate;
|
||||
@Getter
|
||||
private final List<ElementFeatures> matchingFeatures = new ArrayList<>();
|
||||
|
||||
|
||||
@Override
|
||||
public void visitItem(Object o) {
|
||||
|
||||
if (o instanceof ElementFeatures features) {
|
||||
if (predicate.test(features)) {
|
||||
matchingFeatures.add(features);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,145 @@
|
||||
package com.iqser.red.pdftronlogic.commons.rendering;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class GhostScriptOutputHandler extends Thread {
|
||||
|
||||
static Pattern pageFinishedPattern = Pattern.compile("Page (\\d+)");
|
||||
|
||||
// If the stdError or stdOut buffer of a thread is not being emptied it might lock the process in case of errors, so we need to empty both streams to prevent a deadlock.
|
||||
// Since both need to read simultaneously we need to implement the readers as separate threads.
|
||||
|
||||
final InputStream is;
|
||||
final String processName;
|
||||
final Type type;
|
||||
|
||||
final Map<Integer, ImageFile> pagesToProcess;
|
||||
final Consumer<ImageFile> outputHandler;
|
||||
final Consumer<String> errorHandler;
|
||||
|
||||
int currentPageNumber;
|
||||
|
||||
|
||||
public static GhostScriptOutputHandler stdError(InputStream is, Consumer<String> errorHandler) {
|
||||
|
||||
return new GhostScriptOutputHandler(is, "GS", Type.ERROR, null, null, errorHandler);
|
||||
}
|
||||
|
||||
|
||||
public static GhostScriptOutputHandler stdOut(InputStream is, Map<Integer, ImageFile> pagesToProcess, Consumer<ImageFile> imageFileOutput, Consumer<String> errorHandler) {
|
||||
|
||||
return new GhostScriptOutputHandler(is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void run() {
|
||||
|
||||
try (InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr)) {
|
||||
|
||||
String line;
|
||||
while (true) {
|
||||
line = br.readLine();
|
||||
|
||||
if (line == null) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (type.equals(Type.ERROR)) {
|
||||
log.error("{}_{}>{}", processName, type.name(), line);
|
||||
} else {
|
||||
log.debug("{}_{}>{}", processName, type.name(), line);
|
||||
addProcessedImageToQueue(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
is.close();
|
||||
if (type.equals(Type.STD_OUT)) {
|
||||
queueFinishedPage(currentPageNumber);
|
||||
|
||||
if (!pagesToProcess.isEmpty()) {
|
||||
errorHandler.accept(String.format("Ghostscript finished for batch, but pages %s remain unprocessed.", formatPagesToProcess()));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private String formatPagesToProcess() {
|
||||
|
||||
if (pagesToProcess.isEmpty()) {
|
||||
return "-";
|
||||
}
|
||||
|
||||
if (pagesToProcess.size() == 1) {
|
||||
return pagesToProcess.keySet()
|
||||
.iterator().next().toString();
|
||||
}
|
||||
return pagesToProcess.keySet()
|
||||
.stream()
|
||||
.mapToInt(Integer::intValue)
|
||||
.min()
|
||||
.orElse(0) + "-" + pagesToProcess.keySet()
|
||||
.stream()
|
||||
.mapToInt(Integer::intValue)
|
||||
.max()
|
||||
.orElse(0);
|
||||
}
|
||||
|
||||
|
||||
private void addProcessedImageToQueue(String line) {
|
||||
|
||||
/*
|
||||
Ghostscript prints the pageNumber it is currently working on, so we remember the current page and queue it as soon as the next comes in.
|
||||
*/
|
||||
Matcher pageNumberMatcher = pageFinishedPattern.matcher(line);
|
||||
if (pageNumberMatcher.find()) {
|
||||
int pageNumber = Integer.parseInt(pageNumberMatcher.group(1));
|
||||
|
||||
if (currentPageNumber == 0) {
|
||||
currentPageNumber = pageNumber;
|
||||
return;
|
||||
}
|
||||
|
||||
queueFinishedPage(currentPageNumber);
|
||||
currentPageNumber = pageNumber;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void queueFinishedPage(int pageNumber) {
|
||||
|
||||
var imageFile = this.pagesToProcess.remove(pageNumber);
|
||||
if (imageFile == null) {
|
||||
errorHandler.accept(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
|
||||
} else {
|
||||
if (!new File(imageFile.absoluteFilePath()).exists()) {
|
||||
errorHandler.accept(String.format("Rendered page with number %d does not exist!", pageNumber));
|
||||
}
|
||||
}
|
||||
outputHandler.accept(imageFile);
|
||||
}
|
||||
|
||||
|
||||
public enum Type {
|
||||
ERROR,
|
||||
STD_OUT
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,192 @@
|
||||
package com.iqser.red.pdftronlogic.commons.rendering;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 142/144
|
||||
public class GhostScriptService {
|
||||
|
||||
int BATCH_SIZE = 256;
|
||||
String FORMAT = ".tiff";
|
||||
String DEVICE = "tiffgray";
|
||||
int DPI = 125;
|
||||
int PROCESS_COUNT = 1;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public CompletableFuture<List<ImageFile>> renderDocument(Path documentFile, Path imageDir) {
|
||||
|
||||
int pageCount = getPageCount(documentFile);
|
||||
List<Integer> allPages = IntStream.range(1, pageCount + 1).boxed()
|
||||
.toList();
|
||||
ImageSupervisorImpl supervisor = new ImageSupervisorImpl(allPages);
|
||||
renderPagesBatched(allPages, documentFile.toFile().toString(), imageDir, supervisor, supervisor.successHandler(), supervisor.errorHandler());
|
||||
return CompletableFuture.supplyAsync(() -> awaitImageFiles(supervisor));
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static List<ImageFile> awaitImageFiles(ImageSupervisorImpl supervisor) {
|
||||
|
||||
supervisor.awaitAll();
|
||||
return supervisor.getRenderedImages();
|
||||
}
|
||||
|
||||
|
||||
private static int getPageCount(Path documentFile) throws PDFNetException {
|
||||
|
||||
try (PDFDoc doc = new PDFDoc(documentFile.toFile().toString())) {
|
||||
return doc.getPageCount();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void renderPagesBatched(List<Integer> pagesToProcess,
|
||||
String documentAbsolutePath,
|
||||
Path tmpImageDir,
|
||||
ImageSupervisor supervisor,
|
||||
Consumer<ImageFile> successHandler,
|
||||
Consumer<String> errorHandler) {
|
||||
|
||||
List<List<ProcessInfo>> processInfoBatches = buildSubListForEachProcess(pagesToProcess,
|
||||
PROCESS_COUNT,
|
||||
BATCH_SIZE
|
||||
* PROCESS_COUNT); // GS has a limit on how many pageIndices per call are possible, so we limit it to 256 pages per process
|
||||
for (int batchIdx = 0; batchIdx < processInfoBatches.size(); batchIdx++) {
|
||||
|
||||
supervisor.requireNoErrors();
|
||||
|
||||
List<ProcessInfo> processInfos = processInfoBatches.get(batchIdx);
|
||||
|
||||
log.info("Batch {}: Running {} gs processes with ({}) pages each",
|
||||
batchIdx,
|
||||
processInfos.size(),
|
||||
processInfos.stream()
|
||||
.map(info -> info.pageNumbers().size())
|
||||
.map(String::valueOf)
|
||||
.collect(Collectors.joining(", ")));
|
||||
|
||||
int finalBatchIdx = batchIdx;
|
||||
List<Process> processes = processInfos.stream()
|
||||
.parallel()
|
||||
.map(info -> buildCmdArgs(info.processIdx(), finalBatchIdx, info.pageNumbers(), tmpImageDir, documentAbsolutePath))
|
||||
.peek(s -> log.debug(String.join(" ", s.cmdArgs())))
|
||||
.map(processInfo -> executeProcess(processInfo, successHandler, errorHandler))
|
||||
.toList();
|
||||
|
||||
List<Integer> processExitCodes = new LinkedList<>();
|
||||
for (Process process : processes) {
|
||||
processExitCodes.add(process.waitFor());
|
||||
}
|
||||
log.info("Batch {}: Ghostscript processes finished with exit codes {}", batchIdx, processExitCodes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<List<ProcessInfo>> buildSubListForEachProcess(List<Integer> stitchedPageNumbers, int processCount, int batchSize) {
|
||||
|
||||
// GhostScript command line can only handle so many page numbers at once, so we split it into batches
|
||||
int batchCount = (int) Math.ceil((double) stitchedPageNumbers.size() / batchSize);
|
||||
|
||||
log.info("Splitting {} page renderings across {} process(es) in {} batch(es) with size {}", stitchedPageNumbers.size(), processCount, batchCount, batchSize);
|
||||
|
||||
List<List<ProcessInfo>> processInfoBatches = new ArrayList<>(batchCount);
|
||||
List<List<List<Integer>>> batchedBalancedSublist = ListSplittingUtils.buildBatchedBalancedSublist(stitchedPageNumbers.stream()
|
||||
.sorted()
|
||||
.toList(), processCount, batchCount);
|
||||
|
||||
for (var batch : batchedBalancedSublist) {
|
||||
List<ProcessInfo> processInfos = new ArrayList<>(processCount);
|
||||
for (int threadIdx = 0; threadIdx < batch.size(); threadIdx++) {
|
||||
List<Integer> balancedPageNumbersSubList = batch.get(threadIdx);
|
||||
processInfos.add(new ProcessInfo(threadIdx, balancedPageNumbersSubList));
|
||||
}
|
||||
processInfoBatches.add(processInfos);
|
||||
}
|
||||
return processInfoBatches;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(Integer processIdx,
|
||||
Integer batchIdx,
|
||||
List<Integer> stitchedImagePageIndices,
|
||||
Path outputDir,
|
||||
String documentAbsolutePath) {
|
||||
|
||||
String imagePathFormat = outputDir.resolve("output_" + processIdx + "_" + batchIdx + ".%04d" + FORMAT).toFile().toString();
|
||||
|
||||
Map<Integer, ImageFile> fullPageImages = new HashMap<>();
|
||||
for (int i = 0; i < stitchedImagePageIndices.size(); i++) {
|
||||
Integer pageNumber = stitchedImagePageIndices.get(i);
|
||||
fullPageImages.put(pageNumber, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
|
||||
}
|
||||
|
||||
String[] cmdArgs = buildCmdArgs(stitchedImagePageIndices, documentAbsolutePath, imagePathFormat);
|
||||
|
||||
return new ProcessCmdsAndRenderedImageFiles(cmdArgs, fullPageImages);
|
||||
}
|
||||
|
||||
|
||||
private String[] buildCmdArgs(List<Integer> pageNumbers, String documentAbsolutePath, String imagePathFormat) {
|
||||
|
||||
StringBuilder sPageList = new StringBuilder();
|
||||
int i = 1;
|
||||
for (Integer integer : pageNumbers) {
|
||||
sPageList.append(integer);
|
||||
if (i < pageNumbers.size()) {
|
||||
sPageList.append(",");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sPageList=" + sPageList, "-sOutputFile=" + imagePathFormat, documentAbsolutePath, "-c", "quit"};
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Process executeProcess(ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
|
||||
|
||||
Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
|
||||
InputStream stdOut = p.getInputStream();
|
||||
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
|
||||
InputStream stdError = p.getErrorStream();
|
||||
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(stdError, errorHandler);
|
||||
|
||||
stdOutLogger.start();
|
||||
stdErrorLogger.start();
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
private record ProcessCmdsAndRenderedImageFiles(String[] cmdArgs, Map<Integer, ImageFile> renderedPageImageFiles) {
|
||||
|
||||
}
|
||||
|
||||
private record ProcessInfo(Integer processIdx, List<Integer> pageNumbers) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,13 @@
|
||||
package com.iqser.red.pdftronlogic.commons.rendering;
|
||||
|
||||
import net.sourceforge.lept4j.Leptonica1;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
|
||||
public record ImageFile(int pageNumber, String absoluteFilePath) {
|
||||
|
||||
public Pix readPix() {
|
||||
|
||||
return Leptonica1.pixRead(absoluteFilePath);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
package com.iqser.red.pdftronlogic.commons.rendering;
|
||||
|
||||
/**
 * Supervises the rendering of page images and exposes a way to verify that rendering went through cleanly.
 */
public interface ImageSupervisor {

    /**
     * Verifies that no error has been recorded so far.
     * <p>
     * Implementations are expected to fail (for example by throwing an unchecked exception) when at least one
     * error was recorded and to return normally otherwise.
     */
    void requireNoErrors();

}
|
||||
@ -0,0 +1,114 @@
|
||||
package com.iqser.red.pdftronlogic.commons.rendering;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class ImageSupervisorImpl implements ImageSupervisor {
|
||||
|
||||
final Map<Integer, CountDownLatch> pageLatches;
|
||||
final Map<Integer, ImageFile> images;
|
||||
final List<String> errors;
|
||||
|
||||
final ImageFile[] finishedPages;
|
||||
|
||||
|
||||
public ImageSupervisorImpl(List<Integer> pageNumbers) {
|
||||
|
||||
this.pageLatches = Collections.synchronizedMap(new HashMap<>());
|
||||
this.images = Collections.synchronizedMap(new HashMap<>());
|
||||
this.errors = Collections.synchronizedList(new ArrayList<>());
|
||||
this.finishedPages = new ImageFile[pageNumbers.size()];
|
||||
for (Integer pageNumber : pageNumbers) {
|
||||
pageLatches.put(pageNumber, new CountDownLatch(1));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<ImageFile> getRenderedImages() {
|
||||
|
||||
return new ArrayList<>(images.values());
|
||||
}
|
||||
|
||||
|
||||
public void markPageFinished(ImageFile imageFile) {
|
||||
|
||||
log.debug("finished page: {}", imageFile.pageNumber());
|
||||
getPageLatch(imageFile.pageNumber()).countDown();
|
||||
images.put(imageFile.pageNumber(), imageFile);
|
||||
finishedPages[imageFile.pageNumber() - 1] = imageFile;
|
||||
}
|
||||
|
||||
|
||||
public Consumer<ImageFile> successHandler() {
|
||||
|
||||
return this::markPageFinished;
|
||||
}
|
||||
|
||||
|
||||
public Consumer<String> errorHandler() {
|
||||
|
||||
return this::markError;
|
||||
}
|
||||
|
||||
|
||||
private CountDownLatch getPageLatch(Integer pageNumber) {
|
||||
|
||||
if (pageNumber == null || !pageLatches.containsKey(pageNumber)) {
|
||||
throw new IllegalArgumentException("awaiting non-existent page " + pageNumber);
|
||||
}
|
||||
return pageLatches.get(pageNumber);
|
||||
}
|
||||
|
||||
|
||||
public ImageFile awaitProcessedPage(Integer pageNumber) throws InterruptedException {
|
||||
|
||||
if (hasErrors()) {
|
||||
return null;
|
||||
}
|
||||
getPageLatch(pageNumber).await();
|
||||
return images.get(pageNumber);
|
||||
}
|
||||
|
||||
|
||||
private boolean hasErrors() {
|
||||
|
||||
return errors.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
public void markError(String errorMessage) {
|
||||
|
||||
this.errors.add(errorMessage);
|
||||
}
|
||||
|
||||
|
||||
public void awaitAll() throws InterruptedException {
|
||||
|
||||
for (CountDownLatch countDownLatch : pageLatches.values()) {
|
||||
countDownLatch.await();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void requireNoErrors() {
|
||||
// GS will log
|
||||
if (this.errors.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors));
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,106 @@
|
||||
package com.iqser.red.pdftronlogic.commons.rendering;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class ListSplittingUtils {
|
||||
|
||||
public List<List<Integer>> buildBalancedContinuousSublist(Integer totalNumberOfEntries, int threadCount) {
|
||||
|
||||
return buildBalancedSublist(IntStream.range(0, totalNumberOfEntries)
|
||||
.map(i -> i + 1).boxed()
|
||||
.toList(), threadCount);
|
||||
}
|
||||
|
||||
|
||||
public <T> List<List<T>> buildBalancedSublist(List<T> entries, int threadCount) {
|
||||
|
||||
List<Integer> balancedEntryCounts = buildBalancedEntryCounts(entries.size(), threadCount);
|
||||
List<List<T>> balancedSublist = new ArrayList<>(threadCount);
|
||||
int startIdx = 0;
|
||||
for (Integer numberOfEntriesPerThread : balancedEntryCounts) {
|
||||
balancedSublist.add(entries.subList(startIdx, startIdx + numberOfEntriesPerThread));
|
||||
startIdx += numberOfEntriesPerThread;
|
||||
}
|
||||
return balancedSublist;
|
||||
}
|
||||
|
||||
|
||||
public <T> List<List<List<T>>> buildBatchedBalancedSublist(List<T> entries, int threadCount, int batchSize) {
|
||||
|
||||
// batches -> threads -> entries
|
||||
List<List<List<T>>> batchedBalancedSubList = new LinkedList<>();
|
||||
List<List<List<T>>> threadsWithBatches = buildBalancedSublist(entries, threadCount).stream()
|
||||
.map(list -> buildBalancedSublist(list, batchSize))
|
||||
.toList();
|
||||
// swap first two dimensions
|
||||
for (int batchIdx = 0; batchIdx < batchSize; batchIdx++) {
|
||||
List<List<T>> threadEntriesPerBatch = new ArrayList<>(threadCount);
|
||||
for (int threadIdx = 0; threadIdx < threadCount; threadIdx++) {
|
||||
threadEntriesPerBatch.add(threadsWithBatches.get(threadIdx).get(batchIdx));
|
||||
}
|
||||
batchedBalancedSubList.add(threadEntriesPerBatch);
|
||||
|
||||
}
|
||||
return batchedBalancedSubList;
|
||||
}
|
||||
|
||||
|
||||
public List<Integer> buildBalancedEntryCounts(int totalNumberOfEntries, int threadCount) {
|
||||
|
||||
List<Integer> numberOfPagesPerThread = new ArrayList<>(threadCount);
|
||||
for (int i = 0; i < threadCount; i++) {
|
||||
numberOfPagesPerThread.add(0);
|
||||
}
|
||||
int threadIdx;
|
||||
for (int i = 0; i < totalNumberOfEntries; i++) {
|
||||
threadIdx = i % threadCount;
|
||||
numberOfPagesPerThread.set(threadIdx, numberOfPagesPerThread.get(threadIdx) + 1);
|
||||
}
|
||||
return numberOfPagesPerThread;
|
||||
}
|
||||
|
||||
|
||||
public static List<String> formatIntervals(List<Integer> sortedList) {
|
||||
|
||||
List<String> intervals = new ArrayList<>();
|
||||
|
||||
if (sortedList.isEmpty()) {
|
||||
return intervals;
|
||||
}
|
||||
|
||||
int start = sortedList.get(0);
|
||||
int end = start;
|
||||
|
||||
for (int i = 1; i < sortedList.size(); i++) {
|
||||
int current = sortedList.get(i);
|
||||
|
||||
if (current == end + 1) {
|
||||
end = current;
|
||||
} else {
|
||||
intervals.add(formatInterval(start, end));
|
||||
start = current;
|
||||
end = start;
|
||||
}
|
||||
}
|
||||
|
||||
intervals.add(formatInterval(start, end));
|
||||
return intervals;
|
||||
}
|
||||
|
||||
|
||||
private static String formatInterval(int start, int end) {
|
||||
|
||||
if (start == end) {
|
||||
return String.valueOf(start);
|
||||
} else {
|
||||
return start + "-" + end;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,249 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.IntBuffer;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.iqser.red.pdftronlogic.commons.rendering.GhostScriptService;
|
||||
import com.iqser.red.pdftronlogic.commons.rendering.ImageFile;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import com.sun.jna.NativeLibrary;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import net.sourceforge.lept4j.Leptonica1;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
import net.sourceforge.lept4j.util.LeptUtils;
|
||||
|
||||
public class VisualEqualityTest {
|
||||
|
||||
public static final double SIMILARITY_THRESHOLD = 0.015; // percentage of pixels which differ by more than 10 points in luminance
|
||||
GhostScriptService ghostScriptService = new GhostScriptService();
|
||||
InvisibleElementRemovalService invisibleElementRemovalService = new InvisibleElementRemovalService();
|
||||
Path stem = Path.of("/tmp/AAA_EQUALITY_TEST/");
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void setup() {
|
||||
|
||||
PDFNet.initialize(PDFTronConfig.license);
|
||||
System.setProperty("jna.library.path", "/home/kschuettler/software/leptonica/vcpkg/installed/x64-linux-dynamic/lib/");
|
||||
|
||||
try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) {
|
||||
assert leptonicaLib != null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void assertVisualEqualityOfProcessedFile() {
|
||||
|
||||
Path folder = Path.of("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/ITEM 19_A15149AC - Primary Skin Irritation Rabbit.pdf");
|
||||
Context context = new Context(stem, new HashMap<>());
|
||||
|
||||
Files.walk(folder)
|
||||
.filter(Files::isRegularFile)
|
||||
.map(Path::toFile)
|
||||
.filter(file -> file.toString().endsWith(".pdf"))
|
||||
.map(File::toPath)
|
||||
.peek(file -> runForFile(file, context))
|
||||
.forEach(f -> System.out.println(context));
|
||||
|
||||
|
||||
assert context.failedFiles.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void runForFile(Path originFile, Context context) {
|
||||
|
||||
System.out.println(originFile.toFile());
|
||||
Path fileFolder = context.getFileFolder(originFile);
|
||||
Files.createDirectories(fileFolder);
|
||||
Path processedFile = fileFolder.resolve("processed.pdf");
|
||||
Path deltaFile = fileFolder.resolve("delta.pdf");
|
||||
Path copiedOriginFile = fileFolder.resolve("origin.pdf");
|
||||
Files.copy(originFile, copiedOriginFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
try (var in = new FileInputStream(copiedOriginFile.toFile()); var out = new FileOutputStream(processedFile.toFile())) {
|
||||
invisibleElementRemovalService.removeInvisibleElements(in, out, false);
|
||||
}
|
||||
try (var in = new FileInputStream(copiedOriginFile.toFile()); var out = new FileOutputStream(deltaFile.toFile())) {
|
||||
invisibleElementRemovalService.removeInvisibleElements(in, out, true);
|
||||
}
|
||||
|
||||
assertVisualEquality(originFile, processedFile, context);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void assertVisualEquality(Path originFile, Path processedFile, Context context) {
|
||||
|
||||
Path imageDir = context.getFileFolder(originFile).resolve("images");
|
||||
Path originDir = imageDir.resolve("origin");
|
||||
Files.createDirectories(originDir);
|
||||
CompletableFuture<List<ImageFile>> originalPagesFuture = ghostScriptService.renderDocument(originFile, originDir);
|
||||
Path processedDir = imageDir.resolve("processed");
|
||||
Files.createDirectories(processedDir);
|
||||
CompletableFuture<List<ImageFile>> processedPagesFuture = ghostScriptService.renderDocument(processedFile, processedDir);
|
||||
Files.walk(context.getErrorFolder(originFile))
|
||||
.map(Path::toFile)
|
||||
.filter(File::isFile)
|
||||
.forEach(File::delete);
|
||||
|
||||
List<ImageFile> originalPages = originalPagesFuture.join();
|
||||
List<ImageFile> processedPages = processedPagesFuture.join();
|
||||
|
||||
if (originalPages.size() != processedPages.size()) {
|
||||
context.getFailedFile(originFile).addErrorMessage("Differing page counts!");
|
||||
return;
|
||||
}
|
||||
|
||||
for (ImageFile originalPage : originalPages) {
|
||||
Optional<ImageFile> samePage = processedPages.stream()
|
||||
.filter(p -> p.pageNumber() == originalPage.pageNumber())
|
||||
.findFirst();
|
||||
if (samePage.isEmpty()) {
|
||||
context.getFailedFile(originFile).addErrorMessage("Page " + originalPage.pageNumber() + " missing!");
|
||||
return;
|
||||
}
|
||||
ImageFile processedPage = samePage.get();
|
||||
Pix originalPagePix;
|
||||
Pix processedPagePix;
|
||||
|
||||
synchronized (VisualEqualityTest.class) {
|
||||
originalPagePix = originalPage.readPix();
|
||||
processedPagePix = processedPage.readPix();
|
||||
}
|
||||
|
||||
String errorFile = context.getErrorFolder(originFile).resolve(originalPage.pageNumber() + ".tiff").toFile().toString();
|
||||
double diffRatio = detectErrors(originalPagePix, processedPagePix, errorFile);
|
||||
|
||||
if (diffRatio > SIMILARITY_THRESHOLD) {
|
||||
context.getFailedFile(originFile).addErrorMessage("Page " + originalPage.pageNumber() + " differs by " + formatPercentage(diffRatio) + "%!");
|
||||
}
|
||||
|
||||
synchronized (VisualEqualityTest.class) {
|
||||
LeptUtils.disposePix(originalPagePix);
|
||||
LeptUtils.disposePix(processedPagePix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static String formatPercentage(double diffRatio) {
|
||||
|
||||
return String.format("%.2f", diffRatio * 100);
|
||||
}
|
||||
|
||||
|
||||
public double detectErrors(Pix pix1, Pix pix2, String errorFile) {
|
||||
// First, check if dimensions are the same
|
||||
if (pix1.w != pix2.w || pix1.h != pix2.h || pix1.d != pix2.d) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create a new Pix for the absolute difference
|
||||
Pix pixDiff = Leptonica1.pixAbsDifference(pix1, pix2);
|
||||
|
||||
// Set a threshold for pixel difference (e.g., 10 out of 255)
|
||||
int threshold = 10;
|
||||
Pix pixThresh = Leptonica1.pixThresholdToBinary(pixDiff, threshold);
|
||||
|
||||
IntBuffer pCount = IntBuffer.allocate(1);
|
||||
Leptonica1.pixCountPixels(pixThresh, pCount, null);
|
||||
long totalPixels = (long) pix1.w * pix1.h;
|
||||
long samePixels = pCount.get();
|
||||
double percentDifference = 1 - (double) samePixels / totalPixels;
|
||||
if (percentDifference > SIMILARITY_THRESHOLD) {
|
||||
Leptonica1.pixWrite(errorFile, pixThresh, 5);
|
||||
}
|
||||
|
||||
LeptUtils.disposePix(pixDiff);
|
||||
LeptUtils.disposePix(pixThresh);
|
||||
return percentDifference;
|
||||
}
|
||||
|
||||
|
||||
private record Context(Path outFolder, Map<Path, FailedFile> failedFiles) {
|
||||
|
||||
public FailedFile getFailedFile(Path path) {
|
||||
|
||||
return failedFiles.computeIfAbsent(path, p -> FailedFile.init());
|
||||
}
|
||||
|
||||
|
||||
public Path getFileFolder(Path file) {
|
||||
|
||||
return outFolder.resolve(file.getFileName());
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
if (failedFiles.isEmpty()) {
|
||||
return "All files visually equal!";
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
failedFiles.forEach((file, failedFile) -> sb.append(file.getFileName().toFile()).append(": ").append(failedFile.toString()).append("\n"));
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public Path getErrorFolder(Path originFile) {
|
||||
|
||||
Path errorDir = getFileFolder(originFile).resolve("error");
|
||||
Files.createDirectories(errorDir);
|
||||
return errorDir;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private record FailedFile(Map<ImageFile, FailedPage> failedPages, List<String> errors) {
|
||||
|
||||
public static FailedFile init() {
|
||||
|
||||
return new FailedFile(new HashMap<>(), new LinkedList<>());
|
||||
}
|
||||
|
||||
|
||||
public void addErrorMessage(String s) {
|
||||
|
||||
errors.add(s);
|
||||
}
|
||||
|
||||
|
||||
public void addFailedPage(ImageFile imageFile, double location) {
|
||||
|
||||
failedPages.computeIfAbsent(imageFile, file -> new FailedPage(new LinkedList<>())).locations().add(location);
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
return String.join(", ", errors);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private record FailedPage(List<Double> locations) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
1243907
src/test/resources/files/everyCharIsImage.pdf
Normal file
1243907
src/test/resources/files/everyCharIsImage.pdf
Normal file
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user