RED-9746: use quadtree for visible and overlapped elements

This commit is contained in:
Kilian Schuettler 2024-08-14 11:16:22 +02:00
parent 0b19f2d04c
commit d5c506d080
22 changed files with 1245410 additions and 416 deletions

View File

@ -27,7 +27,9 @@ repositories {
dependencies {
api("org.projectlombok:lombok:1.18.30")
api("com.google.guava:guava:33.0.0-jre")
api("com.pdftron:PDFNet:10.3.0")
api("com.pdftron:PDFNet:10.11.0")
implementation("org.locationtech.jts:jts-core:1.19.0")
implementation("net.sourceforge.lept4j:lept4j:1.19.1")
testImplementation("org.junit.jupiter:junit-jupiter:5.10.2")
testImplementation("org.assertj:assertj-core:3.24.2")
testImplementation("org.mockito:mockito-core:5.2.0")

View File

@ -0,0 +1,29 @@
package com.iqser.red.pdftronlogic.commons;
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
import java.awt.geom.Rectangle2D;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
import lombok.experimental.UtilityClass;
/**
 * Geometry helpers for comparing element bounding boxes with a small tolerance.
 *
 * <p>All comparisons use {@code InvisibleElementRemovalService.TOLERANCE} as the allowed deviation.
 */
@UtilityClass
public class ComparisonUtils {

    /**
     * Returns the bounding box of the given element shrunk by {@code TOLERANCE} on every side.
     *
     * <p>The shrunk rectangle is meant to be used as the "inner" rectangle of a containment check, so that small
     * inconsistencies in the bounding box calculation do not prevent a match. Shrinking always moves the origin
     * inward (towards larger x/y) independent of the sign of the coordinate; subtracting the tolerance for negative
     * coordinates would move the left/bottom edge outside the original box.
     *
     * <p>If the box is smaller than {@code 2 * TOLERANCE} in a dimension, the resulting width/height is not positive,
     * which {@link Rectangle2D} treats as an empty rectangle.
     *
     * @param elementFeatures the element whose bounding box is shrunk; its bounding box must not be {@code null}
     * @return a new rectangle lying inside the element's bounding box
     */
    public static Rectangle2D getPaddedRectangle(ElementFeatures elementFeatures) {

        Rectangle2D inner = elementFeatures.getBoundingBox();
        // To address inconsistencies in the calculation of the bounding box we slightly shrink the inner rectangle.
        double xWithTolerance = inner.getX() + TOLERANCE;
        double yWithTolerance = inner.getY() + TOLERANCE;
        double widthWithTolerance = inner.getWidth() - (2 * TOLERANCE);
        double heightWithTolerance = inner.getHeight() - (2 * TOLERANCE);
        return new Rectangle2D.Double(xWithTolerance, yWithTolerance, widthWithTolerance, heightWithTolerance);
    }


    /**
     * Checks whether two values differ by less than {@code TOLERANCE}.
     *
     * @param a first value
     * @param b second value
     * @return {@code true} if {@code |a - b| < TOLERANCE}
     */
    public static boolean almostEqual(double a, double b) {

        return Math.abs(a - b) < TOLERANCE;
    }

}

View File

@ -1,280 +0,0 @@
package com.iqser.red.pdftronlogic.commons;
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
import java.awt.Color;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.Rect;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Comparable properties of a PDFTron {@link Element}: its element type and bounding box.
 *
 * <p>Two kinds of comparison are offered: {@code almostMatches} compares with the absolute {@code TOLERANCE},
 * {@code isSimilarTo} compares position and size relative to the size of the compared box. The nested subclasses
 * add the type specific properties for text, path, image and form elements.
 */
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ElementFeatures {

    private static final double RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR = 0.2; // specify how much the x and y value are allowed to differ
    private static final double RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR = 0.1; // the scale the images are allowed to differ
    private static final double HAMMING_DISTANCE_THRESHOLD = 4; // defines the similarity of the hash of images

    int elementType;
    Rectangle2D boundingBox;


    /**
     * Checks whether the given element has the same type and (within {@code TOLERANCE}) the same bounding box.
     *
     * @param element the element to compare against
     * @return {@code true} if type and bounding box match
     * @throws PDFNetException if PDFTron fails to provide the element's properties
     */
    public boolean almostMatches(Element element) throws PDFNetException {

        try (var bbox = element.getBBox()) {
            return element.getType() == elementType && //
                   bbox != null && //
                   rectsAlmostMatch(bbox);
        }
    }


    @SneakyThrows
    private boolean rectsAlmostMatch(Rect bBox) {
        // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
        return almostEqual(bBox.getX1(), boundingBox.getX()) && //
               almostEqual(bBox.getY1(), boundingBox.getY()) && //
               almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
               almostEqual(bBox.getHeight(), boundingBox.getHeight());
    }


    /**
     * @return {@code true} if {@code a} and {@code b} differ by less than {@code TOLERANCE}
     */
    protected boolean almostEqual(double a, double b) {

        return Math.abs(a - b) < TOLERANCE;
    }


    /**
     * Checks whether the given features have the same type and (within {@code TOLERANCE}) the same bounding box.
     *
     * @param elementFeatures the features to compare against
     * @return {@code true} if type and bounding box match; {@code false} if the other bounding box is {@code null}
     */
    public boolean almostMatches(ElementFeatures elementFeatures) {

        return elementFeatures.getElementType() == elementType //
               && elementFeatures.getBoundingBox() != null //
               && rectsAlmostMatch(elementFeatures.getBoundingBox());
    }


    private boolean rectsAlmostMatch(Rectangle2D bBox) {
        // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
        return almostEqual(bBox.getX(), boundingBox.getX()) && //
               almostEqual(bBox.getY(), boundingBox.getY()) && //
               almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
               almostEqual(bBox.getHeight(), boundingBox.getHeight());
    }


    /**
     * Checks whether the given features have the same type and a bounding box of similar position and size.
     * The allowed deviation is relative to the size of the given bounding box.
     *
     * @param elementFeatures the features to compare against
     * @return {@code true} if type matches and the bounding boxes are similar
     */
    public boolean isSimilarTo(ElementFeatures elementFeatures) {

        return elementFeatures.getElementType() == elementType //
               && elementFeatures.getBoundingBox() != null //
               && areRectsSimilar(elementFeatures.getBoundingBox());
    }


    private boolean areRectsSimilar(Rectangle2D rectangle2D) {
        // Position may deviate by a fraction of the compared box' extent, size by a fraction of the compared size
        return isPositionSimilar(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
               isPositionSimilar(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
               isSizeSimilar(rectangle2D.getWidth(), boundingBox.getWidth()) && //
               isSizeSimilar(rectangle2D.getHeight(), boundingBox.getHeight());
    }


    /**
     * @return {@code true} if the two coordinates differ by less than 20% of {@code boxSize}
     */
    protected boolean isPositionSimilar(double a, double b, double boxSize) {

        return Math.abs(a - b) < boxSize * RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR;
    }


    /**
     * @return {@code true} if the two sizes differ by less than 10% of {@code a}
     */
    protected boolean isSizeSimilar(double a, double b) {

        return Math.abs(a - b) < a * RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR;
    }


    /**
     * Features of a text element: the text string, the font type and the font size.
     */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    @SuppressWarnings("PMD")
    public static class Text extends ElementFeatures {

        String text;
        int font;
        double fontsize;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            return super.almostMatches(element) && //
                   text.equals(element.getTextString()) && //
                   font == element.getGState().getFont().getType() && //
                   almostEqual(fontsize, element.getGState().getFontSize());
        }

    }

    /**
     * Features of a path element: clipping/stroke/fill flags, colors and the path geometry.
     */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    public static class Path extends ElementFeatures {

        boolean isClippingPath;
        boolean isClipWindingFill;
        boolean isStroked;
        boolean isFilled;
        boolean isWindingFill;
        Color strokeColor;
        Color fillColor;
        GeneralPath linePath;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            return super.almostMatches(element) && //
                   isClippingPath == element.isClippingPath() && //
                   isClipWindingFill == element.isClipWindingFill() && //
                   isStroked == element.isStroked() && //
                   isFilled == element.isFilled() && //
                   isWindingFill == element.isWindingFill();
        }


        /**
         * @return {@code true} if the given color equals this path's fill color
         */
        public boolean matchesFillColor(Color color) {

            return color.equals(fillColor);
        }


        /**
         * @return {@code true} if this path is filled and its bounding box intersects the given area
         */
        @SneakyThrows
        public boolean isBackground(Rect area) {

            return isFilled && //
                   getBoundingBox().intersects(area.getX1(), area.getY1(), area.getWidth(), area.getHeight());
        }

    }

    /**
     * Features of an image element: raw image properties and an optional hash of the image content.
     */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    public static class Image extends ElementFeatures {

        int dataSize;
        int height;
        int width;
        int renderingIntent;
        int componentNum;
        int bitsPerComponent;
        String hashOfImage;


        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            return super.almostMatches(element) && //
                   dataSize == element.getImageDataSize() && //
                   height == element.getImageHeight() && //
                   width == element.getImageWidth() && //
                   renderingIntent == element.getImageRenderingIntent() && //
                   componentNum == element.getComponentNum() && //
                   bitsPerComponent == element.getBitsPerComponent();
        }


        /**
         * In addition to type and bounding box, all image properties must be equal and the image hashes must be
         * within the Hamming distance threshold. Features of another class never match.
         */
        @Override
        public boolean almostMatches(ElementFeatures elementFeatures) {

            if (elementFeatures.getClass() != this.getClass()) {
                return false;
            }
            Image other = (Image) elementFeatures;
            return super.almostMatches(elementFeatures) //
                   && this.dataSize == other.getDataSize() //
                   && this.height == other.getHeight() //
                   && this.width == other.getWidth() //
                   && this.renderingIntent == other.getRenderingIntent() //
                   && this.componentNum == other.getComponentNum() //
                   && this.bitsPerComponent == other.getBitsPerComponent() //
                   && calculateHammingDistance(other.getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
        }


        /**
         * Similar bounding box plus image hashes within the Hamming distance threshold.
         * Features that are not image features are never similar (instead of failing with a ClassCastException).
         */
        @Override
        public boolean isSimilarTo(ElementFeatures elementFeatures) {

            return elementFeatures instanceof Image other //
                   && super.isSimilarTo(elementFeatures) //
                   && calculateHammingDistance(other.getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
        }


        /**
         * Counts the positions at which this image's hash and the given hash differ; the shorter hash is padded
         * with {@code '0'}. If either hash is missing the images cannot be compared, so the maximum distance is
         * returned and the caller's threshold check fails.
         */
        private int calculateHammingDistance(String hash2) {

            if (this.hashOfImage == null || hash2 == null) {
                return Integer.MAX_VALUE;
            }
            int distance = 0;
            int maxLength = Math.max(this.hashOfImage.length(), hash2.length());
            for (int i = 0; i < maxLength; i++) {
                char char1 = i < this.hashOfImage.length() ? this.hashOfImage.charAt(i) : '0';
                char char2 = i < hash2.length() ? hash2.charAt(i) : '0';
                if (char1 != char2) {
                    distance++;
                }
            }
            return distance;
        }

    }

    /**
     * Features of a form element: the XObject type and the length of its decoded stream.
     */
    @EqualsAndHashCode(callSuper = true)
    @Getter
    @SuperBuilder
    @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
    public static class Form extends ElementFeatures {

        int xObjectType;
        long dictOrArrayOrStreamLength;


        /**
         * Like the base check, but the bounding box may also match with width and height swapped.
         */
        @Override
        public boolean almostMatches(Element element) throws PDFNetException {

            try (var bbox = element.getBBox()) {
                return element.getType() == getElementType() && //
                       bbox != null && //
                       (super.rectsAlmostMatch(bbox) || almostRotateMatches(bbox.getRectangle())) && //
                       xObjectType == element.getXObject().getType() && //
                       dictOrArrayOrStreamLength == element.getXObject().getDecodedStream().size();
            }
        }


        /**
         * Like the base check, but the bounding box may also match with width and height swapped.
         * Features of another class never match.
         */
        @Override
        public boolean almostMatches(ElementFeatures elementFeatures) {

            if (elementFeatures.getClass() != this.getClass()) {
                return false;
            }
            Form other = (Form) elementFeatures;
            Rectangle2D otherBox = elementFeatures.getBoundingBox();
            return elementFeatures.getElementType() == getElementType() //
                   && otherBox != null //
                   && (super.rectsAlmostMatch(otherBox) || almostRotateMatches(otherBox.getBounds2D())) //
                   && xObjectType == other.getXObjectType() //
                   && dictOrArrayOrStreamLength == other.getDictOrArrayOrStreamLength();
        }


        // Only the swapped dimensions are compared; the position is not checked for the rotated case.
        private boolean almostRotateMatches(Rectangle2D bBox) {

            return almostEqual(bBox.getWidth(), getBoundingBox().getHeight()) && //
                   almostEqual(bBox.getHeight(), getBoundingBox().getWidth());
        }

    }

}

View File

@ -13,6 +13,11 @@ import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatureFactory;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
import com.iqser.red.pdftronlogic.commons.features.ImageFeatures;
import com.iqser.red.pdftronlogic.commons.features.PathFeatures;
import com.iqser.red.pdftronlogic.commons.lookup.ElementFeatureLookup;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace;
@ -148,9 +153,9 @@ public class InvisibleElementRemovalService {
private void execute(PDFDoc pdfDoc, boolean delta, boolean removePaths, Set<String> markedContentToIgnore) {
log.info("Start removing invisible Elements");
try (ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader()) {
try (ElementWriter writer = new ElementWriter(); ElementReader reader = new ElementReader()) {
Set<Long> visitedXObjIds = new TreeSet<>();
int pageIndex = 1;
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
@ -164,8 +169,8 @@ public class InvisibleElementRemovalService {
.markedContentStack(new MarkedContentStack())
.removePaths(removePaths)
.delta(delta)
.overlappedElements(new ArrayList<>())
.visibleElements(new ArrayList<>())
.overlappedElements(new ElementFeatureLookup())
.visibleElements(new ElementFeatureLookup())
.visitedXObjIds(visitedXObjIds)
.markedContentToIgnore(markedContentToIgnore)
.build();
@ -176,6 +181,7 @@ public class InvisibleElementRemovalService {
context.markedContentStack().clear();
removeOverlappedElements(page, writer, context);
pageIndex++;
}
}
log.info("Finished removing invisible Elements");
@ -240,9 +246,17 @@ public class InvisibleElementRemovalService {
}
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX1(), rect.getY1(), rect.getWidth(), rect.getHeight());
boolean nonTransparent = imageElement.getGState().getBlendMode() == GState.e_bl_normal
&& imageElement.getGState().getFillOpacity() == 1
&& imageElement.getGState().getStrokeOpacity() == 1
&& imageElement.getGState().getSoftMask() == null;
if (!context.delta() && inClippingPath) {
context.visibleElements().add(ElementFeatureFactory.extractFeatures(imageElement));
if (inClippingPath) {
ImageFeatures image = ElementFeatureFactory.buildImage(imageElement);
if (nonTransparent) {
calculateOverlaps(context, image, writer);
}
context.visibleElements().add(image);
}
if (context.delta() ^ inClippingPath) {
@ -345,60 +359,59 @@ public class InvisibleElementRemovalService {
return;
}
try (var ctm = pathElement.getCTM()) {
GeneralPath linePath = Converter.convertToGeneralPathAndTransformToInitialUserSpace(pathData, ctm);
PathFeatures pathFeatures = ElementFeatureFactory.buildPath(pathElement);
GeneralPath linePath = pathFeatures.getLinePath();
var rect = linePath.getBounds2D();
var rect = linePath.getBounds2D();
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX(), rect.getY(), rect.getWidth(), rect.getHeight());
if (pathElement.isClippingPath()) {
if (pathElement.isClipWindingFill()) {
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
} else {
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
}
context.clippingPathStack().intersectClippingPath(linePath);
pathElement.setPathClip(!context.delta());
writer.writeElement(pathElement);
boolean inClippingPath = context.clippingPathStack().almostIntersects(rect.getX(), rect.getY(), rect.getWidth(), rect.getHeight());
if (pathElement.isClippingPath()) {
if (pathElement.isClipWindingFill()) {
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
} else {
if (pathElement.isWindingFill()) {
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
} else {
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
}
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
}
if (inClippingPath) {
if (isFilledAndNonTransparent(pathElement)) {
calculateOverlapsForLinePath(context, linePath);
}
context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement));
}
context.clippingPathStack().intersectClippingPath(linePath);
pathElement.setPathClip(!context.delta());
writer.writeElement(pathElement);
if (!context.delta() && (inClippingPath || !context.removePaths())) {
} else {
if (pathElement.isWindingFill()) {
linePath.setWindingRule(GeneralPath.WIND_NON_ZERO);
} else {
linePath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
}
if (inClippingPath) {
if (isFilledAndNonTransparent(pathElement)) {
calculateOverlaps(context, pathFeatures, writer);
}
context.visibleElements().add(ElementFeatureFactory.extractFeatures(pathElement));
}
if (!context.delta() && (inClippingPath || !context.removePaths())) {
writer.writeElement(pathElement);
}
if (context.delta() && !inClippingPath && context.removePaths()) {
try (var color = new ColorPt(1, 0, 0)) {
pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
pathElement.getGState().setFillColor(color);
pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
pathElement.getGState().setStrokeColor(color);
writer.writeElement(pathElement);
}
if (context.delta() && !inClippingPath && context.removePaths()) {
try (var color = new ColorPt(1, 0, 0)) {
pathElement.getGState().setFillColorSpace(ColorSpace.createDeviceRGB());
pathElement.getGState().setFillColor(color);
pathElement.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
pathElement.getGState().setStrokeColor(color);
writer.writeElement(pathElement);
}
}
}
}
}
}
private void calculateOverlapsForLinePath(InvisibleElementRemovalContext context, GeneralPath linePath) {
private void calculateOverlaps(InvisibleElementRemovalContext context, ElementFeatures elementFeatures, ElementWriter writer) {
List<ElementFeatures> currentOverlappedElements = context.visibleElements().stream().filter(features -> almostContains(linePath, features.getBoundingBox())).toList();
List<ElementFeatures> currentOverlappedElements = context.visibleElements().findAlmostContained(elementFeatures);
context.overlappedElements().addAll(currentOverlappedElements);
context.visibleElements().removeAll(currentOverlappedElements);
}
@ -410,7 +423,8 @@ public class InvisibleElementRemovalService {
writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
if (context.delta()) {
// green for element removed due to overlapping
context.overlappedElements().forEach(feature -> drawBBox(writer, feature.getBoundingBox(), "#00FF00"));
context.overlappedElements()
.forEach(feature -> drawBBox(writer, feature.getBoundingBox(), "#00FF00"));
context.overlappedElements().clear();
}
processOverlappedElements(writer, context);
@ -458,24 +472,16 @@ public class InvisibleElementRemovalService {
private static void removeOverlappedElement(ElementWriter writer, InvisibleElementRemovalContext context, Element element) throws PDFNetException {
boolean anyMatch = false;
for (ElementFeatures elementToRemove : context.overlappedElements()) {
if (elementToRemove.almostMatches(element)) {
context.overlappedElements().remove(elementToRemove);
anyMatch = true;
break;
}
}
if (!anyMatch) {
if (context.overlappedElements.matchesAny(ElementFeatureFactory.extractFeatures(element))) {
/*
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
Therefore, the position of a following Tj is affected by not writing the first Element.
This is why, we write only the Tm command:
*/
writer.writeGStateChanges(element);
} else {
writer.writeElement(element);
} else if (element.getType() == 3 && element.hasTextMatrix()) {
/*
PDFTron Element with type "text" refers to a Tj command. If a Tm command is just above it in the pdf file, PDFTron will join the two commands and treat them as one Element.
hasTextMatrix() checks for this case specifically. Also, Tm changes the position for a whole BT/ET segment, possibly containing multiple Tj commands.
Therefore, the position of a following Tj is affected by not writing the first Element.
This is why, we write only the Tm command:
*/
writer.writeGStateChanges(element);
}
}
@ -518,8 +524,8 @@ public class InvisibleElementRemovalService {
private boolean strokeIsVisible(GState gState, Rect textBBox, InvisibleElementRemovalContext context) throws PDFNetException {
return gState.getStrokeOpacity() != 0 && differentColorThanBackgroundColor(Converter.convertColor(gState.getStrokeColorSpace(), gState.getStrokeColor()),
textBBox,
context);
textBBox,
context);
}
@ -534,13 +540,15 @@ public class InvisibleElementRemovalService {
@SneakyThrows
private boolean differentColorThanBackgroundColor(Color fillColor, Rect textBBox, InvisibleElementRemovalContext context) {
List<ElementFeatures.Path> backgroundElements = findVisiblePathElementsThatIntersect(textBBox, context);
List<PathFeatures> backgroundElements = findVisiblePathElementsThatIntersect(textBBox, context);
if (backgroundElements.isEmpty()) {
return !fillColor.equals(Color.WHITE);
}
List<ElementFeatures.Path> pathElementsByColor = backgroundElements.stream().filter(path -> path.getFillColor().equals(fillColor)).toList();
List<PathFeatures> pathElementsByColor = backgroundElements.stream()
.filter(path -> path.getFillColor().equals(fillColor))
.toList();
if (pathElementsByColor.isEmpty()) {
return true;
}
@ -550,25 +558,30 @@ public class InvisibleElementRemovalService {
}
private static List<ElementFeatures.Path> findVisiblePathElementsThatIntersect(Rect textBBox, InvisibleElementRemovalContext context) {
private static List<PathFeatures> findVisiblePathElementsThatIntersect(Rect textBBox, InvisibleElementRemovalContext context) {
var result = new ArrayList<ElementFeatures.Path>();
for (var element : context.visibleElements()) {
if (element.getElementType() == Element.e_path
&& !((ElementFeatures.Path) element).getFillColor().equals(Color.WHITE)
&& ((ElementFeatures.Path) element).isBackground(textBBox)) {
result.add((ElementFeatures.Path) element);
}
}
var result = new ArrayList<PathFeatures>();
context.visibleElements().findIntersecting(textBBox)
.forEach(element -> {
if (element instanceof PathFeatures pathFeatures
&& pathFeatures.isBackground(textBBox)
&& !pathFeatures.getFillColor().equals(Color.WHITE)
&& pathFeatures.isFilled()) {
result.add(pathFeatures);
}
});
return result;
}
private static Area mergeLinePathsToArea(List<ElementFeatures.Path> pathElementsWithSameColor) {
private static Area mergeLinePathsToArea(List<PathFeatures> pathElementsWithSameColor) {
Area backgroundArea = new Area();
pathElementsWithSameColor.stream().map(ElementFeatures.Path::getLinePath).map(Area::new).forEach(backgroundArea::add);
pathElementsWithSameColor.stream()
.map(PathFeatures::getLinePath)
.map(Area::new)
.forEach(backgroundArea::add);
return backgroundArea;
}
@ -596,9 +609,8 @@ public class InvisibleElementRemovalService {
private void drawBBox(ElementWriter writer, Rectangle2D r, String hexcolor) {
try (ColorPt colorPt = new ColorPt(Integer.valueOf(hexcolor.substring(1, 3), 16) / 255d,
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d);
ElementBuilder eb = new ElementBuilder()) {
Integer.valueOf(hexcolor.substring(3, 5), 16) / 255d,
Integer.valueOf(hexcolor.substring(5, 7), 16) / 255d); ElementBuilder eb = new ElementBuilder()) {
Element rect = eb.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight());
rect.setPathStroke(true);
rect.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
@ -615,10 +627,11 @@ public class InvisibleElementRemovalService {
ElementReader reader,
ClippingPathStack clippingPathStack,
MarkedContentStack markedContentStack,
List<ElementFeatures> overlappedElements,
List<ElementFeatures> visibleElements,
ElementFeatureLookup overlappedElements,
ElementFeatureLookup visibleElements,
Set<Long> visitedXObjIds,
Set<String> markedContentToIgnore) {
Set<String> markedContentToIgnore
) {
}

View File

@ -10,6 +10,8 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatureFactory;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementReader;
@ -165,8 +167,7 @@ public class WatermarkRemovalService {
private boolean isTextRotated(Element element) {
try (var ctm = element.getCTM()) {
return Math.abs(ctm.getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(ctm
.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD));
return Math.abs(ctm.getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(ctm.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD));
}
}
@ -197,7 +198,7 @@ public class WatermarkRemovalService {
}
String hashOfImage = ImageHashFactory.calculate(element);
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
ElementFeatures elementFeatures = ElementFeatureFactory.buildImageWithHash(element, hashOfImage);
elementFeaturesLinkedList.add(elementFeatures);
}
}
@ -208,16 +209,10 @@ public class WatermarkRemovalService {
private boolean isLocatedNearBorder(Element element, Page page) {
try (var bbox = element.getBBox(); var contentBox = page.getVisibleContentBox();) {
return bbox.getY1() < contentBox.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || bbox
.getY2() >
contentBox.getY2() -
page.getPageHeight() *
IMAGE_POSITION_HEIGHT_THRESHOLD ||
bbox.getX1() < contentBox
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || bbox.getX2() > contentBox
.getX2() -
page.getPageWidth() *
IMAGE_POSITION_WIDTH_THRESHOLD;
return bbox.getY1() < contentBox.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD
|| bbox.getY2() > contentBox.getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD
|| bbox.getX1() < contentBox.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD
|| bbox.getX2() > contentBox.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
}
}
@ -264,9 +259,9 @@ public class WatermarkRemovalService {
.filter(elementFeature -> formObjectsPerPage.values()
.stream()
.filter(elementFeaturesOnPage -> elementFeaturesOnPage.stream()
.anyMatch(
elementFeature.getElementType() == Element.e_image || elementFeature.getElementType() == Element.e_inline_image ?
elementFeature::isSimilarTo : elementFeature::almostMatches))
.anyMatch(elementFeature.getElementType() == Element.e_image
|| elementFeature.getElementType()
== Element.e_inline_image ? elementFeature::similar : elementFeature::matches))
.count() >= minPagesFilter)
.toList();
}
@ -275,8 +270,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
try (ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter()) {
try (ElementReader reader = new ElementReader(); ElementWriter writer = new ElementWriter()) {
Set<Long> visitedXObjIds = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
@ -322,12 +316,9 @@ public class WatermarkRemovalService {
}
}
try (var bbox = element.getBBox()) {
if (bbox.getHeight() * bbox.getWidth() < minAreaCoveringFromPage && isLocatedNearBorder(element, page) && bbox
.getHeight() *
bbox
.getWidth() <
minAreaCoveringFromPage ||
element.getXObject() == null) {
if (bbox.getHeight() * bbox.getWidth() < minAreaCoveringFromPage
&& isLocatedNearBorder(element, page)
&& bbox.getHeight() * bbox.getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
writer.writeElement(element);
continue;
@ -353,7 +344,7 @@ public class WatermarkRemovalService {
}
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
if (elementFeatures.almostMatches(element)) {
if (elementFeatures.matches(ElementFeatureFactory.extractFeatures(element))) {
return;
}
}
@ -385,9 +376,9 @@ public class WatermarkRemovalService {
private void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
String hashValueOfImage = ImageHashFactory.calculate(element);
ElementFeatures imageFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashValueOfImage);
ElementFeatures imageFeatures = ElementFeatureFactory.buildImageWithHash(element, hashValueOfImage);
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
if (elementFeatures.isSimilarTo(imageFeatures)) {
if (elementFeatures.similar(imageFeatures)) {
return;
}
}
@ -404,7 +395,7 @@ public class WatermarkRemovalService {
Set<Long> visitedXObjIds) throws PDFNetException {
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
if (elementFeatures.almostMatches(element)) {
if (elementFeatures.matches(ElementFeatureFactory.extractFeatures(element))) {
return;
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.pdftronlogic.commons;
package com.iqser.red.pdftronlogic.commons.features;
import com.iqser.red.pdftronlogic.commons.Converter;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
@ -10,7 +11,7 @@ public class ElementFeatureFactory {
return switch (element.getType()) {
case Element.e_path -> buildPath(element);
case Element.e_text -> buildText(element);
case Element.e_image, Element.e_inline_image -> buildImage(element).build();
case Element.e_image, Element.e_inline_image -> buildImage(element);
case Element.e_form -> buildForm(element);
// This technically should never happen, it's a safetynet
default -> throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType());
@ -18,18 +19,22 @@ public class ElementFeatureFactory {
}
public static ElementFeatures extractFeaturesWithHash(Element element, String hashObject) throws PDFNetException {
public static ImageFeatures buildImageWithHash(Element element, String hashObject) throws PDFNetException {
return buildImage(element)
.hashOfImage(hashObject)
.build();
return buildImageBase(element).hashOfImage(hashObject).build();
}
private static ElementFeatures.Form buildForm(Element element) throws PDFNetException {
public static ImageFeatures buildImage(Element element) throws PDFNetException {
return buildImageBase(element).build();
}
public static FormFeatures buildForm(Element element) throws PDFNetException {
try (var bbox = element.getBBox();) {
return ElementFeatures.Form.builder()
return FormFeatures.builder()
.elementType(element.getType())
.boundingBox(Converter.toRectangle2D(bbox))
.xObjectType(element.getXObject().getType())
@ -39,10 +44,10 @@ public class ElementFeatureFactory {
}
private static ElementFeatures.Image.ImageBuilder<?, ?> buildImage(Element element) throws PDFNetException {
private static ImageFeatures.ImageFeaturesBuilder<?, ?> buildImageBase(Element element) throws PDFNetException {
try (var bbox = element.getBBox();) {
return ElementFeatures.Image.builder()
return ImageFeatures.builder()
.elementType(element.getType())
.boundingBox(Converter.toRectangle2D(bbox))
.dataSize(element.getImageDataSize())
@ -55,10 +60,10 @@ public class ElementFeatureFactory {
}
private static ElementFeatures.Text buildText(Element element) throws PDFNetException {
public static TextFeatures buildText(Element element) throws PDFNetException {
try (var bbox = element.getBBox();) {
return ElementFeatures.Text.builder()
try (var bbox = element.getBBox()) {
return TextFeatures.builder()
.elementType(element.getType())
.boundingBox(Converter.toRectangle2D(bbox))
.text(element.getTextString())
@ -69,19 +74,17 @@ public class ElementFeatureFactory {
}
private static ElementFeatures.Path buildPath(Element element) throws PDFNetException {
public static PathFeatures buildPath(Element element) throws PDFNetException {
try (var bbox = element.getBBox(); var ctm = element.getCTM();
var fillColor = element.getGState().getFillColor();
var strokeColor = element.getGState().getStrokeColor()) {
return ElementFeatures.Path.builder()
try (var bbox = element.getBBox(); var ctm = element.getCTM(); var fillColor = element.getGState().getFillColor(); var strokeColor = element.getGState().getStrokeColor()) {
return PathFeatures.builder()
.elementType(element.getType())
.boundingBox(Converter.toRectangle2D(bbox))
.isClippingPath(element.isClippingPath())
.isClipWindingFill(element.isClipWindingFill())
.isStroked(element.isStroked())
.isFilled(element.isFilled())
.isWindingFill(element.isWindingFill())
.clippingPath(element.isClippingPath())
.clipWindingFill(element.isClipWindingFill())
.stroked(element.isStroked())
.filled(element.isFilled())
.windingFill(element.isWindingFill())
.fillColor(Converter.convertColor(element.getGState().getFillColorSpace(), fillColor))
.strokeColor(Converter.convertColor(element.getGState().getStrokeColorSpace(), strokeColor))
.linePath(Converter.convertToGeneralPathAndTransformToInitialUserSpace(element.getPathData(), ctm))

View File

@ -0,0 +1,76 @@
package com.iqser.red.pdftronlogic.commons.features;
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual;
import java.awt.geom.Rectangle2D;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Base class for the comparable properties of a PDF element: its element type and bounding box.
 *
 * <p>{@link #matches} compares bounding boxes with the absolute tolerance of {@code ComparisonUtils.almostEqual},
 * {@link #similar} compares position and size relative to the size of the compared box.
 */
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ElementFeatures {

    private static final double RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR = 0.2; // specify how much the x and y value are allowed to differ
    private static final double RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR = 0.1; // the scale the images are allowed to differ

    int elementType;
    Rectangle2D boundingBox;


    /**
     * Checks whether the given features have the same element type and an almost equal bounding box.
     *
     * @param elementFeatures the features to compare against
     * @return {@code true} if type and bounding box match; {@code false} if the other bounding box is {@code null}
     */
    public boolean matches(ElementFeatures elementFeatures) {

        return elementFeatures.getElementType() == elementType //
               && elementFeatures.getBoundingBox() != null //
               && rectsAlmostMatch(elementFeatures.getBoundingBox());
    }


    // NOTE(review): nothing in this method throws a checked exception; the annotation is kept unchanged.
    @SneakyThrows
    protected boolean rectsAlmostMatch(Rectangle2D bBox) {
        // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
        return almostEqual(bBox.getX(), boundingBox.getX()) && //
               almostEqual(bBox.getY(), boundingBox.getY()) && //
               almostEqual(bBox.getWidth(), boundingBox.getWidth()) && //
               almostEqual(bBox.getHeight(), boundingBox.getHeight());
    }


    /**
     * Checks whether the given features have the same element type and a bounding box of similar position and size.
     *
     * @param elementFeatures the features to compare against
     * @return {@code true} if type matches and the boxes are similar; {@code false} if the other box is {@code null}
     */
    public boolean similar(ElementFeatures elementFeatures) {

        return elementFeatures.getElementType() == elementType //
               && elementFeatures.getBoundingBox() != null //
               && areRectsSimilar(elementFeatures.getBoundingBox());
    }


    protected boolean areRectsSimilar(Rectangle2D rectangle2D) {
        // Position may deviate by a fraction of the compared box' extent, size by a fraction of the compared size
        return isPositionSimilar(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
               isPositionSimilar(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
               isSizeSimilar(rectangle2D.getWidth(), boundingBox.getWidth()) && //
               isSizeSimilar(rectangle2D.getHeight(), boundingBox.getHeight());
    }


    /**
     * @return {@code true} if the two coordinates differ by less than 20% of {@code boxSize}
     */
    protected boolean isPositionSimilar(double a, double b, double boxSize) {

        return Math.abs(a - b) < boxSize * RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR;
    }


    /**
     * @return {@code true} if the two sizes differ by less than 10% of {@code a}
     */
    protected boolean isSizeSimilar(double a, double b) {

        return Math.abs(a - b) < a * RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR;
    }


    /**
     * Checks whether this element's bounding box contains the bounding box of the given features.
     *
     * <p>NOTE(review): despite the name, no tolerance is applied here; presumably callers or subclasses shrink the
     * inner box first (e.g. via {@code ComparisonUtils.getPaddedRectangle}) - confirm.
     *
     * @param features the features whose bounding box is tested
     * @return {@code true} if the other bounding box lies entirely inside this one; {@code false} if the other
     *         bounding box is {@code null}, consistent with {@link #matches} and {@link #similar}
     */
    public boolean almostContains(ElementFeatures features) {

        Rectangle2D inner = features.getBoundingBox();
        return inner != null && boundingBox.contains(inner);
    }

}

View File

@ -0,0 +1,45 @@
package com.iqser.red.pdftronlogic.commons.features;
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual;
import java.awt.geom.Rectangle2D;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
@EqualsAndHashCode(callSuper = true)
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class FormFeatures extends ElementFeatures {
int xObjectType;
long dictOrArrayOrStreamLength;
public boolean matches(ElementFeatures elementFeatures) {
if (elementFeatures.getClass() != this.getClass()) {
return false;
}
return elementFeatures.getElementType() == getElementType()
&& elementFeatures.getBoundingBox() != null
&& (super.rectsAlmostMatch(elementFeatures.getBoundingBox())
|| almostRotateMatches(elementFeatures.getBoundingBox()
.getBounds2D()))
&& xObjectType == ((FormFeatures) elementFeatures).getXObjectType()
&& dictOrArrayOrStreamLength == ((FormFeatures) elementFeatures).getDictOrArrayOrStreamLength();
}
private boolean almostRotateMatches(Rectangle2D bBox) {
return almostEqual(bBox.getWidth(), getBoundingBox().getHeight()) && //
almostEqual(bBox.getHeight(), getBoundingBox().getWidth());
}
}

View File

@ -0,0 +1,68 @@
package com.iqser.red.pdftronlogic.commons.features;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Features of an image element: the base element type and bounding box plus raw
 * image properties and a hash string of the image content.
 */
@EqualsAndHashCode(callSuper = true)
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ImageFeatures extends ElementFeatures {

    // Maximum number of differing hash characters for two images to still count as the same image.
    private static final double HAMMING_DISTANCE_THRESHOLD = 4;

    int dataSize;
    int height;
    int width;
    int renderingIntent;
    int componentNum;
    int bitsPerComponent;
    String hashOfImage;


    /**
     * @return true when the given element is an image whose base features match and whose
     * image properties are identical, with image hashes differing in at most
     * {@code HAMMING_DISTANCE_THRESHOLD} characters.
     */
    @Override
    public boolean matches(ElementFeatures elementFeatures) {

        if (elementFeatures instanceof ImageFeatures imageFeatures) {
            return super.matches(elementFeatures)
                   && this.dataSize == imageFeatures.getDataSize()
                   && this.height == imageFeatures.getHeight()
                   && this.width == imageFeatures.getWidth()
                   && this.renderingIntent == imageFeatures.getRenderingIntent()
                   && this.componentNum == imageFeatures.getComponentNum()
                   && this.bitsPerComponent == imageFeatures.getBitsPerComponent()
                   && calculateHammingDistance(imageFeatures.getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
        }
        return false;
    }


    /**
     * Looser comparison than {@link #matches(ElementFeatures)}: the bounding boxes only
     * need to be similar and the image hashes close; raw image properties are ignored.
     *
     * @return false for any element that is not an image (previously such an element
     * could trigger a {@link ClassCastException})
     */
    @Override
    public boolean similar(ElementFeatures elementFeatures) {

        if (elementFeatures instanceof ImageFeatures imageFeatures) {
            return super.similar(elementFeatures) && //
                   calculateHammingDistance(imageFeatures.getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
        }
        return false;
    }


    /**
     * Counts the positions at which this image's hash string and the given hash string
     * differ, character by character. The shorter string is treated as if padded with
     * {@code '0'} characters.
     *
     * @return the number of differing characters, or 0 when either hash is missing
     * (a missing hash is treated as "no difference known")
     */
    private int calculateHammingDistance(String hash2) {

        if (this.hashOfImage == null || hash2 == null) {
            return 0;
        }

        int distance = 0;
        int maxLength = Math.max(this.hashOfImage.length(), hash2.length());

        for (int i = 0; i < maxLength; i++) {
            char char1 = i < this.hashOfImage.length() ? this.hashOfImage.charAt(i) : '0';
            char char2 = i < hash2.length() ? hash2.charAt(i) : '0';
            if (char1 != char2) {
                distance++;
            }
        }

        return distance;
    }

}

View File

@ -0,0 +1,72 @@
package com.iqser.red.pdftronlogic.commons.features;
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.getPaddedRectangle;
import java.awt.Color;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import com.pdftron.pdf.Rect;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Features of a path element: the base element type and bounding box plus the paint
 * flags, stroke/fill colors and the path geometry itself.
 */
@EqualsAndHashCode(callSuper = true)
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class PathFeatures extends ElementFeatures {

    boolean clippingPath;
    boolean clipWindingFill;
    boolean stroked;
    boolean filled;
    boolean windingFill;
    Color strokeColor;
    Color fillColor;
    GeneralPath linePath;


    /**
     * @return true when the given element is a path whose base features match and whose
     * five paint flags are identical; colors and geometry are not compared.
     */
    @Override
    public boolean matches(ElementFeatures element) {

        if (!(element instanceof PathFeatures other)) {
            return false;
        }
        if (!super.matches(element)) {
            return false;
        }
        boolean sameClipping = clippingPath == other.isClippingPath() && clipWindingFill == other.isClipWindingFill();
        return sameClipping //
               && stroked == other.isStroked() //
               && filled == other.isFilled() //
               && windingFill == other.isWindingFill();
    }


    /**
     * @return true when the given color equals this path's fill color.
     */
    public boolean matchesFillColor(Color color) {

        return color.equals(fillColor);
    }


    /**
     * @return true when this path is filled and its bounding box intersects the given area.
     */
    @SneakyThrows
    public boolean isBackground(Rect area) {

        if (!filled) {
            return false;
        }
        return getBoundingBox().intersects(area.getX1(), area.getY1(), area.getWidth(), area.getHeight());
    }


    /**
     * Containment is tested against the actual path geometry rather than the bounding
     * box, using the padded rectangle of the given element.
     */
    @Override
    public boolean almostContains(ElementFeatures elementFeatures) {

        return linePath.contains(getPaddedRectangle(elementFeatures));
    }

}

View File

@ -0,0 +1,36 @@
package com.iqser.red.pdftronlogic.commons.features;
import static com.iqser.red.pdftronlogic.commons.ComparisonUtils.almostEqual;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Features of a text element: the base element type and bounding box plus the text
 * content, a font identifier and the font size.
 */
@EqualsAndHashCode(callSuper = true)
@Getter
@SuperBuilder
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD")
public class TextFeatures extends ElementFeatures {

    String text;
    int font;
    double fontsize;


    /**
     * @return true when the given element is a text element whose base features match,
     * whose text and font are equal, and whose font size agrees within tolerance.
     */
    @Override
    public boolean matches(ElementFeatures element) {

        if (!(element instanceof TextFeatures other)) {
            return false;
        }
        if (!super.matches(other)) {
            return false;
        }
        if (!text.equals(other.getText())) {
            return false;
        }
        return font == other.getFont() && almostEqual(fontsize, other.getFontsize());
    }

}

View File

@ -0,0 +1,37 @@
package com.iqser.red.pdftronlogic.commons.lookup;
import org.locationtech.jts.index.ItemVisitor;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
import lombok.RequiredArgsConstructor;
/**
 * Index visitor that records whether at least one visited item matches the query
 * features. Once a match was found, further items are ignored.
 */
@RequiredArgsConstructor
public class AnyMatchVisitor implements ItemVisitor {

    private final ElementFeatures queryFeatures;
    private boolean anyMatch = false;


    /**
     * @return true when any item visited so far matched the query features.
     */
    public boolean hasAnyMatch() {

        return anyMatch;
    }


    /**
     * Tests the visited item against the query features unless a match is already known.
     * Items that are not {@link ElementFeatures} are skipped.
     */
    @Override
    public void visitItem(Object o) {

        if (!anyMatch && o instanceof ElementFeatures candidate) {
            anyMatch = queryFeatures.matches(candidate);
        }
    }

}

View File

@ -0,0 +1,136 @@
package com.iqser.red.pdftronlogic.commons.lookup;
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
import java.awt.geom.Rectangle2D;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Predicate;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.index.quadtree.Quadtree;
import com.iqser.red.pdftronlogic.commons.Converter;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
import com.pdftron.pdf.Rect;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
/**
 * Spatial lookup of {@link ElementFeatures} backed by a JTS {@link Quadtree}. Elements are
 * indexed by their bounding box; every query first narrows the candidates through the
 * index and then applies an exact predicate to each candidate.
 */
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ElementFeatureLookup {

    Quadtree quadTree = new Quadtree();


    /** Adds the element to the index under its bounding box. */
    public void add(ElementFeatures elementFeatures) {

        quadTree.insert(envelop(elementFeatures), elementFeatures);
    }


    /** Removes the element from the index; its bounding box must be the one it was added with. */
    public void remove(ElementFeatures elementFeatures) {

        quadTree.remove(envelop(elementFeatures), elementFeatures);
    }


    /**
     * @return true when any indexed element near the given element's bounding box
     * is matched by {@code elementFeatures.matches(...)}.
     */
    public boolean matchesAny(ElementFeatures elementFeatures) {

        AnyMatchVisitor visitor = new AnyMatchVisitor(elementFeatures);
        quadTree.query(queryEnvelop(elementFeatures), visitor);
        return visitor.hasAnyMatch();
    }


    /** Applies the consumer to every indexed element. */
    public void forEach(Consumer<ElementFeatures> consumer) {

        quadTree.queryAll()
                .forEach(consumer);
    }


    /** Removes every indexed element, one by one. */
    public void clear() {

        forEach(this::remove);
    }


    /**
     * @return the indexed elements for which {@code elementFeatures.almostContains(candidate)}
     * holds, i.e. the elements lying inside the given element.
     */
    public List<ElementFeatures> findAlmostContained(ElementFeatures elementFeatures) {

        PredicateItemVisitor visitor = new PredicateItemVisitor(elementFeatures::almostContains);
        quadTree.query(queryEnvelop(elementFeatures), visitor);
        return visitor.getMatchingFeatures();
    }


    /**
     * @return the indexed elements near the given element's bounding box that satisfy the predicate.
     */
    public List<ElementFeatures> query(ElementFeatures elementFeatures, Predicate<ElementFeatures> predicate) {

        PredicateItemVisitor visitor = new PredicateItemVisitor(predicate);
        quadTree.query(queryEnvelop(elementFeatures), visitor);
        return visitor.getMatchingFeatures();
    }


    /** Index envelope covering exactly the element's bounding box. */
    private static Envelope envelop(ElementFeatures elementFeatures) {

        return toEnvelope(elementFeatures.getBoundingBox(), 0);
    }


    /** Query envelope: the element's bounding box grown by TOLERANCE on every side. */
    private static Envelope queryEnvelop(ElementFeatures elementFeatures) {

        return toEnvelope(elementFeatures.getBoundingBox(), TOLERANCE);
    }


    /**
     * Converts a rectangle into a JTS envelope, optionally grown by {@code padding} on every side.
     * The JTS constructor takes (x1, x2, y1, y2) - two x extents followed by two y extents -
     * not (x, y, width, height), so the rectangle's min/max coordinates are passed explicitly.
     */
    private static Envelope toEnvelope(Rectangle2D r, double padding) {

        return new Envelope(r.getMinX() - padding, r.getMaxX() + padding, r.getMinY() - padding, r.getMaxY() + padding);
    }


    public boolean isEmpty() {

        return quadTree.isEmpty();
    }


    public int size() {

        return quadTree.size();
    }


    /** Adds all given elements to the index. */
    public void addAll(List<ElementFeatures> currentOverlappedElements) {

        currentOverlappedElements.forEach(this::add);
    }


    /** Removes all given elements from the index. */
    public void removeAll(List<ElementFeatures> currentOverlappedElements) {

        currentOverlappedElements.forEach(this::remove);
    }


    /**
     * @return the indexed elements near the given PDF rectangle that satisfy the predicate.
     */
    @SneakyThrows
    public List<ElementFeatures> query(Rect bbox, Predicate<ElementFeatures> predicate) {

        PredicateItemVisitor visitor = new PredicateItemVisitor(predicate);
        double x1 = bbox.getX1();
        double y1 = bbox.getY1();
        // Envelope expects (x1, x2, y1, y2); derive the far corner from the rect's width and height.
        quadTree.query(new Envelope(x1, x1 + bbox.getWidth(), y1, y1 + bbox.getHeight()), visitor);
        return visitor.getMatchingFeatures();
    }


    /**
     * @return the indexed elements whose bounding box intersects the given PDF rectangle.
     */
    @SneakyThrows
    public List<ElementFeatures> findIntersecting(Rect bbox) {

        Rectangle2D r = Converter.toRectangle2D(bbox);
        PredicateItemVisitor visitor = new PredicateItemVisitor(elementFeatures -> elementFeatures.getBoundingBox().intersects(r));
        quadTree.query(toEnvelope(r, 0), visitor);
        return visitor.getMatchingFeatures();
    }

}

View File

@ -0,0 +1,33 @@
package com.iqser.red.pdftronlogic.commons.lookup;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;
import org.locationtech.jts.index.ItemVisitor;
import com.iqser.red.pdftronlogic.commons.features.ElementFeatures;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
/**
 * Index visitor that collects every visited {@link ElementFeatures} item accepted by
 * the configured predicate, in visiting order.
 */
@RequiredArgsConstructor
public class PredicateItemVisitor implements ItemVisitor {

    private final Predicate<ElementFeatures> predicate;
    @Getter
    private final List<ElementFeatures> matchingFeatures = new ArrayList<>();


    /**
     * Adds the item to the result list when it is an {@link ElementFeatures} instance
     * and passes the predicate; anything else is ignored.
     */
    @Override
    public void visitItem(Object o) {

        if (!(o instanceof ElementFeatures candidate)) {
            return;
        }
        if (predicate.test(candidate)) {
            matchingFeatures.add(candidate);
        }
    }

}

View File

@ -0,0 +1,145 @@
package com.iqser.red.pdftronlogic.commons.rendering;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
/**
 * Thread that drains one output stream (stdout or stderr) of a Ghostscript process.
 * <p>
 * If the stdError or stdOut buffer of a process is not being emptied it might lock the process in case of errors,
 * so both streams need to be emptied to prevent a deadlock. Since both need to be read simultaneously, the readers
 * are implemented as separate threads.
 * <p>
 * The stdout variant additionally watches for "Page N" lines to detect finished pages and hands the
 * corresponding {@link ImageFile} to the output handler.
 */
@Slf4j
@RequiredArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class GhostScriptOutputHandler extends Thread {

    static final Pattern pageFinishedPattern = Pattern.compile("Page (\\d+)");

    final InputStream is;
    final String processName;
    final Type type;

    // Pages this process is expected to render, keyed by page number; entries are removed once queued.
    final Map<Integer, ImageFile> pagesToProcess;
    final Consumer<ImageFile> outputHandler;
    final Consumer<String> errorHandler;

    // Page number last announced by Ghostscript; 0 until the first "Page N" line was seen.
    int currentPageNumber;


    /** Creates a handler that only logs the lines of the given stderr stream. */
    public static GhostScriptOutputHandler stdError(InputStream is, Consumer<String> errorHandler) {

        return new GhostScriptOutputHandler(is, "GS", Type.ERROR, null, null, errorHandler);
    }


    /** Creates a handler that logs the given stdout stream and reports finished pages to {@code imageFileOutput}. */
    public static GhostScriptOutputHandler stdOut(InputStream is, Map<Integer, ImageFile> pagesToProcess, Consumer<ImageFile> imageFileOutput, Consumer<String> errorHandler) {

        return new GhostScriptOutputHandler(is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler);
    }


    /**
     * Reads the stream line by line until it ends. For stdout, the last announced page is queued once the
     * stream is exhausted, and any page still left in {@code pagesToProcess} is reported as an error.
     */
    @Override
    @SneakyThrows
    public void run() {

        // Closing the reader also closes the wrapped process stream.
        try (InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr)) {
            String line;
            while ((line = br.readLine()) != null) {
                if (type == Type.ERROR) {
                    log.error("{}_{}>{}", processName, type.name(), line);
                } else {
                    log.debug("{}_{}>{}", processName, type.name(), line);
                    addProcessedImageToQueue(line);
                }
            }
        }

        if (type == Type.STD_OUT) {
            // Without any "Page N" line there is no last page to queue; the remaining-pages check below reports it.
            if (currentPageNumber != 0) {
                queueFinishedPage(currentPageNumber);
            }

            if (!pagesToProcess.isEmpty()) {
                errorHandler.accept(String.format("Ghostscript finished for batch, but pages %s remain unprocessed.", formatPagesToProcess()));
            }
        }
    }


    /**
     * @return "-" when no pages remain, the single page number when one remains,
     * otherwise "min-max" of the remaining page numbers.
     */
    private String formatPagesToProcess() {

        if (pagesToProcess.isEmpty()) {
            return "-";
        }

        if (pagesToProcess.size() == 1) {
            return pagesToProcess.keySet()
                    .iterator().next().toString();
        }

        int firstPage = pagesToProcess.keySet()
                .stream()
                .mapToInt(Integer::intValue)
                .min()
                .orElse(0);
        int lastPage = pagesToProcess.keySet()
                .stream()
                .mapToInt(Integer::intValue)
                .max()
                .orElse(0);
        return firstPage + "-" + lastPage;
    }


    /**
     * Ghostscript prints the page number it is currently working on, so the current page is remembered
     * and queued as finished as soon as the next page number comes in.
     */
    private void addProcessedImageToQueue(String line) {

        Matcher pageNumberMatcher = pageFinishedPattern.matcher(line);
        if (!pageNumberMatcher.find()) {
            return;
        }

        int pageNumber = Integer.parseInt(pageNumberMatcher.group(1));

        if (currentPageNumber != 0) {
            queueFinishedPage(currentPageNumber);
        }
        currentPageNumber = pageNumber;
    }


    /**
     * Removes the page from the pending pages and forwards its image file to the output handler.
     * An unknown page is reported to the error handler and not forwarded, since there is no image file
     * to hand over. A missing rendered file is reported as well, but the image file is still forwarded.
     */
    private void queueFinishedPage(int pageNumber) {

        var imageFile = this.pagesToProcess.remove(pageNumber);
        if (imageFile == null) {
            errorHandler.accept(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
            return;
        }

        if (!new File(imageFile.absoluteFilePath()).exists()) {
            errorHandler.accept(String.format("Rendered page with number %d does not exist!", pageNumber));
        }
        outputHandler.accept(imageFile);
    }


    public enum Type {
        ERROR,
        STD_OUT
    }

}

View File

@ -0,0 +1,192 @@
package com.iqser.red.pdftronlogic.commons.rendering;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.PDFDoc;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
/**
 * Renders PDF pages to image files by running the external {@code gs} (Ghostscript) command.
 * Pages are split into batches; per batch one or more gs processes are started, their stdout and
 * stderr are drained by {@link GhostScriptOutputHandler} threads, and finished pages are reported
 * through the supplied handlers.
 */
@Slf4j
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 142/144
public class GhostScriptService {
// Multiplied by PROCESS_COUNT to get the number of pages per batch.
int BATCH_SIZE = 256;
// File extension appended to every output image path.
String FORMAT = ".tiff";
// Value of the gs "-sDEVICE=" argument.
String DEVICE = "tiffgray";
// Value of the gs "-r" (resolution) argument.
int DPI = 125;
// Number of gs processes the pages of one batch are distributed over.
int PROCESS_COUNT = 1;
/**
 * Renders all pages of the document into {@code imageDir}.
 * <p>
 * The gs processes are started and awaited inside this call (see {@code renderPagesBatched}); the
 * returned future then waits until the supervisor has seen every page and yields the rendered images.
 *
 * @param documentFile the PDF to render
 * @param imageDir directory the image files are written to
 * @return future completing with the image files recorded by the supervisor
 */
@SneakyThrows
public CompletableFuture<List<ImageFile>> renderDocument(Path documentFile, Path imageDir) {
int pageCount = getPageCount(documentFile);
// Page numbers are 1-based: 1..pageCount.
List<Integer> allPages = IntStream.range(1, pageCount + 1).boxed()
.toList();
ImageSupervisorImpl supervisor = new ImageSupervisorImpl(allPages);
renderPagesBatched(allPages, documentFile.toFile().toString(), imageDir, supervisor, supervisor.successHandler(), supervisor.errorHandler());
return CompletableFuture.supplyAsync(() -> awaitImageFiles(supervisor));
}
/** Blocks until the supervisor has seen all pages, then returns the rendered images. */
@SneakyThrows
private static List<ImageFile> awaitImageFiles(ImageSupervisorImpl supervisor) {
supervisor.awaitAll();
return supervisor.getRenderedImages();
}
/** Opens the document with PDFTron only to read its page count; the document is closed again afterwards. */
private static int getPageCount(Path documentFile) throws PDFNetException {
try (PDFDoc doc = new PDFDoc(documentFile.toFile().toString())) {
return doc.getPageCount();
}
}
/**
 * Renders the given pages batch by batch. Before each batch the supervisor is asked to fail if errors
 * were recorded so far; then the batch's gs processes are started and this method waits for each of
 * them to exit before moving on to the next batch.
 *
 * @param pagesToProcess page numbers to render
 * @param documentAbsolutePath path of the PDF handed to gs
 * @param tmpImageDir directory the image files are written to
 * @param supervisor checked for recorded errors before each batch
 * @param successHandler receives the image file of every finished page
 * @param errorHandler receives error messages
 */
@SneakyThrows
public void renderPagesBatched(List<Integer> pagesToProcess,
String documentAbsolutePath,
Path tmpImageDir,
ImageSupervisor supervisor,
Consumer<ImageFile> successHandler,
Consumer<String> errorHandler) {
List<List<ProcessInfo>> processInfoBatches = buildSubListForEachProcess(pagesToProcess,
PROCESS_COUNT,
BATCH_SIZE
* PROCESS_COUNT); // GS has a limit on how many pageIndices per call are possible, so we limit it to 256 pages per process
for (int batchIdx = 0; batchIdx < processInfoBatches.size(); batchIdx++) {
supervisor.requireNoErrors();
List<ProcessInfo> processInfos = processInfoBatches.get(batchIdx);
log.info("Batch {}: Running {} gs processes with ({}) pages each",
batchIdx,
processInfos.size(),
processInfos.stream()
.map(info -> info.pageNumbers().size())
.map(String::valueOf)
.collect(Collectors.joining(", ")));
// Lambdas below need an effectively final copy of the loop variable.
int finalBatchIdx = batchIdx;
List<Process> processes = processInfos.stream()
.parallel()
.map(info -> buildCmdArgs(info.processIdx(), finalBatchIdx, info.pageNumbers(), tmpImageDir, documentAbsolutePath))
.peek(s -> log.debug(String.join(" ", s.cmdArgs())))
.map(processInfo -> executeProcess(processInfo, successHandler, errorHandler))
.toList();
List<Integer> processExitCodes = new LinkedList<>();
for (Process process : processes) {
processExitCodes.add(process.waitFor());
}
log.info("Batch {}: Ghostscript processes finished with exit codes {}", batchIdx, processExitCodes);
}
}
/**
 * Splits the sorted page numbers into batches and, within each batch, into one page list per process.
 *
 * @param stitchedPageNumbers page numbers to distribute
 * @param processCount number of processes per batch
 * @param batchSize maximum number of pages per batch (across all processes)
 * @return batches -> processes -> page numbers, wrapped as {@code ProcessInfo}
 */
private List<List<ProcessInfo>> buildSubListForEachProcess(List<Integer> stitchedPageNumbers, int processCount, int batchSize) {
// GhostScript command line can only handle so many page numbers at once, so we split it into batches
int batchCount = (int) Math.ceil((double) stitchedPageNumbers.size() / batchSize);
log.info("Splitting {} page renderings across {} process(es) in {} batch(es) with size {}", stitchedPageNumbers.size(), processCount, batchCount, batchSize);
List<List<ProcessInfo>> processInfoBatches = new ArrayList<>(batchCount);
// Note: the third argument passed here is the number of batches, not the size of a batch.
List<List<List<Integer>>> batchedBalancedSublist = ListSplittingUtils.buildBatchedBalancedSublist(stitchedPageNumbers.stream()
.sorted()
.toList(), processCount, batchCount);
for (var batch : batchedBalancedSublist) {
List<ProcessInfo> processInfos = new ArrayList<>(processCount);
for (int threadIdx = 0; threadIdx < batch.size(); threadIdx++) {
List<Integer> balancedPageNumbersSubList = batch.get(threadIdx);
processInfos.add(new ProcessInfo(threadIdx, balancedPageNumbersSubList));
}
processInfoBatches.add(processInfos);
}
return processInfoBatches;
}
/**
 * Builds the gs command line for one process of one batch together with the image files it is
 * expected to produce. Output files are named {@code output_<processIdx>_<batchIdx>.<NNNN><FORMAT>},
 * where NNNN is the 1-based position of the page within the given page list.
 */
@SneakyThrows
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(Integer processIdx,
Integer batchIdx,
List<Integer> stitchedImagePageIndices,
Path outputDir,
String documentAbsolutePath) {
String imagePathFormat = outputDir.resolve("output_" + processIdx + "_" + batchIdx + ".%04d" + FORMAT).toFile().toString();
Map<Integer, ImageFile> fullPageImages = new HashMap<>();
for (int i = 0; i < stitchedImagePageIndices.size(); i++) {
Integer pageNumber = stitchedImagePageIndices.get(i);
// The i-th requested page is expected in the file numbered i + 1.
fullPageImages.put(pageNumber, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
}
String[] cmdArgs = buildCmdArgs(stitchedImagePageIndices, documentAbsolutePath, imagePathFormat);
return new ProcessCmdsAndRenderedImageFiles(cmdArgs, fullPageImages);
}
/** Assembles the gs argument array; the page numbers are passed as a comma-separated "-sPageList=" value. */
private String[] buildCmdArgs(List<Integer> pageNumbers, String documentAbsolutePath, String imagePathFormat) {
StringBuilder sPageList = new StringBuilder();
int i = 1;
for (Integer integer : pageNumbers) {
sPageList.append(integer);
// Separator after every element except the last one.
if (i < pageNumbers.size()) {
sPageList.append(",");
}
i++;
}
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sPageList=" + sPageList, "-sOutputFile=" + imagePathFormat, documentAbsolutePath, "-c", "quit"};
}
/**
 * Starts the gs process and one reader thread each for its stdout and stderr.
 * The process is returned without waiting for it; the caller waits for its exit.
 */
@SneakyThrows
private Process executeProcess(ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
InputStream stdOut = p.getInputStream();
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
InputStream stdError = p.getErrorStream();
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(stdError, errorHandler);
stdOutLogger.start();
stdErrorLogger.start();
return p;
}
// gs command line plus the image files (keyed by page number) that command is expected to produce.
private record ProcessCmdsAndRenderedImageFiles(String[] cmdArgs, Map<Integer, ImageFile> renderedPageImageFiles) {
}
// Index of a process within its batch plus the page numbers assigned to it.
private record ProcessInfo(Integer processIdx, List<Integer> pageNumbers) {
}
}

View File

@ -0,0 +1,13 @@
package com.iqser.red.pdftronlogic.commons.rendering;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
/**
 * A rendered page image on disk.
 *
 * @param pageNumber page number the image belongs to
 * @param absoluteFilePath path of the image file
 */
public record ImageFile(int pageNumber, String absoluteFilePath) {
/**
 * Reads the image file through Leptonica's {@code pixRead}.
 * NOTE(review): presumably the returned native Pix has to be disposed by the caller - confirm against lept4j.
 */
public Pix readPix() {
return Leptonica1.pixRead(absoluteFilePath);
}
}

View File

@ -0,0 +1,7 @@
package com.iqser.red.pdftronlogic.commons.rendering;
/**
 * Tracks the state of an image rendering run.
 */
public interface ImageSupervisor {
/**
 * Checkpoint called before further rendering work is started.
 * NOTE(review): implementations are presumably expected to throw when errors were recorded - confirm.
 */
void requireNoErrors();
}

View File

@ -0,0 +1,114 @@
package com.iqser.red.pdftronlogic.commons.rendering;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.function.Consumer;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@RequiredArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class ImageSupervisorImpl implements ImageSupervisor {
final Map<Integer, CountDownLatch> pageLatches;
final Map<Integer, ImageFile> images;
final List<String> errors;
final ImageFile[] finishedPages;
public ImageSupervisorImpl(List<Integer> pageNumbers) {
this.pageLatches = Collections.synchronizedMap(new HashMap<>());
this.images = Collections.synchronizedMap(new HashMap<>());
this.errors = Collections.synchronizedList(new ArrayList<>());
this.finishedPages = new ImageFile[pageNumbers.size()];
for (Integer pageNumber : pageNumbers) {
pageLatches.put(pageNumber, new CountDownLatch(1));
}
}
public List<ImageFile> getRenderedImages() {
return new ArrayList<>(images.values());
}
public void markPageFinished(ImageFile imageFile) {
log.debug("finished page: {}", imageFile.pageNumber());
getPageLatch(imageFile.pageNumber()).countDown();
images.put(imageFile.pageNumber(), imageFile);
finishedPages[imageFile.pageNumber() - 1] = imageFile;
}
public Consumer<ImageFile> successHandler() {
return this::markPageFinished;
}
public Consumer<String> errorHandler() {
return this::markError;
}
private CountDownLatch getPageLatch(Integer pageNumber) {
if (pageNumber == null || !pageLatches.containsKey(pageNumber)) {
throw new IllegalArgumentException("awaiting non-existent page " + pageNumber);
}
return pageLatches.get(pageNumber);
}
public ImageFile awaitProcessedPage(Integer pageNumber) throws InterruptedException {
if (hasErrors()) {
return null;
}
getPageLatch(pageNumber).await();
return images.get(pageNumber);
}
private boolean hasErrors() {
return errors.isEmpty();
}
public void markError(String errorMessage) {
this.errors.add(errorMessage);
}
public void awaitAll() throws InterruptedException {
for (CountDownLatch countDownLatch : pageLatches.values()) {
countDownLatch.await();
}
}
public void requireNoErrors() {
// GS will log
if (this.errors.isEmpty()) {
return;
}
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors));
}
}

View File

@ -0,0 +1,106 @@
package com.iqser.red.pdftronlogic.commons.rendering;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.IntStream;
import lombok.experimental.UtilityClass;
/**
 * Helpers for distributing list entries evenly across threads and batches, and for
 * formatting sorted numbers as intervals.
 */
@UtilityClass
public class ListSplittingUtils {

    /**
     * Splits the numbers 1..totalNumberOfEntries into {@code threadCount} consecutive,
     * evenly sized sublists.
     */
    public List<List<Integer>> buildBalancedContinuousSublist(Integer totalNumberOfEntries, int threadCount) {

        List<Integer> oneBasedEntries = IntStream.rangeClosed(1, totalNumberOfEntries).boxed()
                .toList();
        return buildBalancedSublist(oneBasedEntries, threadCount);
    }


    /**
     * Splits the entries into {@code threadCount} consecutive sublists whose sizes differ
     * by at most one. The sublists are views of the given list.
     */
    public <T> List<List<T>> buildBalancedSublist(List<T> entries, int threadCount) {

        List<Integer> entryCounts = buildBalancedEntryCounts(entries.size(), threadCount);
        List<List<T>> sublists = new ArrayList<>(threadCount);
        int from = 0;
        for (int entryCount : entryCounts) {
            int to = from + entryCount;
            sublists.add(entries.subList(from, to));
            from = to;
        }
        return sublists;
    }


    /**
     * Splits the entries first across threads and then each thread's share into
     * {@code batchSize} parts, and returns the result ordered as batches -> threads -> entries.
     * Note that {@code batchSize} is used as the number of parts per thread.
     */
    public <T> List<List<List<T>>> buildBatchedBalancedSublist(List<T> entries, int threadCount, int batchSize) {

        List<List<List<T>>> batches = new LinkedList<>();

        // threads -> batches -> entries
        List<List<List<T>>> batchesPerThread = new ArrayList<>();
        for (List<T> threadEntries : buildBalancedSublist(entries, threadCount)) {
            batchesPerThread.add(buildBalancedSublist(threadEntries, batchSize));
        }

        // transpose into batches -> threads -> entries
        for (int batchIdx = 0; batchIdx < batchSize; batchIdx++) {
            List<List<T>> entriesOfBatch = new ArrayList<>(threadCount);
            for (int threadIdx = 0; threadIdx < threadCount; threadIdx++) {
                List<List<T>> batchesOfThread = batchesPerThread.get(threadIdx);
                entriesOfBatch.add(batchesOfThread.get(batchIdx));
            }
            batches.add(entriesOfBatch);
        }
        return batches;
    }


    /**
     * Distributes {@code totalNumberOfEntries} round-robin over {@code threadCount} slots.
     *
     * @return the number of entries per slot; earlier slots receive the surplus entries
     */
    public List<Integer> buildBalancedEntryCounts(int totalNumberOfEntries, int threadCount) {

        List<Integer> counts = new ArrayList<>(threadCount);
        IntStream.range(0, threadCount)
                .forEach(ignored -> counts.add(0));

        for (int entry = 0; entry < totalNumberOfEntries; entry++) {
            int slot = entry % threadCount;
            counts.set(slot, counts.get(slot) + 1);
        }
        return counts;
    }


    /**
     * Collapses runs of consecutive numbers into "start-end" strings; isolated numbers
     * are emitted on their own. The input is expected to be sorted ascending.
     */
    public static List<String> formatIntervals(List<Integer> sortedList) {

        List<String> intervals = new ArrayList<>();
        if (sortedList.isEmpty()) {
            return intervals;
        }

        int runStart = sortedList.get(0);
        int runEnd = runStart;
        for (int value : sortedList.subList(1, sortedList.size())) {
            if (value != runEnd + 1) {
                // the run is interrupted: emit it and start a new one at the current value
                intervals.add(formatInterval(runStart, runEnd));
                runStart = value;
            }
            runEnd = value;
        }
        intervals.add(formatInterval(runStart, runEnd));
        return intervals;
    }


    private static String formatInterval(int start, int end) {

        return start == end ? String.valueOf(start) : start + "-" + end;
    }

}

View File

@ -0,0 +1,249 @@
package com.iqser.red.pdftronlogic.commons;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.IntBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.iqser.red.pdftronlogic.commons.rendering.GhostScriptService;
import com.iqser.red.pdftronlogic.commons.rendering.ImageFile;
import com.pdftron.pdf.PDFNet;
import com.sun.jna.NativeLibrary;
import lombok.SneakyThrows;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
public class VisualEqualityTest {
public static final double SIMILARITY_THRESHOLD = 0.015; // percentage of pixels which differ by more than 10 points in luminance
GhostScriptService ghostScriptService = new GhostScriptService();
InvisibleElementRemovalService invisibleElementRemovalService = new InvisibleElementRemovalService();
Path stem = Path.of("/tmp/AAA_EQUALITY_TEST/");
/**
 * Initializes PDFNet and verifies that the native leptonica library can be loaded
 * from the configured JNA library path.
 */
@BeforeEach
public void setup() {

    PDFNet.initialize(PDFTronConfig.license);

    String leptonicaLibraryDir = "/home/kschuettler/software/leptonica/vcpkg/installed/x64-linux-dynamic/lib/";
    System.setProperty("jna.library.path", leptonicaLibraryDir);

    try (NativeLibrary leptonica = NativeLibrary.getInstance("leptonica")) {
        assert leptonica != null;
    }
}
/**
 * Processes every PDF found under the configured path and asserts that none of them
 * was recorded as failed in the context.
 */
@Test
@SneakyThrows
public void assertVisualEqualityOfProcessedFile() {

    Path folder = Path.of("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/ITEM 19_A15149AC - Primary Skin Irritation Rabbit.pdf");
    Context context = new Context(stem, new HashMap<>());

    // Files.walk holds open directory handles, so the stream has to be closed.
    try (var files = Files.walk(folder)) {
        files.filter(Files::isRegularFile)
                .filter(file -> file.toString().endsWith(".pdf"))
                .forEach(file -> {
                    runForFile(file, context);
                    System.out.println(context);
                });
    }

    assert context.failedFiles.isEmpty();
}
/**
 * Copies the origin file into its working folder, writes "processed.pdf" and "delta.pdf"
 * from that copy via the invisible element removal service, and then compares the origin
 * file visually with the processed file.
 */
@SneakyThrows
private void runForFile(Path originFile, Context context) {

    System.out.println(originFile.toFile());

    Path workDir = context.getFileFolder(originFile);
    Files.createDirectories(workDir);

    Path originCopy = workDir.resolve("origin.pdf");
    Files.copy(originFile, originCopy, StandardCopyOption.REPLACE_EXISTING);

    Path processedFile = workDir.resolve("processed.pdf");
    writeWithoutInvisibleElements(originCopy, processedFile, false);
    writeWithoutInvisibleElements(originCopy, workDir.resolve("delta.pdf"), true);

    assertVisualEquality(originFile, processedFile, context);
}


/**
 * Runs the invisible element removal from {@code source} into {@code target};
 * {@code delta} is handed through as the service's third argument.
 */
@SneakyThrows
private void writeWithoutInvisibleElements(Path source, Path target, boolean delta) {

    try (var in = new FileInputStream(source.toFile()); var out = new FileOutputStream(target.toFile())) {
        invisibleElementRemovalService.removeInvisibleElements(in, out, delta);
    }
}
/**
 * Renders the origin and the processed file to images, compares them page by page and records
 * a failure in the context when page counts differ, a page is missing, or the ratio of
 * differing pixels exceeds {@code SIMILARITY_THRESHOLD}.
 */
@SneakyThrows
private void assertVisualEquality(Path originFile, Path processedFile, Context context) {

    Path imageDir = context.getFileFolder(originFile).resolve("images");

    Path originDir = imageDir.resolve("origin");
    Files.createDirectories(originDir);
    CompletableFuture<List<ImageFile>> originalPagesFuture = ghostScriptService.renderDocument(originFile, originDir);

    Path processedDir = imageDir.resolve("processed");
    Files.createDirectories(processedDir);
    CompletableFuture<List<ImageFile>> processedPagesFuture = ghostScriptService.renderDocument(processedFile, processedDir);

    // Remove error images of previous runs; Files.walk holds open directory handles and has to be closed.
    try (var staleErrorFiles = Files.walk(context.getErrorFolder(originFile))) {
        staleErrorFiles.map(Path::toFile)
                .filter(File::isFile)
                .forEach(File::delete);
    }

    List<ImageFile> originalPages = originalPagesFuture.join();
    List<ImageFile> processedPages = processedPagesFuture.join();

    if (originalPages.size() != processedPages.size()) {
        context.getFailedFile(originFile).addErrorMessage("Differing page counts!");
        return;
    }

    for (ImageFile originalPage : originalPages) {
        Optional<ImageFile> samePage = processedPages.stream()
                .filter(p -> p.pageNumber() == originalPage.pageNumber())
                .findFirst();

        if (samePage.isEmpty()) {
            context.getFailedFile(originFile).addErrorMessage("Page " + originalPage.pageNumber() + " missing!");
            return;
        }

        ImageFile processedPage = samePage.get();

        Pix originalPagePix = null;
        Pix processedPagePix = null;
        try {
            synchronized (VisualEqualityTest.class) {
                originalPagePix = originalPage.readPix();
                processedPagePix = processedPage.readPix();
            }

            String errorFile = context.getErrorFolder(originFile).resolve(originalPage.pageNumber() + ".tiff").toFile().toString();
            double diffRatio = detectErrors(originalPagePix, processedPagePix, errorFile);

            if (diffRatio > SIMILARITY_THRESHOLD) {
                context.getFailedFile(originFile).addErrorMessage("Page " + originalPage.pageNumber() + " differs by " + formatPercentage(diffRatio) + "%!");
            }
        } finally {
            // Release the native images even when reading or comparing failed.
            synchronized (VisualEqualityTest.class) {
                if (originalPagePix != null) {
                    LeptUtils.disposePix(originalPagePix);
                }
                if (processedPagePix != null) {
                    LeptUtils.disposePix(processedPagePix);
                }
            }
        }
    }
}
/**
 * Converts a ratio into a percent figure with two decimals, without the percent sign.
 * Formatting uses the JVM's default locale.
 *
 * @param diffRatio ratio to convert, e.g. 0.015
 * @return the ratio multiplied by 100, formatted with "%.2f"
 */
private static String formatPercentage(double diffRatio) {

    double percent = diffRatio * 100;
    return "%.2f".formatted(percent);
}
/**
 * Computes the ratio of pixels that differ between two images and, if that ratio is
 * above SIMILARITY_THRESHOLD, writes the thresholded difference image to errorFile.
 *
 * @param pix1      first image
 * @param pix2      second image
 * @param errorFile target path for the difference image
 * @return 1 if width, height or depth differ; otherwise 1 minus (counted pixels / total pixels)
 */
public double detectErrors(Pix pix1, Pix pix2, String errorFile) {

    // Images of different geometry are not compared pixel by pixel; they count as fully different.
    boolean sameGeometry = pix1.w == pix2.w && pix1.h == pix2.h && pix1.d == pix2.d;
    if (!sameGeometry) {
        return 1;
    }

    Pix difference = Leptonica1.pixAbsDifference(pix1, pix2);

    // Per-pixel tolerance (e.g. 10 out of 255).
    final int tolerance = 10;
    Pix withinTolerance = Leptonica1.pixThresholdToBinary(difference, tolerance);

    // The count is treated as the number of matching pixels, i.e. this relies on the binarization
    // turning pixels below the tolerance ON - NOTE(review): confirm against the Leptonica docs.
    IntBuffer counter = IntBuffer.allocate(1);
    Leptonica1.pixCountPixels(withinTolerance, counter, null);
    long matchingPixels = counter.get();
    long allPixels = (long) pix1.w * pix1.h;

    double differingRatio = 1 - (double) matchingPixels / allPixels;

    if (differingRatio > SIMILARITY_THRESHOLD) {
        // 5 is a Leptonica output-format id - presumably a TIFF variant, given the ".tiff" name used by the caller; confirm.
        Leptonica1.pixWrite(errorFile, withinTolerance, 5);
    }

    LeptUtils.disposePix(difference);
    LeptUtils.disposePix(withinTolerance);

    return differingRatio;
}
/**
 * State shared across one test run: the root output folder and the failures collected
 * per input file.
 *
 * @param outFolder   root folder below which each input file gets a folder named after it
 * @param failedFiles failures keyed by the input file's path; entries are created on demand
 */
private record Context(Path outFolder, Map<Path, FailedFile> failedFiles) {

    /** Returns the failure record for the given file, creating an empty one on first access. */
    public FailedFile getFailedFile(Path path) {

        return failedFiles.computeIfAbsent(path, ignored -> FailedFile.init());
    }


    /** Resolves the per-file output folder: outFolder plus the file's name. */
    public Path getFileFolder(Path file) {

        Path fileName = file.getFileName();
        return outFolder.resolve(fileName);
    }


    /** Returns the "error" folder inside the per-file output folder, creating it if necessary. */
    @SneakyThrows
    public Path getErrorFolder(Path originFile) {

        Path folder = getFileFolder(originFile).resolve("error");
        Files.createDirectories(folder);
        return folder;
    }


    /** Renders one "name: errors" line per failed file, or a success message if there are none. */
    @Override
    public String toString() {

        if (failedFiles.isEmpty()) {
            return "All files visually equal!";
        }

        StringBuilder report = new StringBuilder();
        for (Map.Entry<Path, FailedFile> entry : failedFiles.entrySet()) {
            report.append(entry.getKey().getFileName().toFile())
                    .append(": ")
                    .append(entry.getValue().toString())
                    .append("\n");
        }
        return report.toString();
    }

}
/**
 * Failures collected for one input file.
 *
 * @param failedPages per-page failure details, keyed by the page image
 * @param errors      error messages in the order they were added
 */
private record FailedFile(Map<ImageFile, FailedPage> failedPages, List<String> errors) {

    /** Creates an instance backed by empty, mutable collections. */
    public static FailedFile init() {

        Map<ImageFile, FailedPage> noPages = new HashMap<>();
        List<String> noErrors = new LinkedList<>();
        return new FailedFile(noPages, noErrors);
    }


    /** Appends an error message. */
    public void addErrorMessage(String s) {

        errors.add(s);
    }


    /** Records a location for the given page image, creating the page entry on first use. */
    public void addFailedPage(ImageFile imageFile, double location) {

        FailedPage page = failedPages.computeIfAbsent(imageFile, ignored -> new FailedPage(new LinkedList<>()));
        page.locations().add(location);
    }


    /** Returns all error messages joined by ", ". */
    @Override
    public String toString() {

        return String.join(", ", errors);
    }

}
/**
 * Locations recorded for a single failed page.
 *
 * @param locations recorded values; the list is extended by FailedFile.addFailedPage,
 *                  so it must be mutable. NOTE(review): the unit/meaning of a "location"
 *                  is not visible here - confirm with the author.
 */
private record FailedPage(List<Double> locations) {}
}

File diff suppressed because one or more lines are too long