From 967cba820d70ddbe7fb23fad616377fafa67bf06 Mon Sep 17 00:00:00 2001 From: RaphaelArnold Date: Wed, 19 Jul 2023 12:56:39 +0200 Subject: [PATCH] RED-7075: WIP --- .../pdftronlogic/commons/ElementFeatures.java | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java b/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java index 51be5ab..12e5733 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/ElementFeatures.java @@ -58,6 +58,12 @@ public class ElementFeatures { .componentNum(element.getComponentNum()) .bitsPerComponent(element.getBitsPerComponent()) .build(); + case Element.e_form -> Form.builder() + .elementType(element.getType()) + .boundingBox(Converter.toRectangle2D(element.getBBox())) + .xObjectType(element.getXObject().getType()) + .dictOrArrayOrStreamLength(element.getXObject().getType() == 7 ? element.getXObject().getDecodedStream().size() : 0) + .build(); // This technically should never happen, it's a safetynet default -> throw new RuntimeException("Feature Extraction is not supported for PDFTron.Element with type: " + element.getType()); }; @@ -71,6 +77,12 @@ public class ElementFeatures { rectsAlmostMatch(element.getBBox()); } + public boolean almostMatches(ElementFeatures elementFeatures){ + return elementFeatures.getElementType() == elementType && + elementFeatures.getBoundingBox() != null && + rectsAlmostMatch(elementFeatures.getBoundingBox()); + } + protected boolean almostEqual(double a, double b) { @@ -88,6 +100,16 @@ public class ElementFeatures { almostEqual(bBox.getHeight(), boundingBox.getHeight()); } + @SneakyThrows + private boolean rectsAlmostMatch(Rectangle2D bBox) { + // To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance + + return almostEqual(bBox.getX(), boundingBox.getX()) && // + almostEqual(bBox.getY(), boundingBox.getY()) && // + almostEqual(bBox.getWidth(), boundingBox.getWidth()) && // + almostEqual(bBox.getHeight(), boundingBox.getHeight()); + } + @EqualsAndHashCode(callSuper = true) @Getter @@ -183,6 +205,45 @@ public class ElementFeatures { } + @EqualsAndHashCode(callSuper = true) + @Getter + @SuperBuilder + @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) + private static class Form extends ElementFeatures { + + int xObjectType; + long dictOrArrayOrStreamLength; + + + @Override + public boolean almostMatches(Element element) throws PDFNetException { + return element.getType() == getElementType() && // + element.getBBox() != null && // + (super.rectsAlmostMatch(element.getBBox()) || almostRotateMatches(element.getBBox().getRectangle())) && + xObjectType == element.getXObject().getType() && + dictOrArrayOrStreamLength == element.getXObject().getDecodedStream().size(); + } + + public boolean almostMatches(ElementFeatures elementFeatures){ + if(elementFeatures.getClass() != this.getClass()){ + return false; + } + return elementFeatures.getElementType() == getElementType() && + elementFeatures.getBoundingBox() != null && + (super.rectsAlmostMatch(elementFeatures.getBoundingBox()) || almostRotateMatches(elementFeatures.getBoundingBox().getBounds2D())) && + xObjectType == ((Form)elementFeatures).getXObjectType() && + dictOrArrayOrStreamLength == ((Form)elementFeatures).getDictOrArrayOrStreamLength(); + + } + + + private boolean almostRotateMatches(Rectangle2D bBox) { + return almostEqual(bBox.getWidth(), getBoundingBox().getHeight()) && // + almostEqual(bBox.getHeight(), getBoundingBox().getWidth()); + } + + } +