From aef1146e8f18af2a0db11166e990dd5ca8374dbd Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 27 Feb 2024 15:03:05 +0100 Subject: [PATCH] RED-8666 --- .../UnsupervisedReadingOrderDetector.java | 79 +++++++++++-------- .../docstrum/service/ReadingOrderService.java | 2 +- .../server/graph/ViewerDocumentTest.java | 2 +- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/UnsupervisedReadingOrderDetector.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/UnsupervisedReadingOrderDetector.java index a5f1062..adf0069 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/UnsupervisedReadingOrderDetector.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/UnsupervisedReadingOrderDetector.java @@ -208,32 +208,37 @@ public class UnsupervisedReadingOrderDetector { private static IntervalRelations getIntervalRelationX(Zone z1, Zone z2, double t) { - if (z1.getX() + z1.getWidth() < z2.getX() - t) { + double z1_minX = z1.getX(); + double z1_maxX = z1_minX + z1.getWidth(); + double z2_minX = z2.getX(); + double z2_maxX = z2_minX + z2.getWidth(); + + // this is very wrong: check https://www.cs.rug.nl/~aiellom/publications/ijdarNoi.pdf + if (z1_maxX < z2_minX - t) { return IntervalRelations.PRECEDES; - } else if (z1.getX() + z1.getWidth() >= z2.getX() - t) { + } else if (z1_maxX >= z2_minX - t) { return IntervalRelations.PRECEDES_INVERSE; - } else if (z2.getX() - t <= z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() <= z2.getX() + t) { + } else if (z2_minX - t <= z1_maxX && z1_maxX <= z2_minX + t) { return IntervalRelations.MEETS; - } else if (z2.getX() - t > z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() > z2.getX() + t) { + } else if (z2_minX - t > z1_maxX && z1_maxX > z2_minX + t) { return IntervalRelations.MEETS_INVERSE; - } else if (z1.getX() < z2.getX() - t && (z2.getX() + t < z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - t)) { + } else if (z1_minX < z2_minX - t && (z2_minX + t < z1_maxX && z1_maxX < z2_maxX - t)) { return IntervalRelations.OVERLAPS; - } else if (z1.getX() >= z2.getX() - t && (z2.getX() + t >= z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - t)) { + } else if (z1_minX >= z2_minX - t && (z2_minX + t >= z1_maxX && z1_maxX >= z2_maxX - t)) { return IntervalRelations.OVERLAPS_INVERSE; - } else if (z2.getX() - t <= z1.getX() && z1.getX() <= z2.getX() + t && z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - t) { + } else if (z2_minX - t <= z1_minX && z1_minX <= z2_minX + t && z1_maxX < z2_maxX - t) { return IntervalRelations.STARTS; - } else if (z2.getX() - t > z1.getX() && z1.getX() > z2.getX() + t && z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - t) { + } else if (z2_minX - t > z1_minX && z1_minX > z2_minX + t && z1_maxX >= z2_maxX - t) { return IntervalRelations.STARTS_INVERSE; - } else if (z1.getX() > z2.getX() + t && z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - t) { + } else if (z1_minX > z2_minX + t && z1_maxX < z2_maxX - t) { return IntervalRelations.DURING; - } else if (z1.getX() <= z2.getX() + t && z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - t) { + } else if (z1_minX <= z2_minX + t && z1_maxX >= z2_maxX - t) { return IntervalRelations.DURING_INVERSE; - } else if (z1.getX() > z2.getX() + t && (z2.getX() + z2.getWidth() - t <= z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + t)) { + } else if (z1_minX > z2_minX + t && (z2_maxX - t <= z1_maxX && z1_maxX <= z2_maxX + t)) { return IntervalRelations.FINISHES; - } else if (z1.getX() <= z2.getX() + t && (z2.getX() + z2.getWidth() - t > z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + t)) { + } else if (z1_minX <= z2_minX + t && (z2_maxX - t > z1_maxX && z1_maxX > z2_maxX + t)) { return IntervalRelations.FINISHES_INVERSE; - } else if (z2.getX() - t <= z1.getX() && z1.getX() <= z2.getX() + t && (z2.getX() + z2.getWidth() - t <= z1.getX() + z1.getWidth() - && z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + t)) { + } else if (z2_minX - t <= z1_minX && z1_minX <= z2_minX + t && (z2_maxX - t <= z1_maxX && z1_maxX <= z2_maxX + t)) { return IntervalRelations.EQUALS; } @@ -243,36 +248,42 @@ public class UnsupervisedReadingOrderDetector { private static IntervalRelations getIntervalRelationY(Zone z1, Zone z2, double t) { - if (z1.getY() + z1.getWidth() < z2.getY() - t) { - return IntervalRelations.PRECEDES; - } else if (z1.getY() + z1.getWidth() >= z2.getY() - t) { + double z1_minY = z1.getY(); + double z1_maxY = z1_minY + z1.getHeight(); + double z2_minY = z2.getY(); + double z2_maxY = z2_minY + z2.getHeight(); + + // this is very wrong: check https://www.cs.rug.nl/~aiellom/publications/ijdarNoi.pdf + if (z1_minY < z2_maxY - t) { return IntervalRelations.PRECEDES_INVERSE; - } else if (z2.getY() - t <= z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() <= z2.getY() + t) { - return IntervalRelations.MEETS; - } else if (z2.getY() - t > z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() > z2.getY() + t) { + } else if (z1_minY >= z2_maxY - t) { + return IntervalRelations.PRECEDES; + } else if (z2_maxY - t <= z1_minY && z1_minY <= z2_maxY + t) { return IntervalRelations.MEETS_INVERSE; - } else if (z1.getY() < z2.getY() - t && (z2.getY() + t < z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() < z2.getY() + z2.getWidth() - t)) { - return IntervalRelations.OVERLAPS; - } else if (z1.getY() >= z2.getY() - t && (z2.getY() + t >= z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() >= z2.getY() + z2.getWidth() - t)) { + } else if (z2_maxY - t > z1_minY && z1_minY > z2_maxY + t) { + return IntervalRelations.MEETS; + } else if (z1_maxY < z2_maxY - t && (z2_maxY + t < z1_minY && z1_minY < z2_minY - t)) { return IntervalRelations.OVERLAPS_INVERSE; - } else if (z2.getY() - t <= z1.getY() && z1.getY() <= z2.getY() + t && z1.getY() + z1.getWidth() < z2.getY() + z2.getWidth() - t) { - return IntervalRelations.STARTS; - } else if (z2.getY() - t > z1.getY() && z1.getY() > z2.getY() + t && z1.getY() + z1.getWidth() >= z2.getY() + z2.getWidth() - t) { + } else if (z1_maxY >= z2_maxY - t && (z2_maxY + t >= z1_minY && z1_minY >= z2_minY - t)) { + return IntervalRelations.OVERLAPS; + } else if (z2_maxY - t <= z1_maxY && z1_maxY <= z2_maxY + t && z1_minY < z2_minY - t) { return IntervalRelations.STARTS_INVERSE; - } else if (z1.getY() > z2.getY() + t && z1.getY() + z1.getWidth() < z2.getY() + z2.getWidth() - t) { - return IntervalRelations.DURING; - } else if (z1.getY() <= z2.getY() + t && z1.getY() + z1.getWidth() >= z2.getY() + z2.getWidth() - t) { + } else if (z2_maxY - t > z1_maxY && z1_maxY > z2_maxY + t && z1_minY >= z2_minY - t) { + return IntervalRelations.STARTS; + } else if (z1_maxY > z2_maxY + t && z1_minY < z2_minY - t) { return IntervalRelations.DURING_INVERSE; - } else if (z1.getY() > z2.getY() + t && (z2.getY() + z2.getWidth() - t <= z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() <= z2.getY() + z2.getWidth() + t)) { - return IntervalRelations.FINISHES; - } else if (z1.getY() <= z2.getY() + t && (z2.getY() + z2.getWidth() - t > z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() > z2.getY() + z2.getWidth() + t)) { + } else if (z1_maxY <= z2_maxY + t && z1_minY >= z2_minY - t) { + return IntervalRelations.DURING; + } else if (z1_maxY > z2_maxY + t && (z2_minY - t <= z1_minY && z1_minY <= z2_minY + t)) { return IntervalRelations.FINISHES_INVERSE; - } else if (z2.getY() - t <= z1.getY() && z1.getY() <= z2.getY() + t && (z2.getY() + z2.getWidth() - t <= z1.getY() + z1.getWidth() - && z1.getY() + z1.getWidth() <= z2.getY() + z2.getWidth() + t)) { + } else if (z1_maxY <= z2_maxY + t && (z2_minY - t > z1_minY && z1_minY > z2_minY + t)) { + return IntervalRelations.FINISHES; + } else if (z2_maxY - t <= z1_maxY && z1_maxY <= z2_maxY + t && (z2_minY - t <= z1_minY && z1_minY <= z2_minY + t)) { return IntervalRelations.EQUALS; } return IntervalRelations.UNKNOWN; + } } \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java index b3b9bab..0c2ea9e 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java @@ -27,7 +27,7 @@ public class ReadingOrderService { SpatialReasoningRules spatialReasoningRules = columnWise ? SpatialReasoningRules.COLUMN_WISE : SpatialReasoningRules.ROW_WISE; - var unsupervisedReadingOrderDetector = new UnsupervisedReadingOrderDetector(5, spatialReasoningRules, true); + var unsupervisedReadingOrderDetector = new UnsupervisedReadingOrderDetector(5, spatialReasoningRules, false); return unsupervisedReadingOrderDetector.get(zones); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 4d14bdc..38d13c7 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -26,7 +26,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { @SneakyThrows public void testViewerDocument() { - String fileName = "files/new/A8240D TRESO dRR Part B Section 9 core - Germany Commenting - 07_05_2019.pdf"; + String fileName = "files/Plenarprotokoll 1 (keine Druchsache!) (1).pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; var documentFile = new ClassPathResource(fileName).getFile();