diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java index 5215d6f..c34db3a 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java @@ -2,6 +2,8 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.m import java.awt.geom.Rectangle2D; +import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; + import lombok.Data; @Data @@ -45,4 +47,10 @@ public abstract class BoundingBox { return bBox.getX() <= contained.getX() + tolerance && bBox.getY() <= contained.getY() + tolerance && bBox.getX() + bBox.getWidth() >= contained.getX() + contained.getWidth() - tolerance && bBox.getY() + bBox.getHeight() >= contained.getY() + contained.getHeight() - tolerance; } + + public boolean intersectsY(BoundingBox other) { + + return this.getBBox().getMinY() <= other.getBBox().getMaxY() && this.getBBox().getMaxY() >= other.getBBox().getMinY(); + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java index 268d092..8e51641 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java @@ -2,8 +2,11 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.s import java.util.ArrayList; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.ListIterator; +import java.util.Map; +import java.util.stream.Collectors; import org.springframework.stereotype.Service; @@ -19,7 +22,7 @@ public class ReadingOrderService { private static final double THRESHOLD = 1; - public List resolve(List zones, boolean columnWise) { + public List resolveNew(List zones, boolean columnWise) { if (zones.isEmpty() || zones.size() == 1) { return zones; @@ -32,15 +35,31 @@ public class ReadingOrderService { return unsupervisedReadingOrderDetector.get(zones); } - public List resolveOld(List zones, boolean xyOrder) { + + public List resolve(List zones, boolean columnWise) { if (zones.isEmpty() || zones.size() == 1) { return zones; } - if (xyOrder) { + Map histogram = new HashMap<>(); + + for (Zone zone : zones) { + long minY = Math.round(zone.getBBox().getMinY()); + long maxY = Math.round(zone.getBBox().getMaxY()); + for (long i = minY; i <= maxY; i++) { + histogram.put(i, histogram.getOrDefault(i, 0) + 1); + } + } + + columnWise = histogram.values() + .stream() + .mapToInt(Integer::intValue).average() + .orElse(1) > 1.5; + + if (!columnWise) { zones.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)) - .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); + .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); return zones; } @@ -80,14 +99,48 @@ public class ReadingOrderService { } } + List leftNotIntersecting = new ArrayList<>(); + for (Zone leftZone : leftOf) { + boolean intersects = false; + for (Zone rightZone : rightOf) { + if (leftZone.intersectsY(rightZone)) { + intersects = true; + break; + } + } + if (!intersects) { + leftNotIntersecting.add(leftZone); + } + } + + List rightNotIntersecting = new ArrayList<>(); + for (Zone rightZone : rightOf) { + boolean intersects = false; + for (Zone leftZone : leftOf) { + if (rightZone.intersectsY(leftZone)) { + intersects = true; + break; + } + } + if (!intersects) { + rightNotIntersecting.add(rightZone); + } + } + + leftOf.removeAll(leftNotIntersecting); + rightOf.removeAll(rightNotIntersecting); + + middle.addAll(leftNotIntersecting); + middle.addAll(rightNotIntersecting); + leftOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)) - .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); + .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); rightOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)) - .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); + .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); middle.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)) - .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); + .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); List sortedZones = new ArrayList<>(); sortedZones.addAll(leftOf);