diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java index 16a468e..d4f70fe 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java @@ -45,6 +45,12 @@ public abstract class AbstractPageBlock { } + public boolean containsBlock(TextPageBlock other, float threshold) { + + return this.minX <= other.getMinX() + threshold && this.maxX >= other.getMaxX() - threshold && this.minY <= other.getMinY() + threshold && this.maxY >= other.getMaxY() - threshold; + } + + public boolean contains(AbstractPageBlock other) { return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java index a7ffd7e..2b3ceda 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java @@ -7,6 +7,7 @@ import java.util.Comparator; import java.util.List; import java.util.ListIterator; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.springframework.stereotype.Service; @@ -68,24 +69,31 @@ public class DocstrumBlockificationService { if (previous != null) { - if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 && current.getNumberOfLines() <= 5 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1)) { + if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 /* && current.getNumberOfLines() <= 10 */ || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1)) { previous.getSequences().addAll(current.getSequences()); previous = buildTextBlock(previous.getSequences(), 0); itty.remove(); continue; } - if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 && current.getNumberOfLines() <= 5 && previous.getNumberOfLines() <= current.getNumberOfLines())) { + if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 /* && current.getNumberOfLines() <= 10 */ && previous.getNumberOfLines() <= current.getNumberOfLines())) { previous.getSequences().addAll(current.getSequences()); previous = buildTextBlock(previous.getSequences(), 0); itty.remove(); // Might be a left/right mapping add one sorted as well - var sortedDublicate = buildTextBlock(previous.getSequences().stream().sorted(new TextPositionSequenceComparator()).toList(), 0); + var sortedDublicate = buildTextBlock(previous.getSequences().stream().sorted(new TextPositionSequenceComparator()).collect(Collectors.toList()), 0); itty.add(sortedDublicate); continue; } + if (previous.containsBlock(current, THRESHOLD)) { + previous.getSequences().addAll(current.getSequences()); + previous = buildTextBlock(previous.getSequences().stream().sorted(new TextPositionSequenceComparator()).collect(Collectors.toList()), 0); + itty.remove(); + continue; + } + } previous = current; }