diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java index 23e598a..1676696 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java @@ -14,6 +14,7 @@ import org.springframework.stereotype.Service; import com.knecon.fforesight.service.layoutparser.processor.docstrum.DocstrumSegmentationService; import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone; +import com.knecon.fforesight.service.layoutparser.processor.docstrum.utils.DoubleUtils; import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter; @@ -53,13 +54,13 @@ public class DocstrumBlockificationService { }); var zones = docstrumSegmentationService.segmentPage(textPositions, xyOrder); - var pageBlocks = toAbstractPageBlocks(zones, usedHorizonalRulings, usedVerticalRulings); + var pageBlocks = toAbstractPageBlocks(zones, usedHorizonalRulings, usedVerticalRulings, xyOrder); return new ClassificationPage(pageBlocks); } - private List toAbstractPageBlocks(List zones, List horizontalRulings, List verticalRulings) { + private List toAbstractPageBlocks(List zones, List horizontalRulings, List verticalRulings, boolean xyOrder) { List abstractPageBlocks = new ArrayList<>(); zones.forEach(zone -> { @@ -74,6 +75,12 @@ public class DocstrumBlockificationService { abstractPageBlocks.addAll(splitZonesAtRulings(textPositionSequences, horizontalRulings, verticalRulings)); }); + + if (xyOrder) { + abstractPageBlocks.sort(Comparator.comparing(AbstractPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)) + .thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); + } + return abstractPageBlocks; } @@ -88,6 +95,7 @@ public class DocstrumBlockificationService { AbstractPageBlock block = itty.next(); if (block instanceof TablePageBlock) { + previous = new TextPageBlock(); continue; } TextPageBlock current = (TextPageBlock) block;