RED-7141: Align backend text sorting with Webviewer sorting

* hotfix for tables not being detected due to wrong x-y-sorting
This commit is contained in:
maverickstuder 2024-03-12 11:06:53 +01:00
parent f4cae8a7dc
commit 956fbff872

View File

@ -14,6 +14,7 @@ import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.processor.docstrum.DocstrumSegmentationService;
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone;
import com.knecon.fforesight.service.layoutparser.processor.docstrum.utils.DoubleUtils;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
@ -53,13 +54,13 @@ public class DocstrumBlockificationService {
});
var zones = docstrumSegmentationService.segmentPage(textPositions, xyOrder);
var pageBlocks = toAbstractPageBlocks(zones, usedHorizonalRulings, usedVerticalRulings);
var pageBlocks = toAbstractPageBlocks(zones, usedHorizonalRulings, usedVerticalRulings, xyOrder);
return new ClassificationPage(pageBlocks);
}
private List<AbstractPageBlock> toAbstractPageBlocks(List<Zone> zones, List<Ruling> horizontalRulings, List<Ruling> verticalRulings) {
private List<AbstractPageBlock> toAbstractPageBlocks(List<Zone> zones, List<Ruling> horizontalRulings, List<Ruling> verticalRulings, boolean xyOrder) {
List<AbstractPageBlock> abstractPageBlocks = new ArrayList<>();
zones.forEach(zone -> {
@ -74,6 +75,12 @@ public class DocstrumBlockificationService {
abstractPageBlocks.addAll(splitZonesAtRulings(textPositionSequences, horizontalRulings, verticalRulings));
});
// When xyOrder is requested, re-sort the collected blocks by their minimum Y first and,
// for blocks whose minimum Y compares as equal, by their minimum X. Both keys are compared
// through DoubleUtils.compareDouble with the shared THRESHOLD rather than with exact
// double comparison.
// NOTE(review): presumably compareDouble treats values closer than THRESHOLD as equal — confirm.
// If so, the ordering is not transitive (a~b and b~c does not imply a~c), which breaks the
// Comparator contract; List.sort may then yield an input-order-dependent result or throw
// IllegalArgumentException ("Comparison method violates its general contract").
if (xyOrder) {
abstractPageBlocks.sort(Comparator.comparing(AbstractPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
.thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
}
return abstractPageBlocks;
}
@ -88,6 +95,7 @@ public class DocstrumBlockificationService {
AbstractPageBlock block = itty.next();
if (block instanceof TablePageBlock) {
previous = new TextPageBlock();
continue;
}
TextPageBlock current = (TextPageBlock) block;