From 86addc31399f4ce05c816f19869b856041921385 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Mon, 11 Mar 2024 11:26:14 +0100 Subject: [PATCH] RED-8642: Use LineMode from cv-analysis-service instead of table cell mode * added line straightening logic --- .../adapter/CvTableParsingAdapter.java | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java index 2b1af1e..6c9df83 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java @@ -22,6 +22,9 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class CvTableParsingAdapter { + private static final double CROOKED_THRESHOLD = 0.2; + + public Map> buildCvParsedRulingsPerPage(TableServiceResponse tableServiceResponse) { Map> rulings = new HashMap<>(); @@ -37,12 +40,36 @@ public class CvTableParsingAdapter { List cvParsedRulings = new ArrayList<>(); - tableLines.forEach(l -> cvParsedRulings.add(new Ruling(new Point2D.Double(l.getX1() * pageInfo.getWidth(), (1 - l.getY1()) * pageInfo.getHeight()), - new Point2D.Double(l.getX2() * pageInfo.getWidth(), (1 - l.getY2()) * pageInfo.getHeight())))); + tableLines.forEach(l -> { + Point2D.Double p1 = new Point2D.Double(l.getX1() * pageInfo.getWidth(), (1 - l.getY1()) * pageInfo.getHeight()); + Point2D.Double p2 = new Point2D.Double(l.getX2() * pageInfo.getWidth(), (1 - l.getY2()) * pageInfo.getHeight()); - return cvParsedRulings.stream() - .filter(ruling -> ruling.getWidth() < pageInfo.getWidth() * 0.98 && ruling.getHeight() < pageInfo.getHeight() * 0.98) - .toList(); + // Determine if line is primarily horizontal or vertical + double xDiff = Math.abs(p1.x - p2.x); + double yDiff = Math.abs(p1.y - p2.y); + + if (xDiff < yDiff) { + if (xDiff / yDiff > CROOKED_THRESHOLD || yDiff > pageInfo.getHeight() * 0.98) { + return; + } + // straighten x-coordinates + double avgX = (p1.x + p2.x) / 2; + p1.x = avgX; + p2.x = avgX; + } else { + if (yDiff / xDiff > CROOKED_THRESHOLD || xDiff > pageInfo.getWidth() * 0.98) { + return; + } + // straighten y-coordinates + double avgY = (p1.y + p2.y) / 2; + p1.y = avgY; + p2.y = avgY; + } + + cvParsedRulings.add(new Ruling(p1, p2)); + }); + + return cvParsedRulings; } }