RED-8642: Use LineMode from cv-analysis-service instead of table cell mode

* added line straightening logic
This commit is contained in:
maverickstuder 2024-03-11 11:26:14 +01:00
parent 78fb6b825b
commit 86addc3139

View File

@ -22,6 +22,9 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class CvTableParsingAdapter {
private static final double CROOKED_THRESHOLD = 0.2;
public Map<Integer, List<Ruling>> buildCvParsedRulingsPerPage(TableServiceResponse tableServiceResponse) {
Map<Integer, List<Ruling>> rulings = new HashMap<>();
@ -37,12 +40,36 @@ public class CvTableParsingAdapter {
List<Ruling> cvParsedRulings = new ArrayList<>();
tableLines.forEach(l -> cvParsedRulings.add(new Ruling(new Point2D.Double(l.getX1() * pageInfo.getWidth(), (1 - l.getY1()) * pageInfo.getHeight()),
new Point2D.Double(l.getX2() * pageInfo.getWidth(), (1 - l.getY2()) * pageInfo.getHeight()))));
tableLines.forEach(l -> {
Point2D.Double p1 = new Point2D.Double(l.getX1() * pageInfo.getWidth(), (1 - l.getY1()) * pageInfo.getHeight());
Point2D.Double p2 = new Point2D.Double(l.getX2() * pageInfo.getWidth(), (1 - l.getY2()) * pageInfo.getHeight());
return cvParsedRulings.stream()
.filter(ruling -> ruling.getWidth() < pageInfo.getWidth() * 0.98 && ruling.getHeight() < pageInfo.getHeight() * 0.98)
.toList();
// Determine if line is primarily horizontal or vertical
double xDiff = Math.abs(p1.x - p2.x);
double yDiff = Math.abs(p1.y - p2.y);
if (xDiff < yDiff) {
if (xDiff / yDiff > CROOKED_THRESHOLD || yDiff > pageInfo.getHeight() * 0.98) {
return;
}
// Straighten: snap both endpoints to their mean x so the ruling becomes exactly vertical
double avgX = (p1.x + p2.x) / 2;
p1.x = avgX;
p2.x = avgX;
} else {
if (yDiff / xDiff > CROOKED_THRESHOLD || xDiff > pageInfo.getWidth() * 0.98) {
return;
}
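// Straighten: snap both endpoints to their mean y so the ruling becomes exactly horizontal
// NOTE(review): a zero-length line (xDiff == yDiff == 0) can also reach this point, because
// 0.0 / 0.0 is NaN and NaN > CROOKED_THRESHOLD is false - confirm whether point-sized rulings should be dropped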
double avgY = (p1.y + p2.y) / 2;
p1.y = avgY;
p2.y = avgY;
}
cvParsedRulings.add(new Ruling(p1, p2));
});
return cvParsedRulings;
}
}