RED-8670: add table detection from idp result

* some 'slight' refactoring
This commit is contained in:
Kilian Schuettler 2025-01-09 13:21:26 +01:00
parent 06618c2e9e
commit 39e20bad8d
2 changed files with 18 additions and 10 deletions

View File

@ -84,9 +84,9 @@ public class QuadPointGridifier {
.toList();
computeNeighbours(linkedCells);
int numberOfSplits = 0;
int splits = 0;
while (linkedCells.stream()
.anyMatch(LinkedQuadPointCell::needsSplit) && numberOfSplits < MAX_SPLITTING_ITERATIONS) {
.anyMatch(LinkedQuadPointCell::needsSplit) && splits < MAX_SPLITTING_ITERATIONS) {
List<LinkedQuadPointCell> newCells = new LinkedList<>();
for (LinkedQuadPointCell linkedCell : linkedCells) {
@ -98,7 +98,7 @@ public class QuadPointGridifier {
}
computeNeighbours(newCells);
linkedCells = newCells;
numberOfSplits++;
splits++;
}
return buildStructure(linkedCells);

View File

@ -21,6 +21,7 @@ public class TableGridStructureCalculator {
// Multiplied with the minimum cell height/width: cells may be at most this far apart in one dimension and must overlap by at least this much in the other dimension to be considered neighbours.
private static final double DISTANCE_FACTOR = 0.5;
private static final int MAX_SPLITTING_ITERATIONS = 10;
Set<Cell> cells;
AffineTransform pageToPdfTransform;
double minCellHeight;
@ -52,10 +53,7 @@ public class TableGridStructureCalculator {
if (cellsHaveLargeOverlaps()) {
// If cells overlap significantly, the logic below will keep splitting them infinitely, so we revert to the simpler area sweep implementation.
List<List<Cell>> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight);
rows = removeEmptyRows(rows);
rows = removeEmptyCols(rows);
return rows;
return areaSweepFallback();
}
var linkedCells = cells.stream()
@ -63,9 +61,9 @@ public class TableGridStructureCalculator {
.collect(Collectors.toList());
computeNeighbours(linkedCells);
int splits = 0;
while (linkedCells.stream()
.anyMatch(LinkedCell::needsSplit)) {
.anyMatch(LinkedCell::needsSplit) && splits <= MAX_SPLITTING_ITERATIONS) {
List<LinkedCell> newCells = new LinkedList<>();
for (LinkedCell linkedCell : linkedCells) {
@ -77,11 +75,21 @@ public class TableGridStructureCalculator {
}
computeNeighbours(newCells);
linkedCells = newCells;
splits++;
}
return buildStructure(linkedCells);
}
/**
 * Builds the grid with the simpler area sweep implementation instead of the cell-splitting logic.
 * <p>
 * The grid is produced by {@code AreaSweepGridifier.gridify} from this calculator's cells, page-to-PDF transform
 * and minimum cell dimensions; the result is then passed through {@code removeEmptyRows} followed by
 * {@code removeEmptyCols}.
 *
 * @return the rows returned by {@code removeEmptyCols} after both clean-up passes
 */
private List<List<Cell>> areaSweepFallback() {
List<List<Cell>> sweptGrid = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight);
// Rows are cleaned first, then columns, matching the original order of the two passes.
return removeEmptyCols(removeEmptyRows(sweptGrid));
}
private boolean cellsHaveLargeOverlaps() {
for (Cell cell1 : cells) {
@ -106,7 +114,7 @@ public class TableGridStructureCalculator {
}
List<List<Cell>> rows = buildRows(cells);
if (isNotRectangular(rows)) {
throw new AssertionError();
return areaSweepFallback();
}
rows = removeEmptyRows(rows);
rows = removeEmptyCols(rows);