RED-8670: add table detection from idp result
* some 'slight' refactoring
This commit is contained in:
parent
8df429730f
commit
853a2e62aa
@ -84,9 +84,9 @@ public class QuadPointGridifier {
|
||||
.toList();
|
||||
|
||||
computeNeighbours(linkedCells);
|
||||
int numberOfSplits = 0;
|
||||
int splits = 0;
|
||||
while (linkedCells.stream()
|
||||
.anyMatch(LinkedQuadPointCell::needsSplit) && numberOfSplits < MAX_SPLITTING_ITERATIONS) {
|
||||
.anyMatch(LinkedQuadPointCell::needsSplit) && splits < MAX_SPLITTING_ITERATIONS) {
|
||||
|
||||
List<LinkedQuadPointCell> newCells = new LinkedList<>();
|
||||
for (LinkedQuadPointCell linkedCell : linkedCells) {
|
||||
@ -98,7 +98,7 @@ public class QuadPointGridifier {
|
||||
}
|
||||
computeNeighbours(newCells);
|
||||
linkedCells = newCells;
|
||||
numberOfSplits++;
|
||||
splits++;
|
||||
}
|
||||
|
||||
return buildStructure(linkedCells);
|
||||
|
||||
@ -21,6 +21,7 @@ public class TableGridStructureCalculator {
|
||||
|
||||
// Multiplied with the minimum cell height/width: cells may be at most this far apart in one dimension, and must overlap by at least that much in the other dimension, to be considered neighbours.
|
||||
private static final double DISTANCE_FACTOR = 0.5;
|
||||
private static final int MAX_SPLITTING_ITERATIONS = 10;
|
||||
Set<Cell> cells;
|
||||
AffineTransform pageToPdfTransform;
|
||||
double minCellHeight;
|
||||
@ -52,10 +53,7 @@ public class TableGridStructureCalculator {
|
||||
|
||||
if (cellsHaveLargeOverlaps()) {
|
||||
// If cells overlap significantly, the logic below will keep splitting them infinitely, so we revert to the simpler area sweep implementation.
|
||||
List<List<Cell>> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight);
|
||||
rows = removeEmptyRows(rows);
|
||||
rows = removeEmptyCols(rows);
|
||||
return rows;
|
||||
return areaSweepFallback();
|
||||
}
|
||||
|
||||
var linkedCells = cells.stream()
|
||||
@ -63,9 +61,9 @@ public class TableGridStructureCalculator {
|
||||
.collect(Collectors.toList());
|
||||
|
||||
computeNeighbours(linkedCells);
|
||||
|
||||
int splits = 0;
|
||||
while (linkedCells.stream()
|
||||
.anyMatch(LinkedCell::needsSplit)) {
|
||||
.anyMatch(LinkedCell::needsSplit) && splits <= MAX_SPLITTING_ITERATIONS) {
|
||||
|
||||
List<LinkedCell> newCells = new LinkedList<>();
|
||||
for (LinkedCell linkedCell : linkedCells) {
|
||||
@ -77,11 +75,21 @@ public class TableGridStructureCalculator {
|
||||
}
|
||||
computeNeighbours(newCells);
|
||||
linkedCells = newCells;
|
||||
splits++;
|
||||
}
|
||||
return buildStructure(linkedCells);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds the table grid using the area sweep implementation instead of the cell-splitting logic.
 * Delegates to {@code AreaSweepGridifier.gridify} with this calculator's cells, page-to-PDF transform
 * and minimum cell width/height, then passes the result through {@code removeEmptyRows} and
 * {@code removeEmptyCols}.
 *
 * @return the rows produced by the area sweep after the empty-row and empty-column removal steps
 */
private List<List<Cell>> areaSweepFallback() {
|
||||
|
||||
List<List<Cell>> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight);
|
||||
rows = removeEmptyRows(rows);
|
||||
rows = removeEmptyCols(rows);
|
||||
return rows;
|
||||
}
|
||||
|
||||
|
||||
private boolean cellsHaveLargeOverlaps() {
|
||||
|
||||
for (Cell cell1 : cells) {
|
||||
@ -106,7 +114,7 @@ public class TableGridStructureCalculator {
|
||||
}
|
||||
List<List<Cell>> rows = buildRows(cells);
|
||||
if (isNotRectangular(rows)) {
|
||||
throw new AssertionError();
|
||||
return areaSweepFallback();
|
||||
}
|
||||
rows = removeEmptyRows(rows);
|
||||
rows = removeEmptyCols(rows);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user