RED-8670: add table detection from idp result
* some 'slight' refactoring
This commit is contained in:
parent
06618c2e9e
commit
39e20bad8d
@ -84,9 +84,9 @@ public class QuadPointGridifier {
|
|||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
computeNeighbours(linkedCells);
|
computeNeighbours(linkedCells);
|
||||||
int numberOfSplits = 0;
|
int splits = 0;
|
||||||
while (linkedCells.stream()
|
while (linkedCells.stream()
|
||||||
.anyMatch(LinkedQuadPointCell::needsSplit) && numberOfSplits < MAX_SPLITTING_ITERATIONS) {
|
.anyMatch(LinkedQuadPointCell::needsSplit) && splits < MAX_SPLITTING_ITERATIONS) {
|
||||||
|
|
||||||
List<LinkedQuadPointCell> newCells = new LinkedList<>();
|
List<LinkedQuadPointCell> newCells = new LinkedList<>();
|
||||||
for (LinkedQuadPointCell linkedCell : linkedCells) {
|
for (LinkedQuadPointCell linkedCell : linkedCells) {
|
||||||
@ -98,7 +98,7 @@ public class QuadPointGridifier {
|
|||||||
}
|
}
|
||||||
computeNeighbours(newCells);
|
computeNeighbours(newCells);
|
||||||
linkedCells = newCells;
|
linkedCells = newCells;
|
||||||
numberOfSplits++;
|
splits++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return buildStructure(linkedCells);
|
return buildStructure(linkedCells);
|
||||||
|
|||||||
@ -21,6 +21,7 @@ public class TableGridStructureCalculator {
|
|||||||
|
|
||||||
// Multiplied by the minimum cell height/width: cells may be at most this far apart in one dimension, and must overlap by at least that much in the other dimension, to be considered neighbours.
|
// Multiplied by the minimum cell height/width: cells may be at most this far apart in one dimension, and must overlap by at least that much in the other dimension, to be considered neighbours.
|
||||||
private static final double DISTANCE_FACTOR = 0.5;
|
private static final double DISTANCE_FACTOR = 0.5;
|
||||||
|
private static final int MAX_SPLITTING_ITERATIONS = 10;
|
||||||
Set<Cell> cells;
|
Set<Cell> cells;
|
||||||
AffineTransform pageToPdfTransform;
|
AffineTransform pageToPdfTransform;
|
||||||
double minCellHeight;
|
double minCellHeight;
|
||||||
@ -52,10 +53,7 @@ public class TableGridStructureCalculator {
|
|||||||
|
|
||||||
if (cellsHaveLargeOverlaps()) {
|
if (cellsHaveLargeOverlaps()) {
|
||||||
// If cells overlap significantly, the logic below would keep splitting them indefinitely, so we fall back to the simpler area sweep implementation.
|
// If cells overlap significantly, the logic below would keep splitting them indefinitely, so we fall back to the simpler area sweep implementation.
|
||||||
List<List<Cell>> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight);
|
return areaSweepFallback();
|
||||||
rows = removeEmptyRows(rows);
|
|
||||||
rows = removeEmptyCols(rows);
|
|
||||||
return rows;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var linkedCells = cells.stream()
|
var linkedCells = cells.stream()
|
||||||
@ -63,9 +61,9 @@ public class TableGridStructureCalculator {
|
|||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
computeNeighbours(linkedCells);
|
computeNeighbours(linkedCells);
|
||||||
|
int splits = 0;
|
||||||
while (linkedCells.stream()
|
while (linkedCells.stream()
|
||||||
.anyMatch(LinkedCell::needsSplit)) {
|
.anyMatch(LinkedCell::needsSplit) && splits <= MAX_SPLITTING_ITERATIONS) {
|
||||||
|
|
||||||
List<LinkedCell> newCells = new LinkedList<>();
|
List<LinkedCell> newCells = new LinkedList<>();
|
||||||
for (LinkedCell linkedCell : linkedCells) {
|
for (LinkedCell linkedCell : linkedCells) {
|
||||||
@ -77,11 +75,21 @@ public class TableGridStructureCalculator {
|
|||||||
}
|
}
|
||||||
computeNeighbours(newCells);
|
computeNeighbours(newCells);
|
||||||
linkedCells = newCells;
|
linkedCells = newCells;
|
||||||
|
splits++;
|
||||||
}
|
}
|
||||||
return buildStructure(linkedCells);
|
return buildStructure(linkedCells);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<List<Cell>> areaSweepFallback() {
|
||||||
|
|
||||||
|
List<List<Cell>> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight);
|
||||||
|
rows = removeEmptyRows(rows);
|
||||||
|
rows = removeEmptyCols(rows);
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private boolean cellsHaveLargeOverlaps() {
|
private boolean cellsHaveLargeOverlaps() {
|
||||||
|
|
||||||
for (Cell cell1 : cells) {
|
for (Cell cell1 : cells) {
|
||||||
@ -106,7 +114,7 @@ public class TableGridStructureCalculator {
|
|||||||
}
|
}
|
||||||
List<List<Cell>> rows = buildRows(cells);
|
List<List<Cell>> rows = buildRows(cells);
|
||||||
if (isNotRectangular(rows)) {
|
if (isNotRectangular(rows)) {
|
||||||
throw new AssertionError();
|
return areaSweepFallback();
|
||||||
}
|
}
|
||||||
rows = removeEmptyRows(rows);
|
rows = removeEmptyRows(rows);
|
||||||
rows = removeEmptyCols(rows);
|
rows = removeEmptyCols(rows);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user