diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/QuadPointGridifier.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/QuadPointGridifier.java index ca8711d..b2f285a 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/QuadPointGridifier.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/QuadPointGridifier.java @@ -84,9 +84,9 @@ public class QuadPointGridifier { .toList(); computeNeighbours(linkedCells); - int numberOfSplits = 0; + int splits = 0; /* counts completed split passes; the loop below stops once it reaches MAX_SPLITTING_ITERATIONS */ while (linkedCells.stream() - .anyMatch(LinkedQuadPointCell::needsSplit) && numberOfSplits < MAX_SPLITTING_ITERATIONS) { + .anyMatch(LinkedQuadPointCell::needsSplit) && splits < MAX_SPLITTING_ITERATIONS) { List newCells = new LinkedList<>(); for (LinkedQuadPointCell linkedCell : linkedCells) { @@ -98,7 +98,7 @@ public class QuadPointGridifier { } computeNeighbours(newCells); linkedCells = newCells; - numberOfSplits++; + splits++; } return buildStructure(linkedCells); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/TableGridStructureCalculator.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/TableGridStructureCalculator.java index b1c21d2..b33a86c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/TableGridStructureCalculator.java +++ 
b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/tables/TableGridStructureCalculator.java @@ -21,6 +21,7 @@ public class TableGridStructureCalculator { // multiplied with minimum cell height/width, Cells may be at most this apart in one dimension, and must overlap at least that much in the other dimension to be considered neighbours private static final double DISTANCE_FACTOR = 0.5; + private static final int MAX_SPLITTING_ITERATIONS = 10; /* upper bound on the number of split passes performed by the splitting loop */ Set cells; AffineTransform pageToPdfTransform; double minCellHeight; @@ -52,10 +53,7 @@ public class TableGridStructureCalculator { if (cellsHaveLargeOverlaps()) { // If cells overlap significantly, the logic below will keep splitting them infinitely, so we revert to the simpler area sweep implementation. - List> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight); - rows = removeEmptyRows(rows); - rows = removeEmptyCols(rows); - return rows; + return areaSweepFallback(); } var linkedCells = cells.stream() @@ -63,9 +61,9 @@ public class TableGridStructureCalculator { .collect(Collectors.toList()); computeNeighbours(linkedCells); - + int splits = 0; while (linkedCells.stream() - .anyMatch(LinkedCell::needsSplit)) { + .anyMatch(LinkedCell::needsSplit) && splits < MAX_SPLITTING_ITERATIONS) { List newCells = new LinkedList<>(); for (LinkedCell linkedCell : linkedCells) { @@ -77,11 +75,21 @@ public class TableGridStructureCalculator { } computeNeighbours(newCells); linkedCells = newCells; + splits++; } return buildStructure(linkedCells); } + private List> areaSweepFallback() { /* runs AreaSweepGridifier.gridify on the cells, then removeEmptyRows and removeEmptyCols on the result */ + + List> rows = AreaSweepGridifier.gridify(cells, pageToPdfTransform, minCellWidth, minCellHeight); + rows = removeEmptyRows(rows); + rows = removeEmptyCols(rows); + return rows; + } + + private boolean cellsHaveLargeOverlaps() { for (Cell cell1 : cells) { @@ -106,7 +114,7 @@ public class TableGridStructureCalculator { } List> rows = 
buildRows(cells); if (isNotRectangular(rows)) { - throw new AssertionError(); + return areaSweepFallback(); /* non-rectangular rows: return the area-sweep result instead of throwing */ } rows = removeEmptyRows(rows); rows = removeEmptyCols(rows);