RED-8550: Faulty table recognition and text duplication leads to huge sections

* cherrypick
2024-02-21 13:54:30 +01:00 · 2024-02-21 13:54:30 +01:00 · 18a28e82d0
commit 18a28e82d0
parent 3c9049dc8a
15 changed files with 546 additions and 248 deletions
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -94,16 +94,21 @@ public class LayoutParsingPipeline {
        log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());

        File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
-        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
+        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
+                .orElse(originFile);

        ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
-        if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
-            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
+        if (layoutParsingRequest.imagesFileStorageId()
+                .isPresent()) {
+            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId()
+                                                                                     .get());
        }

        TableServiceResponse tableServiceResponse = new TableServiceResponse();
-        if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
-            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
+        if (layoutParsingRequest.tablesFileStorageId()
+                .isPresent()) {
+            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId()
+                                                                                     .get());
        }

        ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(),
@ -142,25 +147,25 @@ public class LayoutParsingPipeline {
                .numberOfPages(documentGraph.getNumberOfPages())
                .duration(System.currentTimeMillis() - start)
                .message(format("""
-                                Layout parsing has finished in %.02f s.
-                                identifiers: %s
-                                %s
-                                Files have been saved with Ids:
-                                Structure: %s
-                                Text: %s
-                                Positions: %s
-                                PageData: %s
-                                Simplified Text: %s
-                                Viewer Doc: %s""",
-                        ((float) (System.currentTimeMillis() - start)) / 1000,
-                        layoutParsingRequest.identifier(),
-                        buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
-                        layoutParsingRequest.structureFileStorageId(),
-                        layoutParsingRequest.textBlockFileStorageId(),
-                        layoutParsingRequest.positionBlockFileStorageId(),
-                        layoutParsingRequest.pageFileStorageId(),
-                        layoutParsingRequest.simplifiedTextStorageId(),
-                        layoutParsingRequest.viewerDocumentStorageId()))
+                                        Layout parsing has finished in %.02f s.
+                                        identifiers: %s
+                                        %s
+                                        Files have been saved with Ids:
+                                        Structure: %s
+                                        Text: %s
+                                        Positions: %s
+                                        PageData: %s
+                                        Simplified Text: %s
+                                        Viewer Doc: %s""",
+                                ((float) (System.currentTimeMillis() - start)) / 1000,
+                                layoutParsingRequest.identifier(),
+                                buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
+                                layoutParsingRequest.structureFileStorageId(),
+                                layoutParsingRequest.textBlockFileStorageId(),
+                                layoutParsingRequest.positionBlockFileStorageId(),
+                                layoutParsingRequest.pageFileStorageId(),
+                                layoutParsingRequest.simplifiedTextStorageId(),
+                                layoutParsingRequest.viewerDocumentStorageId()))
                .build();

    }
@ -170,9 +175,9 @@ public class LayoutParsingPipeline {

        AtomicReference<Document> documentReference = new AtomicReference<>();

-        Observation.createNotStarted("LayoutParsingPipeline", observationRegistry).contextualName("build-document-graph").observe(() -> {
-            documentReference.set(DocumentGraphFactory.buildDocumentGraph(classificationDocument));
-        });
+        Observation.createNotStarted("LayoutParsingPipeline", observationRegistry)
+                .contextualName("build-document-graph")
+                .observe(() -> documentReference.set(DocumentGraphFactory.buildDocumentGraph(classificationDocument)));

        return documentReference.get();
    }
@ -181,14 +186,14 @@ public class LayoutParsingPipeline {
    private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {

        return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
-                numberOfPages,
-                semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
-                semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
-                semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
-                semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
-                semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
-                semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
-                semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
+                             numberOfPages,
+                             semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
+                             semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
+                             semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
+                             semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
+                             semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
+                             semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
+                             semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
    }


@ -319,9 +324,7 @@ public class LayoutParsingPipeline {

    private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) {

-        if (!classificationPage.isLandscape()) {
-            document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
-        }
+        document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
        document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue());
        document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue());
        document.getFontStyleCounter().addAll(classificationPage.getFontStyleCounter().getCountPerValue());
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/Cell.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/Cell.java
@ -76,4 +76,14 @@ public class Cell extends Rectangle {
        return this.getHeight() >= MIN_SIZE && this.getWidth() >= MIN_SIZE;
    }

+    /**
+     * Checks whether the given cell overlaps this cell, ignoring overlaps that only touch a 2 unit wide margin
+     * at this cell's minimum x/y edge.
+     *
+     * @param other the cell to test against this cell
+     * @return false if either cell has a non-positive height; otherwise true if {@code other} overlaps the area
+     * spanning from (x + 2, y + 2) to (x + width, y + height) of this cell
+     */
+    public boolean nearlyIntersects(Cell other) {
+
+        if (this.getHeight() <= 0 || other.getHeight() <= 0) {
+            return false;
+        }
+        // the lower bounds are shifted inwards by 2; the "- 2" in the return statement cancels that shift for the upper bounds
+        double x0 = this.getX() + 2;
+        double y0 = this.getY() + 2;
+        return (other.x + other.width > x0 && other.y + other.height > y0 && other.x < x0 + this.getWidth() - 2 && other.y < y0 + this.getHeight() - 2);
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java
@ -1,14 +1,12 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.table;

 import java.awt.geom.Point2D;
-import java.awt.geom.Rectangle2D;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeMap;
-import java.util.stream.Collectors;

 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
@ -21,7 +19,7 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
 public class TablePageBlock extends AbstractPageBlock {

-    private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
+    private final TreeMap<CellPosition, Cell> cellTreeMap = new TreeMap<>();

    private final int rotation;
    @Getter
@ -30,10 +28,14 @@ public class TablePageBlock extends AbstractPageBlock {
    private int unrotatedRowCount;
    private int unrotatedColCount;
    private List<List<Cell>> rows;
+    @Getter
+    @Setter
+    private List<Cell> cells;


    public TablePageBlock(List<Cell> cells, Rectangle area, int rotation) {

+        this.cells = cells;
        addCells(cells);
        minX = area.getLeft();
        minY = area.getBottom();
@ -50,6 +52,7 @@ public class TablePageBlock extends AbstractPageBlock {
        return getColCount() == 0 || getRowCount() == 0;
    }

+
    public List<List<Cell>> getRows() {

        if (rows == null) {
@ -80,7 +83,10 @@ public class TablePageBlock extends AbstractPageBlock {

    public int getColCount() {

-        return getRows().stream().mapToInt(List::size).max().orElse(0);
+        return getRows().stream()
+                .mapToInt(List::size)
+                .max()
+                .orElse(0);

    }

@ -120,7 +126,8 @@ public class TablePageBlock extends AbstractPageBlock {
                List<Cell> cellsToTheTop = new ArrayList<>();
                for (int i = 0; i < rowIndex; i++) {
                    try {
-                        cellsToTheTop.add(rows.get(i).get(colIndex));
+                        cellsToTheTop.add(rows.get(i)
+                                                  .get(colIndex));
                    } catch (IndexOutOfBoundsException e) {
                        log.debug("No cell {} in row {}, ignoring.", colIndex, rowIndex);
                    }
@ -135,7 +142,8 @@ public class TablePageBlock extends AbstractPageBlock {
                if (lastHeaderCell != null) {
                    cell.getHeaderCells().add(lastHeaderCell);
                }
-                if (!cell.getTextBlocks().isEmpty() && cell.getTextBlocks().get(0).getMostPopularWordStyle().equals("bold")) {
+                if (!cell.getTextBlocks().isEmpty() && cell.getTextBlocks()
+                        .get(0).getMostPopularWordStyle().equals("bold")) {
                    cell.setHeaderCell(true);
                }
            }
@ -151,7 +159,7 @@ public class TablePageBlock extends AbstractPageBlock {
            for (int i = 0; i < unrotatedColCount; i++) { // rows
                List<Cell> lastRow = new ArrayList<>();
                for (int j = unrotatedRowCount - 1; j >= 0; j--) { // cols
-                    Cell cell = cells.get(new CellPosition(j, i));
+                    Cell cell = cellTreeMap.get(new CellPosition(j, i));
                    if (cell != null) {
                        lastRow.add(cell);
                    }
@ -162,7 +170,7 @@ public class TablePageBlock extends AbstractPageBlock {
            for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
                List<Cell> lastRow = new ArrayList<>();
                for (int j = 0; j < unrotatedRowCount; j++) { // cols
-                    Cell cell = cells.get(new CellPosition(j, i));
+                    Cell cell = cellTreeMap.get(new CellPosition(j, i));
                    if (cell != null) {
                        lastRow.add(cell);
                    }
@ -173,7 +181,7 @@ public class TablePageBlock extends AbstractPageBlock {
            for (int i = 0; i < unrotatedRowCount; i++) {
                List<Cell> lastRow = new ArrayList<>();
                for (int j = 0; j < unrotatedColCount; j++) {
-                    Cell cell = cells.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
+                    Cell cell = cellTreeMap.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
                    if (cell != null) {
                        lastRow.add(cell);
                    }
@ -187,17 +195,6 @@ public class TablePageBlock extends AbstractPageBlock {
    }


-    private void add(Cell chunk, int row, int col) {
-
-        unrotatedRowCount = Math.max(unrotatedRowCount, row + 1);
-        unrotatedColCount = Math.max(unrotatedColCount, col + 1);
-
-        CellPosition cp = new CellPosition(row, col);
-        cells.put(cp, chunk);
-
-    }
-
-
    private void addCells(List<Cell> cells) {

        if (cells.isEmpty()) {
@ -206,11 +203,12 @@ public class TablePageBlock extends AbstractPageBlock {

        cells.removeIf(cell -> cell.getWidth() < 1.1 || cell.getHeight() < 1.1);

-        List<List<Cell>> rowsOfCells = calculateStructure(cells);
+        List<List<Cell>> rowsOfCellsMatrix = calculateTableStructure(cells);

-        for (int i = 0; i < rowsOfCells.size(); i++) {
-            for (int j = 0; j < rowsOfCells.get(i).size(); j++) {
-                add(rowsOfCells.get(i).get(j), i, j);
+        for (int i = 0; i < rowsOfCellsMatrix.size(); i++) {
+            for (int j = 0; j < rowsOfCellsMatrix.get(i).size(); j++) {
+                addCellToRowAndCol(rowsOfCellsMatrix.get(i)
+                                           .get(j), i, j);
            }
        }

@ -221,29 +219,36 @@ public class TablePageBlock extends AbstractPageBlock {
     * Calculates the structure of the table. For spanning rows and columns multiple cells with the same values will be inserted.
     *
     * @param cells The found cells
-     * @return TablePageBlock Structure
+     * @return TablePageBlock Structure as a rows of cells matrix
     */
-    private List<List<Cell>> calculateStructure(List<Cell> cells) {
-
-        List<List<Cell>> matrix = new ArrayList<>();
+    private List<List<Cell>> calculateTableStructure(List<Cell> cells) {

        if (cells.isEmpty()) {
-            return matrix;
+            return new ArrayList<>();
        }

        Set<Float> uniqueX = new HashSet<>();
        Set<Float> uniqueY = new HashSet<>();
-        cells.stream().filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3).forEach(c -> {
-            uniqueX.add(c.getLeft());
-            uniqueX.add(c.getRight());
-            uniqueY.add(c.getBottom());
-            uniqueY.add(c.getTop());
-        });
+        cells.stream()
+                .filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3)
+                .forEach(c -> {
+                    uniqueX.add(c.getLeft());
+                    uniqueX.add(c.getRight());
+                    uniqueY.add(c.getBottom());
+                    uniqueY.add(c.getTop());
+                });

-        var sortedUniqueX = uniqueX.stream().sorted().toList();
-        var sortedUniqueY = uniqueY.stream().sorted().toList();
+        var sortedUniqueX = uniqueX.stream()
+                .sorted()
+                .toList();
+        var sortedUniqueY = uniqueY.stream()
+                .sorted()
+                .toList();
+
+        List<List<Cell>> rowsOfCells = new ArrayList<>();

        Float prevY = null;
+
        for (Float y : sortedUniqueY) {

            List<Cell> row = new ArrayList<>();
@ -254,42 +259,81 @@ public class TablePageBlock extends AbstractPageBlock {
                if (prevY != null && prevX != null) {
                    var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));

-                    var intersectionCell = cells.stream().filter(c -> intersects(cell, c)).findFirst();
-
-                    intersectionCell.ifPresent(value -> cell.getTextBlocks().addAll(value.getTextBlocks()));
                    if (cell.hasMinimumSize()) {
+
+                        cells.stream()
+                                .filter(cell::nearlyIntersects)
+                                .forEach(intersectingCell -> cell.getTextBlocks().addAll(intersectingCell.getTextBlocks()));
+
                        row.add(cell);
                    }
                }
                prevX = x;
            }

-            if (prevY != null && prevX != null && !row.isEmpty()) {
-                matrix.add(row);
+            // exclude empty rows and rows where all text blocks are empty
+            if (prevY != null && prevX != null && !row.isEmpty() && !row.stream()
+                    .allMatch(cell -> cell.getTextBlocks().isEmpty())) {
+
+                rowsOfCells.add(row);
            }
            prevY = y;
        }

-        Collections.reverse(matrix);
+        Collections.reverse(rowsOfCells);

-        return matrix;
-    }
-
-
-
-    public boolean intersects(Cell cell1, Cell cell2) {
-        if (cell1.getHeight() <= 0 || cell2.getHeight() <= 0) {
-            return false;
+        // now remove the cells that are part of a column without any text blocks:
+        // first compute the transposed matrix, which contains all columns of cells,
+        // then determine the column indices that have to be removed
+        List<List<Cell>> columnsOfCells = new ArrayList<>();
+        int maxRowLength = rowsOfCells.stream()
+                .map(List::size)
+                .max(java.util.Comparator.naturalOrder())
+                .orElse(0);
+        for (int i = 0; i < maxRowLength; i++) {
+            columnsOfCells.add(new ArrayList<>());
        }
-        double x0 = cell1.getX() + 2;
-        double y0 = cell1.getY() + 2;
-        return (cell2.x + cell2.width > x0 &&
-                cell2.y + cell2.height > y0 &&
-                cell2.x < x0 + cell1.getWidth() -2 &&
-                cell2.y < y0 + cell1.getHeight() -2);
+
+        for (List<Cell> row : rowsOfCells) {
+            for (int j = 0; j < row.size(); j++) {
+                columnsOfCells.get(j).add(row.get(j));
+            }
+        }
+
+        List<Integer> columnIndicesToRemove = new ArrayList<>();
+        int columnIndex = 0;
+        for (List<Cell> col : columnsOfCells) {
+            if (col.stream()
+                    .allMatch(cell -> cell.getTextBlocks().isEmpty())) {
+                columnIndicesToRemove.add(columnIndex);
+            }
+            columnIndex++;
+        }
+        columnIndicesToRemove.sort(Collections.reverseOrder());
+
+        // update all rows so that the values of the empty columns get removed
+        var rowsOfCellsBefore = new ArrayList<>(rowsOfCells);
+        rowsOfCells = new ArrayList<>();
+        for (List<Cell> row : rowsOfCellsBefore) {
+            var updatedRow = new ArrayList<>(row);
+            columnIndicesToRemove.forEach(idxToRemove -> updatedRow.remove(updatedRow.get(idxToRemove)));
+            rowsOfCells.add(updatedRow);
+        }
+
+        return rowsOfCells;
    }


+    /**
+     * Stores the cell at the given position in {@code cellTreeMap} and enlarges the unrotated row and column
+     * counts if the position lies outside the current bounds.
+     *
+     * @param cell the cell to store
+     * @param row  zero-based row index of the cell
+     * @param col  zero-based column index of the cell
+     */
+    private void addCellToRowAndCol(Cell cell, int row, int col) {
+
+        unrotatedRowCount = Math.max(unrotatedRowCount, row + 1);
+        unrotatedColCount = Math.max(unrotatedColCount, col + 1);
+
+        CellPosition cp = new CellPosition(row, col);
+        cellTreeMap.put(cp, cell);
+
+    }
+

    @Override
    public String getText() {
@ -314,7 +358,7 @@ public class TablePageBlock extends AbstractPageBlock {
                            if (!first) {
                                sb.append("\n");
                            }
-                            sb.append('\"').append(textBlock.getText().replaceAll("\"", "\\\"")).append('\"');
+                            sb.append('\"').append(textBlock.getText().replaceAll("\"", "\"")).append('\"');
                            first = false;
                        }
                    }
@ -328,8 +372,6 @@ public class TablePageBlock extends AbstractPageBlock {
    }


-
-
    public String getTextAsHtml() {

        StringBuilder sb = new StringBuilder();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java
@ -25,7 +25,8 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
 public class RulingCleaningService {

-    private static final float THRESHOLD = 6;
+    private static final float THRESHOLD_Y = 6;
+    private static final float THRESHOLD_X = 2;


    public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings) {
@ -81,7 +82,7 @@ public class RulingCleaningService {

        for (Point2D p : points.subList(1, points.size() - 1)) {
            List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
-            if (Math.abs(p.getX() - last.get(0).getX()) < THRESHOLD) {
+            if (Math.abs(p.getX() - last.get(0).getX()) < THRESHOLD_X) {
                groupedPoints.get(groupedPoints.size() - 1).add(p);
            } else {
                groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
@ -108,7 +109,7 @@ public class RulingCleaningService {

        for (Point2D p : points.subList(1, points.size() - 1)) {
            List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
-            if (Math.abs(p.getY() - last.get(0).getY()) < THRESHOLD) {
+            if (Math.abs(p.getY() - last.get(0).getY()) < THRESHOLD_Y) {
                groupedPoints.get(groupedPoints.size() - 1).add(p);
            } else {
                groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
@ -5,7 +5,6 @@ import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@ -25,55 +24,62 @@ import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparis
@Service
 public class TableExtractionService {

-    private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
+    private static final int MAX_TABLE_OUTER_POINT_TOLERANCE = 10;
+    private static final int MAX_TABLE_CONTAINED_CELLS_WITH_TEXT = 1;
+    private static final float SPREADSHEET_AREA_TOLERANCE = 0.001f;
+
+    private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (point1, point2) -> {

        int rv = 0;
-        float arg0X = DoubleComparisons.round(arg0.getX(), 2);
-        float arg0Y = DoubleComparisons.round(arg0.getY(), 2);
-        float arg1X = DoubleComparisons.round(arg1.getX(), 2);
-        float arg1Y = DoubleComparisons.round(arg1.getY(), 2);
+        float point1X = DoubleComparisons.round(point1.getX(), 2);
+        float point1Y = DoubleComparisons.round(point1.getY(), 2);
+        float point2X = DoubleComparisons.round(point2.getX(), 2);
+        float point2Y = DoubleComparisons.round(point2.getY(), 2);

-        if (arg0X > arg1X) {
+        if (point1X > point2X) {
            rv = 1;
-        } else if (arg0X < arg1X) {
+        } else if (point1X < point2X) {
            rv = -1;
-        } else if (arg0Y > arg1Y) {
+        } else if (point1Y > point2Y) {
            rv = 1;
-        } else if (arg0Y < arg1Y) {
+        } else if (point1Y < point2Y) {
            rv = -1;
        }
        return rv;
    };
-    private static final Comparator<Point2D> POINT_COMPARATOR = (arg0, arg1) -> {
+    private static final Comparator<Point2D> Y_FIRST_POINT_COMPARATOR = (point1, point2) -> {

        int rv = 0;
-        float arg0X = DoubleComparisons.round(arg0.getX(), 2);
-        float arg0Y = DoubleComparisons.round(arg0.getY(), 2);
-        float arg1X = DoubleComparisons.round(arg1.getX(), 2);
-        float arg1Y = DoubleComparisons.round(arg1.getY(), 2);
+        float point1X = DoubleComparisons.round(point1.getX(), 2);
+        float point1Y = DoubleComparisons.round(point1.getY(), 2);
+        float point2X = DoubleComparisons.round(point2.getX(), 2);
+        float point2Y = DoubleComparisons.round(point2.getY(), 2);

-        if (arg0Y > arg1Y) {
+        if (point1Y > point2Y) {
            rv = 1;
-        } else if (arg0Y < arg1Y) {
+        } else if (point1Y < point2Y) {
            rv = -1;
-        } else if (arg0X > arg1X) {
+        } else if (point1X > point2X) {
            rv = 1;
-        } else if (arg0X < arg1X) {
+        } else if (point1X < point2X) {
            rv = -1;
        }
        return rv;
    };

+    private static final Comparator<Cell> CELL_SIZE_COMPARATOR = (cell1, cell2) -> {

-    public boolean contains(Cell cell, double x, double y, double w, double h) {
+        Double cell1Size = cell1.getHeight() * cell1.getWidth();
+        Double cell2Size = cell2.getHeight() * cell2.getWidth();
+        return cell1Size.compareTo(cell2Size);
+    };

-        if (cell.isEmpty() || w <= 0 || h <= 0) {
-            return false;
-        }
-        double x0 = cell.getX();
-        double y0 = cell.getY();
-        return (x >= x0 - 2 && y >= y0 - 2 && (x + w) <= x0 + cell.getWidth() + 2 && (y + h) <= y0 + cell.getHeight() + 2);
-    }
+    private static final Comparator<Rectangle> RECTANGLE_SIZE_COMPARATOR = (rect1, rect2) -> {
+
+        Double rect1Size = rect1.getHeight() * rect1.getWidth();
+        Double rect2Size = rect2.getHeight() * rect2.getWidth();
+        return rect1Size.compareTo(rect2Size);
+    };


    /**
@ -89,22 +95,18 @@ public class TableExtractionService {
     * @param cleanRulings The lines used to build the table.
     * @param page         Page object that contains textblocks and statistics.
     */
+
    public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {

        List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
-
-        List<TextPageBlock> toBeRemoved = new ArrayList<>();
+        // sort cells by size (height * width) ascending so that textBlocks are always assigned to the smallest cells that contain them
+        cells.sort(CELL_SIZE_COMPARATOR);

        for (AbstractPageBlock abstractPageBlock : page.getTextBlocks()) {
            TextPageBlock textBlock = (TextPageBlock) abstractPageBlock;
            for (Cell cell : cells) {
-                if (cell.hasMinimumSize() && contains(cell,
-                        textBlock.getPdfMinX(),
-                        textBlock.getPdfMinY(),
-                        textBlock.getPdfMaxX() - textBlock.getPdfMinX(),
-                        textBlock.getPdfMaxY() - textBlock.getPdfMinY())) {
+                if (cell.hasMinimumSize() && doesCellContainTextBlock(cell, textBlock)) {
                    cell.addTextBlock(textBlock);
-                    toBeRemoved.add(textBlock);
                    break;
                }
            }
@ -114,39 +116,70 @@ public class TableExtractionService {
        DoubleComparisons.sort(cells, Rectangle.ILL_DEFINED_ORDER);

        List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells);
+        // sort spreadsheetAreas by size (height * width) ascending so that cells are placed in the smallest tables first;
+        // this way no cell duplication occurs when tables are contained in other tables, and only the innermost table contains the cells
+        spreadsheetAreas.sort(RECTANGLE_SIZE_COMPARATOR);

        List<TablePageBlock> tables = new ArrayList<>();
        for (Rectangle area : spreadsheetAreas) {

-            List<Cell> overlappingCells = new ArrayList<>();
+            List<Cell> containedCells = new ArrayList<>();
            for (Cell c : cells) {
-                if (c.hasMinimumSize() && c.intersects(area)) {
-                    overlappingCells.add(c);
+                if (c.hasMinimumSize() && area.contains(c)) {
+                    containedCells.add(c);
                }
            }
-            tables.add(new TablePageBlock(overlappingCells, area, page.getRotation()));
+
+            var containedCellsWithText = containedCells.stream()
+                    .filter(cell -> !cell.getTextBlocks().isEmpty())
+                    .count();
+
+            // only create the table if it contains at least the required number of cells with text
+            if (containedCellsWithText >= MAX_TABLE_CONTAINED_CELLS_WITH_TEXT) {
+                tables.add(new TablePageBlock(containedCells, area, page.getRotation()));
+                cells.removeAll(containedCells);
+            }
        }

        for (TablePageBlock table : tables) {
            int position = -1;

-            Iterator<AbstractPageBlock> itty = page.getTextBlocks().iterator();
-            while (itty.hasNext()) {
-                AbstractPageBlock textBlock = itty.next();
-                if (textBlock instanceof TextPageBlock ? table.containsBlock((TextPageBlock) textBlock) : table.contains(textBlock) && position == -1) {
-                    position = page.getTextBlocks().indexOf(textBlock);
+            for (AbstractPageBlock pageBlock : page.getTextBlocks()) {
+                if (pageBlock instanceof TextPageBlock ? table.containsBlock((TextPageBlock) pageBlock) : table.contains(pageBlock) && position == -1) {
+                    position = page.getTextBlocks().indexOf(pageBlock);
                }
            }
            if (position != -1) {
                page.getTextBlocks().add(position, table);
+
+                var toBeRemoved = table.getCells()
+                        .stream()
+                        .map(Cell::getTextBlocks)
+                        .flatMap(List::stream)
+                        .toList();
+                // remove text blocks from the page that were also added with the table (from its contained cells)
+                page.getTextBlocks().removeAll(toBeRemoved);
            }
        }
-
-        page.getTextBlocks().removeAll(toBeRemoved);
    }


-    public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
+    /**
+     * Checks whether the bounding box of the text block (getPdfMinX/getPdfMinY to getPdfMaxX/getPdfMaxY) lies
+     * inside the cell, allowing a tolerance of 2 on each side.
+     *
+     * @param cell      the cell that might contain the text block
+     * @param textBlock the text block to test
+     * @return false if {@code cell.isEmpty()} or the text block has a non-positive width or height; otherwise true if
+     * the text block's bounding box lies within the cell enlarged by 2 in every direction
+     */
+    private boolean doesCellContainTextBlock(Cell cell, TextPageBlock textBlock) {
+
+        double x = textBlock.getPdfMinX();
+        double y = textBlock.getPdfMinY();
+        double w = textBlock.getPdfMaxX() - textBlock.getPdfMinX();
+        double h = textBlock.getPdfMaxY() - textBlock.getPdfMinY();
+        if (cell.isEmpty() || w <= 0 || h <= 0) {
+            return false;
+        }
+        double x0 = cell.getX();
+        double y0 = cell.getY();
+        return (x >= x0 - 2 && y >= y0 - 2 && (x + w) <= x0 + cell.getWidth() + 2 && (y + h) <= y0 + cell.getHeight() + 2);
+    }
+
+
+    private List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {

        // Fix for 211.pdf
        for (Ruling r : horizontalRulingLines) {
@ -160,7 +193,7 @@ public class TableExtractionService {
        List<Cell> cellsFound = new ArrayList<>();
        Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
        List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
-        intersectionPointsList.sort(POINT_COMPARATOR);
+        intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR);

        for (int i = 0; i < intersectionPointsList.size(); i++) {
            Point2D topLeft = intersectionPointsList.get(i);
@ -186,13 +219,14 @@ public class TableExtractionService {
                    continue;
                }
                for (Point2D yPoint : yPoints) {
-                    // is there an horizontal edge b/w topLeft and yPoint ?
+                    // is there a horizontal edge b/w topLeft and yPoint ?
                    if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) {
                        continue;
                    }
                    Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
-                    if (intersectionPoints.containsKey(btmRight) && intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0]) && intersectionPoints.get(btmRight)[1].equals(
-                            intersectionPoints.get(yPoint)[1])) {
+                    if (intersectionPoints.containsKey(btmRight)
+                        && intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
+                        && intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
                        cellsFound.add(new Cell(topLeft, btmRight));
                        break outer;
                    }
@ -214,7 +248,6 @@ public class TableExtractionService {
        Set<Point2D> pointSet = new HashSet<>();
        Map<Point2D, Point2D> edgesH = new HashMap<>();
        Map<Point2D, Point2D> edgesV = new HashMap<>();
-        int i = 0;

        for (Rectangle cell : cells) {
            for (Point2D pt : cell.getPoints()) {
@ -231,8 +264,9 @@ public class TableExtractionService {
        pointsSortX.sort(X_FIRST_POINT_COMPARATOR);
        // Y first sort
        List<Point2D> pointsSortY = new ArrayList<>(pointSet);
-        pointsSortY.sort(POINT_COMPARATOR);
+        pointsSortY.sort(Y_FIRST_POINT_COMPARATOR);

+        int i = 0;
        while (i < pointSet.size()) {
            float currY = (float) pointsSortY.get(i).getY();
            while (i < pointSet.size() && DoubleComparisons.feq(pointsSortY.get(i).getY(), currY)) {
@ -257,7 +291,8 @@ public class TableExtractionService {
        Point2D nextVertex;
        while (!edgesH.isEmpty()) {
            ArrayList<PolygonVertex> polygon = new ArrayList<>();
-            Point2D first = edgesH.keySet().iterator().next();
+            Point2D first = edgesH.keySet()
+                    .iterator().next();
            polygon.add(new PolygonVertex(first, Direction.HORIZONTAL));
            edgesH.remove(first);

@ -301,7 +336,14 @@ public class TableExtractionService {
                bottom = (float) Math.max(bottom, pt.point.getY());
                right = (float) Math.max(right, pt.point.getX());
            }
-            rectangles.add(new Rectangle(top, left, right - left, bottom - top));
+
+            // do not add polygons with too many outer points as they are unlikely to be tables
+            if (poly.size() <= MAX_TABLE_OUTER_POINT_TOLERANCE) {
+                rectangles.add(new Rectangle(top - SPREADSHEET_AREA_TOLERANCE,
+                                             left - SPREADSHEET_AREA_TOLERANCE,
+                                             right - left + 2 * SPREADSHEET_AREA_TOLERANCE,
+                                             bottom - top + 2 * SPREADSHEET_AREA_TOLERANCE));
+            }
        }

        return rectangles;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TableMergingUtility.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TableMergingUtility.java
@ -30,8 +30,6 @@ public class TableMergingUtility {
            if (consecutiveTable.getColCount() == originalTablePageBlock.getColCount() && !hasTableHeader(consecutiveTable) && outerBoundaryAlignsX(originalTablePageBlock,
                    consecutiveTable)) {
                consecutiveTablesWithSameColCountAndHeaders.add(consecutiveTable);
-            } else {
-                break;
            }
        }
        return Stream.concat(Stream.of(originalTablePageBlock), consecutiveTablesWithSameColCountAndHeaders.stream()).toList();
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
@ -35,9 +35,10 @@ public class ViewerDocumentTest extends BuildDocumentTest {
        Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER);
        long start = System.currentTimeMillis();
        layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
-        System.out.printf("Total time: %.2fs%n",  ((float) (System.currentTimeMillis() - start)) / 1000);
+        System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
    }

+
    @Test
    @Disabled
    @SneakyThrows
@ -51,7 +52,11 @@ public class ViewerDocumentTest extends BuildDocumentTest {
        var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class);
        var documentFile = new ClassPathResource(fileName).getFile();

-        var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, Path.of(fileName).getFileName().toFile().toString());
+        var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
+                                                                       documentFile,
+                                                                       new ImageServiceResponse(),
+                                                                       tableResponse,
+                                                                       Path.of(fileName).getFileName().toFile().toString());
        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
        LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
        Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
@ -60,3 +65,4 @@ public class ViewerDocumentTest extends BuildDocumentTest {
    }

 }
+
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
@ -29,8 +29,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Im
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
-import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
@ -50,12 +48,6 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @Autowired
    private RedactManagerClassificationService redactManagerClassificationService;

-    @Autowired
-    private CvTableParsingAdapter cvTableParsingAdapter;
-
-    @Autowired
-    private ImageServiceResponseAdapter imageServiceResponseAdapter;
-
    @Autowired
    private SectionsBuilderService sectionsBuilderService;

@ -64,10 +56,10 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    public ClassificationDocument buildClassificationDocument(File originDocument, TableServiceResponse tableServiceResponse) {

        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                originDocument,
-                new ImageServiceResponse(),
-                tableServiceResponse,
-                "document");
+                                                                                          originDocument,
+                                                                                          new ImageServiceResponse(),
+                                                                                          tableServiceResponse,
+                                                                                          "document");

        redactManagerClassificationService.classifyDocument(classificationDocument);

@ -87,11 +79,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @Test
    public void tablesToHtmlDebugger() throws IOException {

-        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());

-        toHtml(document, "/tmp/A20622A izRMS (CZ) fRR Part B9_Page185.html");
+        toHtml(document, "/tmp/T5.html");

    }

@ -109,6 +101,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    }


+    @Disabled
    @Test
    public void testScanRotationBorderIsIgnored() throws IOException {

@ -117,8 +110,16 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
-        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
-        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
+        assertThat(document.getSections()
+                           .stream()
+                           .flatMap(paragraph -> paragraph.getTables()
+                                   .stream())
+                           .collect(Collectors.toList())).isNotEmpty();
+        var tables = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList();

        // Quality of the table parsing is not good, because the file is rotated at scanning.
        // We only asset that the table border is not the page border.
@ -140,12 +141,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        imageServiceResponse.getData()
                .forEach(imageMetadata -> images.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
                        .add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
-                                imageMetadata.getPosition().getY1(),
-                                imageMetadata.getGeometry().getWidth(),
-                                imageMetadata.getGeometry().getHeight()),
-                                ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
-                                imageMetadata.isAlpha(),
-                                imageMetadata.getPosition().getPageNumber())));
+                                                                        imageMetadata.getPosition().getY1(),
+                                                                        imageMetadata.getGeometry().getWidth(),
+                                                                        imageMetadata.getGeometry().getHeight()),
+                                                 ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
+                                                 imageMetadata.isAlpha(),
+                                                 imageMetadata.getPosition().getPageNumber())));

        System.out.println("object");
    }
@ -157,11 +158,22 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
+        assertThat(document.getSections()
+                           .stream()
+                           .flatMap(paragraph -> paragraph.getTables()
+                                   .stream())
+                           .collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock table = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(0);
        assertThat(table.getColCount()).isEqualTo(6);
        assertThat(table.getRowCount()).isEqualTo(13);
-        assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
+        assertThat(table.getRows()
+                           .stream()
+                           .mapToInt(List::size).sum()).isEqualTo(6 * 13);
    }


@ -171,15 +183,37 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
+        assertThat(document.getSections()
+                           .stream()
+                           .flatMap(paragraph -> paragraph.getTables()
+                                   .stream())
+                           .collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(1);
+        TablePageBlock secondTable = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(2);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
-        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
+                .get(0)
+                .stream()
+                .map(Collections::singletonList)
+                .collect(Collectors.toList());
+        assertThat(secondTable.getRows()
+                           .stream()
+                           .allMatch(row -> row.stream()
+                                   .map(Cell::getHeaderCells)
+                                   .toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -189,15 +223,37 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
+        assertThat(document.getSections()
+                           .stream()
+                           .flatMap(paragraph -> paragraph.getTables()
+                                   .stream())
+                           .collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(0);
        assertThat(firstTable.getColCount()).isEqualTo(9);
        assertThat(firstTable.getRowCount()).isEqualTo(5);
-        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(1);
+        TablePageBlock secondTable = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(1);
        assertThat(secondTable.getColCount()).isEqualTo(9);
        assertThat(secondTable.getRowCount()).isEqualTo(6);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
-        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
+                .get(firstTable.getRowCount() - 1)
+                .stream()
+                .map(Cell::getHeaderCells)
+                .collect(Collectors.toList());
+        assertThat(secondTable.getRows()
+                           .stream()
+                           .allMatch(row -> row.stream()
+                                   .map(Cell::getHeaderCells)
+                                   .toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -207,19 +263,41 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
+        assertThat(document.getSections()
+                           .stream()
+                           .flatMap(paragraph -> paragraph.getTables()
+                                   .stream())
+                           .collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(1);
+        TablePageBlock secondTable = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(6);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
-        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
+                .get(0)
+                .stream()
+                .map(Collections::singletonList)
+                .collect(Collectors.toList());
+        assertThat(secondTable.getRows()
+                           .stream()
+                           .allMatch(row -> row.stream()
+                                   .map(Cell::getHeaderCells)
+                                   .toList().equals(firstTableHeaderCells))).isTrue();
    }


-    @Test // Non-sense test
+    @Test
    public void testDoc56Page170() throws IOException {

        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
@ -230,8 +308,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {

        validateTable(document, 0, 1, 1, 0, 0);
        validateTable(document, 1, 2, 2, 0, 0);
-        validateTable(document, 2, 6, 20, 0, 0);
-        validateTable(document, 3, 7, 31, 0, 0);
+        validateTable(document, 2, 4, 19, 12, 0);
+        validateTable(document, 3, 2, 12, 0, 0);

    }

@ -265,29 +343,30 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        validateTable(document, 0, 8, 8, 0, 0);

        List<List<String>> values = Arrays.asList(Arrays.asList("Annex point Reference within DAR/RAR",
-                        "Author, date",
-                        "Study title",
-                        "Analytical method Author, date, No.",
-                        "Technique, LOQ of the method, validated working range",
-                        "Method meets analytical validation criteria",
-                        "Remarks (in case validation criteria are not met)",
-                        "Acceptability of the method"),
-                Arrays.asList("Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
-                Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
-                        "Evans P.G. 2001 TMJ4569B, VV-323245",
-                        "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
-                        "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
-                        "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
-                        "Y",
-                        "N/A",
-                        "Y"));
+                                                                "Author, date",
+                                                                "Study title",
+                                                                "Analytical method Author, date, No.",
+                                                                "Technique, LOQ of the method, validated working range",
+                                                                "Method meets analytical validation criteria",
+                                                                "Remarks (in case validation criteria are not met)",
+                                                                "Acceptability of the method"),
+                                                  Arrays.asList(
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
+                                                  Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
+                                                                "Evans P.G. 2001 TMJ4569B, VV-323245",
+                                                                "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
+                                                                "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
+                                                                "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
+                                                                "Y",
+                                                                "N/A",
+                                                                "Y"));

        validateTable(document, 0, values);

@ -579,10 +658,109 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    }


+    @Test // table expectations for the single-page fixture "T0 TableWithMergedCells.pdf"
+    public void testT0() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T0 TableWithMergedCells.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
+
+        validateTableSize(document, 1); // presumably the expected number of tables in the document - helper is defined outside this view, TODO confirm
+
+        validateTable(document, 0, 6, 8, 0, 0); // tableIndex=0, colCount=6, rowCount=8, emptyCellsCountCorrect=0, emptyCellsCountIncorrect=0
+    }
+
+
+    @Test // table expectations for the single-page fixture "T1 MultipleNestedTable.pdf"
+    public void testT1() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T1 MultipleNestedTable.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
+
+        validateTableSize(document, 4); // presumably the expected number of tables in the document - helper is defined outside this view, TODO confirm
+
+        validateTable(document, 0, 3, 3, 0, 0); // argument order: tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect
+        validateTable(document, 1, 3, 5, 2, 0); // table 1: colCount=3, rowCount=5, emptyCellsCountCorrect=2
+        validateTable(document, 2, 3, 3, 1, 0); // table 2: colCount=3, rowCount=3, emptyCellsCountCorrect=1
+        validateTable(document, 3, 3, 3, 0, 0); // table 3: colCount=3, rowCount=3, both empty-cell counts 0
+
+    }
+
+
+    @Test // table expectations for the single-page fixture "T2 MultipleTables.pdf"
+    public void testT2() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T2 MultipleTables.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
+
+        validateTableSize(document, 6); // presumably the expected number of tables in the document - helper is defined outside this view, TODO confirm
+
+        validateTable(document, 0, 5, 5, 0, 0); // argument order: tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect
+        validateTable(document, 1, 5, 6, 0, 0); // table 1 is the only one with rowCount=6; the others use rowCount=5
+        validateTable(document, 2, 5, 5, 0, 0);
+        validateTable(document, 3, 5, 5, 0, 0);
+        validateTable(document, 4, 5, 5, 0, 0);
+        validateTable(document, 5, 5, 5, 0, 0);
+
+    }
+
+
+    @Test // table expectations for the single-page fixture "T3 S-Meto_Page29.pdf"
+    public void testT3() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T3 S-Meto_Page29.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
+
+        validateTableSize(document, 1); // presumably the expected number of tables in the document - helper is defined outside this view, TODO confirm
+
+        validateTable(document, 0, 6, 5, 0, 0); // tableIndex=0, colCount=6, rowCount=5, emptyCellsCountCorrect=0, emptyCellsCountIncorrect=0
+
+    }
+
+
+    @Test // table expectations for the single-page fixture "T4 138 IDD0000261736_Page16.pdf"
+    public void testT4() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T4 138 IDD0000261736_Page16.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
+
+        validateTableSize(document, 1); // presumably the expected number of tables in the document - helper is defined outside this view, TODO confirm
+
+        validateTable(document, 0, 5, 8, 1, 0); // tableIndex=0, colCount=5, rowCount=8, emptyCellsCountCorrect=1, emptyCellsCountIncorrect=0
+
+    }
+
+
+    @Test // table expectations for the single-page fixture "T5 VV-640252-Page16.pdf"
+    public void testT5() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
+
+        validateTableSize(document, 6); // presumably the expected number of tables in the document - helper is defined outside this view, TODO confirm
+        validateTable(document, 0, 1, 1, 0, 0); // argument order: tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect
+        validateTable(document, 1, 1, 1, 0, 0); // every table on this page is checked with colCount=1 and rowCount=1
+        validateTable(document, 2, 1, 1, 0, 0);
+        validateTable(document, 3, 1, 1, 0, 0);
+        validateTable(document, 4, 1, 1, 0, 0);
+        validateTable(document, 5, 1, 1, 0, 0);
+
+    }
+
+
    @SneakyThrows
    private void toHtml(ClassificationDocument document, String filename) {

-        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
+        var tables = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList();
        StringBuilder sb = new StringBuilder();

        int currentPage = 1;
@ -603,9 +781,19 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {

-        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
+        TablePageBlock table = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(tableIndex);
        List<List<Cell>> rows = table.getRows();
-        int emptyCellsFoundFound = rows.stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().equals("")).toList().size();
+        int emptyCellsFoundFound = rows.stream()
+                .flatMap(List::stream)
+                .toList()
+                .stream()
+                .filter(f -> f.toString().isEmpty())
+                .toList().size();

        for (List<Cell> row : table.getRows()) {
            row.forEach(r -> System.out.println(r.toString()));
@ -620,11 +808,20 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {

-        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
+        TablePageBlock table = document.getSections()
+                .stream()
+                .flatMap(paragraph -> paragraph.getTables()
+                        .stream())
+                .toList()
+                .get(tableIndex);
        List<List<Cell>> rows = table.getRows();

-        List<Cell> rowsFlattened = rows.stream().flatMap(List::stream).toList();
-        List<String> valuesFlattened = values.stream().flatMap(List::stream).toList();
+        List<Cell> rowsFlattened = rows.stream()
+                .flatMap(List::stream)
+                .toList();
+        List<String> valuesFlattened = values.stream()
+                .flatMap(List::stream)
+                .toList();

        for (int i = 0; i < valuesFlattened.size(); i++) {
            Cell cell = rowsFlattened.get(i);
@ -637,7 +834,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTableSize(ClassificationDocument document, int tableSize) {

-        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().size()).isEqualTo(tableSize);
+        assertThat(document.getSections() // flattens the tables of every section into one list and asserts its size equals tableSize
+                           .stream()
+                           .flatMap(paragraph -> paragraph.getTables() // NOTE: `paragraph` is an element of getSections(), despite its name
+                                   .stream())
+                           .toList().size()).isEqualTo(tableSize);

    }

--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
@ -26,10 +26,8 @@ import com.knecon.fforesight.service.layoutparser.processor.services.RulingClean
 import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
 import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
 import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
-import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
 import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
 import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
-import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;

 import lombok.SneakyThrows;

@ -57,9 +55,6 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
    @SneakyThrows
    public void testTableExtraction() {

-        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
-        LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
-
        ClassPathResource resource = new ClassPathResource("files");
        List<String> pdfFileNames = Files.walk(resource.getFile().toPath())
                .filter(path -> path.getFileName().toString().endsWith(".pdf"))
@ -67,8 +62,8 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
                .map(Path::toString)
                .toList();

-        for (int i = 0; i < pdfFileNames.size(); i++) {
-            writeJsons(Path.of(pdfFileNames.get(i)));
+        for (String pdfFileName : pdfFileNames) {
+            writeJsons(Path.of(pdfFileName));
        }
    }

@ -88,13 +83,13 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
                filename.toFile().toString()));
        DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
        DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
-        if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure(), filename.getFileName().toString())) {
-            String tmpFileNameBefore = "C:/Users/YANNIK~1/AppData/Local/Temp/before." + filename.getFileName().toString();
+        if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure())) {
+            String tmpFileNameBefore = "/tmp/before." + filename.getFileName().toString();
            try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
                PdfDraw.drawDocumentGraph(pdDocument, documentGraphBefore);
                pdDocument.save(tmpFileNameBefore);
            }
-            String tmpFileNameAfter = "C:/Users/YANNIK~1/AppData/Local/Temp/after." + filename.getFileName().toString();
+            String tmpFileNameAfter = "/tmp/after." + filename.getFileName().toString();
            try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
                PdfDraw.drawDocumentGraph(pdDocument, documentGraphAfter);
                pdDocument.save(tmpFileNameAfter);
@ -105,9 +100,9 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {


    @SneakyThrows
-    private boolean compareStructures(DocumentStructure structure1, DocumentStructure structure2, String pdfName) {
+    private boolean compareStructures(DocumentStructure structure1, DocumentStructure structure2) {

-        List listStructure1 = structure1.streamAllEntries()
+        List<Table> listStructure1 = structure1.streamAllEntries()
                .filter(entryData -> entryData.getType().equals(NodeType.TABLE))
                .map(DocumentStructure.EntryData::getProperties)
                .map(properties -> {
@ -117,7 +112,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
                })
                .toList();

-        List listStructure2 = structure2.streamAllEntries()
+        List<Table> listStructure2 = structure2.streamAllEntries()
                .filter(entryData -> entryData.getType().equals(NodeType.TABLE))
                .map(DocumentStructure.EntryData::getProperties)
                .map(properties -> {
@ -128,8 +123,8 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
                .toList();

        for (int i = 0; i < listStructure1.size(); i++) {
-            Table tableNode1 = (Table) listStructure1.get(i);
-            Table tableNode2 = (Table) listStructure2.get(i);
+            Table tableNode1 = listStructure1.get(i);
+            Table tableNode2 = listStructure2.get(i);
            if (tableNode1.getNumberOfRows() != tableNode2.getNumberOfRows() || tableNode1.getNumberOfCols() != tableNode2.getNumberOfCols()) {
                return false;
            }
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T0
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T0
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T1
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T1
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T2
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T2
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T3
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T3
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T4
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T4
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T5
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/T5