Merge branch 'TAAS-103' into 'main'

TAAS-103: Table Detection and rotated text. See merge request fforesight/layout-parser!81
2023-11-16 09:13:41 +01:00 · 2023-11-16 09:13:41 +01:00 · 09ee90222e
commit 09ee90222e
parent 1b1f777706 1316a067fe
13 changed files with 372 additions and 123 deletions
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -187,10 +187,7 @@ public class LayoutParsingPipeline {
            boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);

            PDRectangle cropbox = pdPage.getCropBox();
-            CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber),
-                    stripper.getRulings(),
-                    stripper.getMinCharWidth(),
-                    stripper.getMaxCharHeight());
+            CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings());

            ClassificationPage classificationPage = switch (layoutParsingType) {
                case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
@ -213,7 +210,8 @@ public class LayoutParsingPipeline {
                imageServiceResponseAdapter.findOcr(classificationPage);
            }

-            tableExtractionService.extractTables(cleanRulings, classificationPage, layoutParsingType);
+            tableExtractionService.extractTables(cleanRulings, classificationPage);
+
            buildPageStatistics(classificationPage);
            increaseDocumentStatistics(classificationPage, classificationDocument);

@ -246,8 +244,8 @@ public class LayoutParsingPipeline {

    private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) {

-//        if (!classificationPage.isLandscape()) {
-            document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
+        //        if (!classificationPage.isLandscape()) {
+        document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
 //        }
        document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue());
        document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue());
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
@ -34,7 +34,6 @@ public class Table implements SemanticNode {

    int numberOfRows;
    int numberOfCols;
-
    TextBlock textBlock;

    @Builder.Default
@ -208,7 +207,6 @@ public class Table implements SemanticNode {
        return IntStream.range(0, numberOfCols).boxed().map(col -> getCell(row, col));
    }

-
    /**
     * Streams all TableCells row-wise and filters them with header == true.
     *
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/TablePageBlock.java
@ -1,12 +1,14 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.table;

 import java.awt.geom.Point2D;
+import java.awt.geom.Rectangle2D;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeMap;
+import java.util.stream.Collectors;

 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
@ -252,7 +254,8 @@ public class TablePageBlock extends AbstractPageBlock {
                if (prevY != null && prevX != null) {
                    var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));

-                    var intersectionCell = cells.stream().filter(c -> cell.intersects(c) && cell.overlapRatio(c) > 0.1f).findFirst();
+                    var intersectionCell = cells.stream().filter(c -> intersects(cell, c)).findFirst();
+
                    intersectionCell.ifPresent(value -> cell.getTextBlocks().addAll(value.getTextBlocks()));
                    if (cell.hasMinimumSize()) {
                        row.add(cell);
@ -273,6 +276,21 @@ public class TablePageBlock extends AbstractPageBlock {
    }


+
+    /**
+     * Tests whether {@code cell2} overlaps {@code cell1}, where the minimum x/y corner of
+     * {@code cell1} is moved inwards by 2 before the comparison.
+     * Returns {@code false} immediately if either cell has a height of zero or less.
+     *
+     * NOTE(review): the upper bounds evaluate to cell1.getX() + cell1.getWidth() and
+     * cell1.getY() + cell1.getHeight(), because the "+ 2" inside x0/y0 cancels the "- 2".
+     * Only the minimum-x/minimum-y side is therefore shrunk; confirm whether a margin on
+     * all four sides was intended.
+     * NOTE(review): only the heights are checked for degenerate cells, the widths are not.
+     *
+     * @param cell1 the cell whose (partially shrunk) area is tested
+     * @param cell2 the candidate cell that may overlap {@code cell1}
+     * @return {@code true} if the two areas overlap according to the comparison below
+     */
+    public boolean intersects(Cell cell1, Cell cell2) {
+        if (cell1.getHeight() <= 0 || cell2.getHeight() <= 0) {
+            return false;
+        }
+        // Minimum corner of cell1, moved inwards by 2.
+        double x0 = cell1.getX() + 2;
+        double y0 = cell1.getY() + 2;
+        return (cell2.x + cell2.width > x0 &&
+                cell2.y + cell2.height > y0 &&
+                cell2.x < x0 + cell1.getWidth() -2 &&
+                cell2.y < y0 + cell1.getHeight() -2);
+    }
+
+
+
    @Override
    public String getText() {

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java
@ -17,7 +17,6 @@ import lombok.SneakyThrows;
@AllArgsConstructor
 public class RedTextPosition {

-    private String textMatrix;
    private float[] position;

    @JsonIgnore
@ -56,8 +55,6 @@ public class RedTextPosition {

        pos.setFontSizeInPt(textPosition.getFontSizeInPt());

-        pos.setTextMatrix(textPosition.getTextMatrix().toString());
-
        var position = new float[4];

        position[0] = textPosition.getXDirAdj();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java
@ -12,9 +12,9 @@ import java.util.Map;

 import org.springframework.stereotype.Service;

-import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
 import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparisons;

 import lombok.RequiredArgsConstructor;
@ -25,10 +25,13 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
 public class RulingCleaningService {

-    public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
+    private static final float THRESHOLD = 6;
+
+
+    public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings) {

        if (!rulings.isEmpty()) {
-            snapPoints(rulings, minCharWidth, maxCharHeight);
+            snapPoints(rulings);
        }

        List<Ruling> vrs = new ArrayList<>();
@ -53,14 +56,11 @@ public class RulingCleaningService {
        }
        List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);

-        return CleanRulings.builder()
-                .vertical(verticalRulingLines)
-                .horizontal(horizontalRulingLines)
-                .build();
+        return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
    }


-    public void snapPoints(List<? extends Line2D.Float> rulings, float xThreshold, float yThreshold) {
+    public void snapPoints(List<? extends Line2D.Float> rulings) {

        // collect points and keep a Line -> p1,p2 map
        Map<Line2D.Float, Point2D[]> linesToPoints = new HashMap<>();
@ -81,7 +81,7 @@ public class RulingCleaningService {

        for (Point2D p : points.subList(1, points.size() - 1)) {
            List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
-            if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) {
+            if (Math.abs(p.getX() - last.get(0).getX()) < THRESHOLD) {
                groupedPoints.get(groupedPoints.size() - 1).add(p);
            } else {
                groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
@ -108,7 +108,7 @@ public class RulingCleaningService {

        for (Point2D p : points.subList(1, points.size() - 1)) {
            List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
-            if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) {
+            if (Math.abs(p.getY() - last.get(0).getY()) < THRESHOLD) {
                groupedPoints.get(groupedPoints.size() - 1).add(p);
            } else {
                groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
@ -12,7 +12,6 @@ import java.util.Set;

 import org.springframework.stereotype.Service;

-import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
@ -66,6 +65,17 @@ public class TableExtractionService {
    };


+    /**
+     * Tests whether the rectangle {@code (x, y, w, h)} lies inside {@code cell}, allowing the
+     * rectangle to stick out by up to 2 on every side of the cell.
+     * Returns {@code false} if {@code cell.isEmpty()} is true or if {@code w} or {@code h}
+     * is zero or negative.
+     *
+     * @param cell the cell that should enclose the rectangle
+     * @param x    minimum x of the rectangle
+     * @param y    minimum y of the rectangle
+     * @param w    width of the rectangle
+     * @param h    height of the rectangle
+     * @return {@code true} if the rectangle fits into the cell enlarged by 2 on each side
+     */
+    public boolean contains(Cell cell, double x, double y, double w, double h) {
+
+        if (cell.isEmpty() || w <= 0 || h <= 0) {
+            return false;
+        }
+        double x0 = cell.getX();
+        double y0 = cell.getY();
+        // Same shape as a plain containment test, with each bound relaxed by 2.
+        return (x >= x0 - 2 && y >= y0 - 2 && (x + w) <= x0 + cell.getWidth() + 2 && (y + h) <= y0 + cell.getHeight() + 2);
+    }
+
+
    /**
     * Finds tables on a page and moves textblocks into cells of the found tables.
     * Note: This algorithm uses Pdf Coordinate System where {0,0} rotated with the page rotation.
@ -79,16 +89,17 @@ public class TableExtractionService {
     * @param cleanRulings The lines used to build the table.
     * @param page         Page object that contains textblocks and statistics.
     */
-    public void extractTables(CleanRulings cleanRulings, ClassificationPage page, LayoutParsingType layoutParsingType) {
+    public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {

-        List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical(), layoutParsingType);
+        List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());

        List<TextPageBlock> toBeRemoved = new ArrayList<>();

        for (AbstractPageBlock abstractPageBlock : page.getTextBlocks()) {
            TextPageBlock textBlock = (TextPageBlock) abstractPageBlock;
            for (Cell cell : cells) {
-                if (cell.hasMinimumSize() && cell.intersects(textBlock.getPdfMinX(),
+                if (cell.hasMinimumSize() && contains(cell,
+                        textBlock.getPdfMinX(),
                        textBlock.getPdfMinY(),
                        textBlock.getPdfMaxX() - textBlock.getPdfMinX(),
                        textBlock.getPdfMaxY() - textBlock.getPdfMinY())) {
@ -102,7 +113,7 @@ public class TableExtractionService {
        cells = new ArrayList<>(new HashSet<>(cells));
        DoubleComparisons.sort(cells, Rectangle.ILL_DEFINED_ORDER);

-        List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells).stream().filter(r -> r.getWidth() > 0f && r.getHeight() > 0f).toList();
+        List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells);

        List<TablePageBlock> tables = new ArrayList<>();
        for (Rectangle area : spreadsheetAreas) {
@ -135,16 +146,14 @@ public class TableExtractionService {
    }


-    public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines, LayoutParsingType layoutParsingType) {
+    public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {

-        if (layoutParsingType.equals(LayoutParsingType.TAAS)) {
-            // TODO: breaks some tables, for example "1 Abamectin Prr.pdf" try to fix this upstream in RulingCleaningService
-            for (Ruling r : horizontalRulingLines) {
-                if (r.getX2() < r.getX1()) {
-                    double a = r.getX2();
-                    r.x2 = (float) r.getX1();
-                    r.x1 = (float) a;
-                }
+        // Fix for 211.pdf: normalize horizontal rulings so that x1 <= x2 by swapping reversed endpoints.
+        for (Ruling r : horizontalRulingLines) {
+            if (r.getX2() < r.getX1()) {
+                double a = r.getX2();
+                r.x2 = (float) r.getX1();
+                r.x1 = (float) a;
            }
        }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/PropertiesMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/PropertiesMapper.java
@ -1,6 +1,7 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.mapper;

 import java.awt.geom.Rectangle2D;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@ -8,6 +9,7 @@ import java.util.Map;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java
@ -1,18 +1,34 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.parsing;

-import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.SneakyThrows;
-import lombok.extern.slf4j.Slf4j;
+import java.awt.color.CMMException;
+import java.awt.geom.Point2D;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.pdfbox.contentstream.operator.Operator;
 import org.apache.pdfbox.contentstream.operator.OperatorName;
-import org.apache.pdfbox.contentstream.operator.color.*;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
+import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
+import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
 import org.apache.pdfbox.contentstream.operator.markedcontent.BeginMarkedContentSequenceWithProperties;
 import org.apache.pdfbox.contentstream.operator.markedcontent.EndMarkedContentSequence;
-import org.apache.pdfbox.contentstream.operator.state.*;
+import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
+import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
+import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
+import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
+import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
+import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
+import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
 import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSNumber;
@ -21,11 +37,14 @@ import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
 import org.apache.pdfbox.text.TextPosition;

-import java.awt.color.CMMException;
-import java.awt.geom.Point2D;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+
+import lombok.Getter;
+import lombok.Setter;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;

@Getter
@Slf4j
@ -36,11 +55,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
    private final List<Ruling> graphicsPath = new ArrayList<>();
    @Setter
    protected PDPage pdpage;
-    private int minCharWidth;
-    private int maxCharWidth;
-    private int minCharHeight;
-    private int maxCharHeight;
-

    private float path_x;
    private float path_y;
@ -73,7 +87,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
        this.addOperator(new SetFontAndSize(this));
        this.addOperator(new SetLineWidth(this));

-
        addOperator(new BeginMarkedContentSequenceWithProperties(this));
 //        addOperator(new BeginMarkedContentSequence(this));
        addOperator(new EndMarkedContentSequence(this));
@ -232,33 +245,15 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                        .get(textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions().size() - 1);
            }

-            int charWidth = (int) textPositions.get(i).getWidthDirAdj();
-            if (charWidth < minCharWidth) {
-                minCharWidth = charWidth;
-            }
-            if (charWidth > maxCharWidth) {
-                maxCharWidth = charWidth;
-            }
-
-            int charHeight = (int) textPositions.get(i).getHeightDir();
-            if (charHeight < minCharHeight) {
-                minCharHeight = charHeight;
-            }
-            if (charWidth > maxCharHeight) {
-                maxCharHeight = charHeight;
-            }
-
            if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
                startIndex++;
                continue;
            }

            // Strange but sometimes this is happening, for example: Metolachlor2.pdf
-            if (i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) {
+            if (checkIfCurrentPositionIsToTheRightOfPreviousPosition(i, textPositions)) {
                List<TextPosition> sublist = textPositions.subList(startIndex, i);
-                if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
-                        .getUnicode()
-                        .equals("\t")))) {
+                if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {
                    textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
                }
                startIndex = i;
@ -266,9 +261,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {

            if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
                List<TextPosition> sublist = textPositions.subList(startIndex, i);
-                if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
-                        .getUnicode()
-                        .equals("\t")))) {
+                if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {
                    textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
                }
                startIndex = i;
@ -278,13 +271,10 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                    .getUnicode()
                    .equals("\t")) && i <= textPositions.size() - 2) {
                List<TextPosition> sublist = textPositions.subList(startIndex, i);
-                if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
-                        .getUnicode()
-                        .equals("\t")))) {
+                if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {

                    // Remove false sequence ends (whitespaces)
-                    if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
-                            .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
+                    if (checkIfGapSizeBetweenCharactersSmallerThanMaximum(previous, sublist, 0.01f)) {
                        for (TextPosition t : sublist) {
                            textPositionSequences.get(textPositionSequences.size() - 1).add(t);
                        }
@ -319,13 +309,34 @@ public class PDFLinesTextStripper extends PDFTextStripper {
    }


+    /**
+     * Reports whether the text position at index {@code i} starts at a smaller
+     * {@code getXDirAdj()} value than the position at index {@code i - 1}.
+     * Always {@code false} for {@code i == 0}, since there is no previous position.
+     *
+     * NOTE(review): despite the method name, {@code true} means the current position lies to the
+     * LEFT of the previous one (x went backwards); consider renaming in a follow-up.
+     *
+     * @param i             index of the current position in {@code textPositions}
+     * @param textPositions the positions being scanned
+     * @return {@code true} if {@code i > 0} and the current x is smaller than the previous x
+     */
+    public boolean checkIfCurrentPositionIsToTheRightOfPreviousPosition(int i, List<TextPosition> textPositions) {
+
+        return i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj();
+    }
+
+
+    /**
+     * Reports whether {@code sublist} is worth keeping as a text sequence: it returns
+     * {@code true} unless the list is empty or consists of exactly one position whose unicode
+     * is a space, a non-breaking space (U+00A0) or a tab.
+     *
+     * NOTE(review): the name is inverted relative to the result. {@code true} means the
+     * sequence does NOT consist of a single whitespace; consider renaming in a follow-up.
+     * A list with two or more positions returns {@code true} even if all of them are whitespace.
+     *
+     * @param sublist the candidate sequence of text positions
+     * @return {@code false} for an empty list or a single whitespace position, {@code true} otherwise
+     */
+    public boolean checkIfSequenceContainsOnlyWhitespaces(List<TextPosition> sublist) {
+
+        return !(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
+                .getUnicode()
+                .equals("\t")));
+    }
+
+
+    /**
+     * Reports whether the first position of {@code sublist} follows {@code previous} on the same
+     * line with a horizontal gap below {@code maximumGapSize}.
+     * The gap is the first position's {@code getXDirAdj()} minus the end of {@code previous}
+     * ({@code getXDirAdj() + getWidthDirAdj()}). The difference is signed, so an overlapping
+     * (negative) gap also counts as smaller. "Same line" is an exact equality of
+     * {@code getYDirAdj()}.
+     *
+     * {@code sublist} must not be empty when {@code previous} is non-null, because
+     * {@code sublist.get(0)} is then evaluated unconditionally.
+     *
+     * @param previous       the last position of the preceding sequence, may be {@code null}
+     * @param sublist        the candidate sequence, inspected only at index 0
+     * @param maximumGapSize exclusive upper bound for the gap
+     * @return {@code true} if {@code previous} is non-null, both share the same y, and the gap is below the bound
+     */
+    public boolean checkIfGapSizeBetweenCharactersSmallerThanMaximum(RedTextPosition previous, List<TextPosition> sublist, float maximumGapSize) {
+
+        return previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
+                .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < maximumGapSize;
+    }
+
+    // !(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
+    //                        .getUnicode()
+    //                        .equals("\t")))
+
+
    @Override
    public String getText(PDDocument doc) throws IOException {

-        minCharWidth = Integer.MAX_VALUE;
-        maxCharWidth = 0;
-        minCharHeight = Integer.MAX_VALUE;
-        maxCharHeight = 0;
        textPositionSequences.clear();
        rulings.clear();
        graphicsPath.clear();
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java
@ -47,7 +47,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
    @Disabled
    public void visualizeCraftedDocument() {

-        String filename = "files/crafted document.pdf";
+        String filename = "files/1 Abamectin_prr.pdf";
        visualizePdf(filename);
    }

--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
@ -2,14 +2,31 @@ package com.knecon.fforesight.service.layoutparser.server.graph;

 import java.io.FileOutputStream;
 import java.nio.file.Path;
+import java.util.List;

 import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.core.io.ClassPathResource;

+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
+import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
+import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
+import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
+import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
+import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
 import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
 import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
 import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
@ -18,19 +35,38 @@ import lombok.SneakyThrows;

 public class ViewerDocumentTest extends BuildDocumentTest {

+    @Autowired
+    private SectionsBuilderService sectionsBuilderService;
+
+    @Autowired
+    private RedactManagerClassificationService redactManagerClassificationService;
+
    @Test
-    @Disabled
    @SneakyThrows
    public void testViewerDocument() {

+        String fileName = "files/bdr/notMergedParagraphs.pdf";
+        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
        LayoutGridService layoutGridService = new LayoutGridService();
        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
-        String fileName = "files/bdr/notMergedParagraphs.pdf";
        Document document = buildGraph(fileName, LayoutParsingType.TAAS);
-        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
        try (var pdDocument = Loader.loadPDF(new ClassPathResource(fileName).getFile()); var out = new FileOutputStream(tmpFileName)) {
            viewerDocumentService.createViewerDocument(pdDocument, document, out, true);
        }
    }

+    /**
+     * Builds a {@link ClassificationDocument} for the given PDF: parses the layout with
+     * {@code LayoutParsingType.REDACT_MANAGER} and empty image/table service responses, then
+     * runs the classification service and the sections builder on the result.
+     *
+     * NOTE(review): {@code layoutParsingPipeline} is not declared in the visible part of this
+     * class; presumably it is inherited from {@code BuildDocumentTest}. Confirm.
+     *
+     * @param originDocument the loaded PDF to parse; it is not closed by this method
+     * @return the parsed and classified document with sections built
+     */
+    public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
+
+        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+                originDocument,
+                new ImageServiceResponse(),
+                new TableServiceResponse());
+
+        redactManagerClassificationService.classifyDocument(classificationDocument);
+
+        sectionsBuilderService.buildSections(classificationDocument);
+
+        return classificationDocument;
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
@ -1,5 +1,27 @@
 package com.knecon.fforesight.service.layoutparser.server.segmentation;

+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.awt.geom.Rectangle2D;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.core.io.ClassPathResource;
+
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
@ -15,19 +37,8 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
 import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
 import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
+
 import lombok.SneakyThrows;
-import org.apache.pdfbox.Loader;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.junit.jupiter.api.Test;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.core.io.ClassPathResource;
-
-import java.awt.geom.Rectangle2D;
-import java.io.IOException;
-import java.util.*;
-import java.util.stream.Collectors;
-
-import static org.assertj.core.api.Assertions.assertThat;

 public class PdfSegmentationServiceTest extends AbstractTest {

@ -52,7 +63,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {

-        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
+        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
                originDocument,
                new ImageServiceResponse(),
                new TableServiceResponse());
@ -65,6 +76,18 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    }


+    /**
+     * Debugging aid: parses a single-page sample PDF and hands the result to {@code toHtml}, which
+     * writes the tables as HTML into the JVM temp directory for manual inspection. Makes no assertions.
+     *
+     * @throws IOException if the sample PDF cannot be resolved or loaded
+     */
+    @Test
+    public void tablesToHtmlDebugger() throws IOException {
+
+        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
+
+        ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));
+
+        // Resolve against java.io.tmpdir instead of a hard-coded "/tmp" so the dump also works on machines without that directory.
+        toHtml(document, Path.of(System.getProperty("java.io.tmpdir"), "A20622A izRMS (CZ) fRR Part B9_Page185.html").toString());
+
+    }
+
+
    @Test
    @SneakyThrows
    public void testMapping() {
@ -155,7 +178,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    }


-    @Test
+    @Test // Non-sense test
    public void testDoc56Page170() throws IOException {

        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
@ -166,8 +189,25 @@ public class PdfSegmentationServiceTest extends AbstractTest {

        validateTable(document, 0, 1, 1, 0, 0);
        validateTable(document, 1, 2, 2, 0, 0);
-        validateTable(document, 2, 7, 20, 0, 140);
-        validateTable(document, 3, 8, 31, 0, 170);
+        validateTable(document, 2, 6, 20, 0, 0);
+        validateTable(document, 3, 7, 31, 0, 0);
+
+    }
+
+
+    /**
+     * Parses {@code files/211.pdf} and runs the table validations below against the result.
+     *
+     * @throws IOException if the sample PDF cannot be resolved or loaded
+     */
+    @Test
+    public void testDoc211() throws IOException {
+
+        var pdf = new ClassPathResource("files/211.pdf").getFile();
+        ClassificationDocument parsed = buildClassificationDocument(Loader.loadPDF(pdf));
+
+        validateTableSize(parsed, 4);
+
+        // validateTable arguments: tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect.
+        validateTable(parsed, 0, 5, 4, 0, 0);
+        validateTable(parsed, 1, 5, 15, 14, 0);
+        validateTable(parsed, 2, 5, 14, 11, 0);
+        validateTable(parsed, 3, 5, 3, 0, 0);

    }

@ -181,7 +221,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {

        validateTableSize(document, 1);

-        validateTable(document, 0, 8, 8, 0, 2);
+        validateTable(document, 0, 8, 8, 0, 0);

        List<List<String>> values = Arrays.asList(Arrays.asList("Annex point Reference within DAR/RAR",
                        "Author, date",
@ -191,18 +231,18 @@ public class PdfSegmentationServiceTest extends AbstractTest {
                        "Method meets analytical validation criteria",
                        "Remarks (in case validation criteria are not met)",
                        "Acceptability of the method"),
-                Arrays.asList("",
+                Arrays.asList("Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                        "",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
                Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
                        "Evans P.G. 2001 TMJ4569B, VV-323245",
                        "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
-                        "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845 in a Trial Carried",
+                        "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
                        "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
                        "Y",
                        "N/A",
@ -220,6 +260,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {

        ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));

+        toHtml(document, "/tmp/html.html");
+
        validateTableSize(document, 4);

        validateTable(document, 0, 3, 2, 0, 0);
@ -231,17 +273,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {


    @Test
+    @Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently
    public void testDocA20622APartB9Page185() throws IOException {

        ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");

        ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));

-        validateTableSize(document, 2);
+        validateTableSize(document, 1);

-        validateTable(document, 0, 5, 5, 0, 23);
-        validateTable(document, 1, 11, 9, 0, 36);
+        validateTable(document, 0, 7, 4, 0, 0);
+    }

+
+    /**
+     * Parses the {@code _fixed} variant of the Part B9 page 185 sample and runs the table validations
+     * below against the result.
+     *
+     * @throws IOException if the sample PDF cannot be resolved or loaded
+     */
+    @Test
+    public void testDocA20622APartB9Page185FixedDoc() throws IOException {
+
+        var fixedPdf = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185_fixed.pdf").getFile();
+        ClassificationDocument parsed = buildClassificationDocument(Loader.loadPDF(fixedPdf));
+
+        validateTableSize(parsed, 1);
+
+        // validateTable arguments: tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect.
+        validateTable(parsed, 0, 7, 4, 0, 0);
    }


@ -328,7 +382,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));

        validateTableSize(document, 1);
-        validateTable(document, 0, 10, 6, 0, 1);
+        validateTable(document, 0, 10, 6, 0, 0);

    }

@ -450,8 +504,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {

        validateTableSize(document, 2);

-        validateTable(document, 0, 6, 8, 0, 2);
-        validateTable(document, 1, 6, 8, 0, 1);
+        validateTable(document, 0, 6, 8, 0, 0);
+        validateTable(document, 1, 6, 8, 0, 0);

    }

@ -484,12 +538,37 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    }


+    /**
+     * Debugging aid: writes the HTML rendering of every table found in the document's sections to a file.
+     * A separator line is appended whenever a table's page differs from the current page, which starts at 1.
+     *
+     * @param document parsed document whose section tables are dumped
+     * @param filename path of the output file; it is created or overwritten
+     */
+    @SneakyThrows
+    private void toHtml(ClassificationDocument document, String filename) {
+
+        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
+        StringBuilder sb = new StringBuilder();
+
+        int currentPage = 1;
+        for (var table : tables) {
+            if (currentPage != table.getPage()) {
+                currentPage = table.getPage();
+                sb.append("---------------------- Page ").append(currentPage).append("--------------\n");
+            }
+            sb.append("\n\n");
+            sb.append(table.getTextAsHtml());
+        }
+
+        try (FileOutputStream fileOutputStream = new FileOutputStream(Path.of(filename).toFile())) {
+            // Encode explicitly as UTF-8: the no-arg getBytes() uses the platform default charset,
+            // which can mangle non-ASCII table text depending on the machine running the test.
+            fileOutputStream.write(sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        }
+    }
+
+
    private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {

        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
        List<List<Cell>> rows = table.getRows();
        int emptyCellsFoundFound = rows.stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().equals("")).toList().size();

+        for (List<Cell> row : table.getRows()) {
+            row.forEach(r -> System.out.println(r.toString()));
+        }
        assertThat(emptyCellsFoundFound).isEqualTo(emptyCellsCountCorrect + emptyCellsCountIncorrect);

        assertThat(table.getColCount()).isEqualTo(colCount);
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
@ -1,21 +1,39 @@
 package com.knecon.fforesight.service.layoutparser.server.services;

+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;

+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.jupiter.api.Test;
+import org.springframework.core.io.ClassPathResource;

+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
 import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
 import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
+import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
+import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
+import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
+import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
+import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
+import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
 import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;

 import lombok.SneakyThrows;

-public class RulingCleaningServiceTest {
+public class RulingCleaningServiceTest extends BuildDocumentTest {

    @Test
 //    @Disabled
@ -25,13 +43,96 @@ public class RulingCleaningServiceTest {
        String fileName = "files/211.pdf";
        String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
        List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
-        PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
-
        RulingCleaningService rulingCleaningService = new RulingCleaningService();
+        PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
        List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
        for (PageContents pageContent : pageContents) {
-            cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings(), 8, 20));
+            cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
+        }
+
+    }
+
+
+    /**
+     * Runs {@code writeJsons} for every file ending in {@code .pdf} found below the {@code files}
+     * test resource directory.
+     */
+    @Test
+    @SneakyThrows
+    public void testTableExtraction() {
+
+        ClassPathResource resource = new ClassPathResource("files");
+
+        // Files.walk holds open directory handles until the stream is closed, so the paths are
+        // collected inside try-with-resources before any parsing starts.
+        List<Path> pdfFiles;
+        try (var paths = Files.walk(resource.getFile().toPath())) {
+            pdfFiles = paths.filter(path -> path.getFileName().toString().endsWith(".pdf")).map(Path::toAbsolutePath).toList();
+        }
+
+        for (Path pdfFile : pdfFiles) {
+            writeJsons(pdfFile);
        }
    }

+
+    /**
+     * Parses the given PDF twice, compares the table dimensions of both results via
+     * {@code compareStructures} and, if they differ, saves a drawing of each document graph as
+     * {@code before.<name>} and {@code after.<name>} in the JVM temp directory.
+     *
+     * @param filename path of the PDF to process
+     */
+    @SneakyThrows
+    private void writeJsons(Path filename) {
+
+        // NOTE(review): "before" and "after" are produced by the very same pipeline call, so they can
+        // only differ if parsing is non-deterministic - confirm this is the intended comparison.
+        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+                Loader.loadPDF(filename.toFile()),
+                new ImageServiceResponse(),
+                new TableServiceResponse()));
+        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+                Loader.loadPDF(filename.toFile()),
+                new ImageServiceResponse(),
+                new TableServiceResponse()));
+        DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
+        DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
+        if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure(), filename.getFileName().toString())) {
+            // Write into the JVM temp directory rather than one developer's Windows profile path,
+            // so the output location exists on every machine.
+            Path tmpDir = Path.of(System.getProperty("java.io.tmpdir"));
+            String tmpFileNameBefore = tmpDir.resolve("before." + filename.getFileName().toString()).toString();
+            try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
+                PdfDraw.drawDocumentGraph(pdDocument, documentGraphBefore);
+                pdDocument.save(tmpFileNameBefore);
+            }
+            String tmpFileNameAfter = tmpDir.resolve("after." + filename.getFileName().toString()).toString();
+            try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
+                PdfDraw.drawDocumentGraph(pdDocument, documentGraphAfter);
+                pdDocument.save(tmpFileNameAfter);
+
+            }
+        }
+    }
+
+
+    /**
+     * Compares the TABLE entries of two document structures pairwise by number of rows and columns.
+     *
+     * @param structure1 first structure to compare
+     * @param structure2 second structure to compare
+     * @param pdfName    name of the source PDF (not used by the comparison)
+     * @return {@code true} if both structures contain the same number of tables and every pair of
+     *         tables at the same index agrees in its number of rows and columns
+     */
+    @SneakyThrows
+    private boolean compareStructures(DocumentStructure structure1, DocumentStructure structure2, String pdfName) {
+
+        List<Table> tables1 = extractTables(structure1);
+        List<Table> tables2 = extractTables(structure2);
+
+        // A differing table count is itself a structural difference. Without this check a shorter
+        // second list throws IndexOutOfBoundsException and a longer one is silently ignored.
+        if (tables1.size() != tables2.size()) {
+            return false;
+        }
+
+        for (int i = 0; i < tables1.size(); i++) {
+            Table tableNode1 = tables1.get(i);
+            Table tableNode2 = tables2.get(i);
+            if (tableNode1.getNumberOfRows() != tableNode2.getNumberOfRows() || tableNode1.getNumberOfCols() != tableNode2.getNumberOfCols()) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+
+    /**
+     * Builds one {@link Table} from the properties of each TABLE entry in the given structure.
+     */
+    private List<Table> extractTables(DocumentStructure structure) {
+
+        return structure.streamAllEntries()
+                .filter(entryData -> entryData.getType().equals(NodeType.TABLE))
+                .map(DocumentStructure.EntryData::getProperties)
+                .map(properties -> {
+                    var builder = Table.builder();
+                    PropertiesMapper.parseTableProperties(properties, builder);
+                    // Typed local pins the stream element type to Table regardless of the builder's generics.
+                    Table table = builder.build();
+                    return table;
+                })
+                .toList();
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/A20622A
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/A20622A