RED-7141: Implemented docstrum layout parsing

2024-02-22 11:02:50 +01:00 · 2024-02-22 11:02:50 +01:00 · b0efac0b36
commit b0efac0b36
parent f146beeb44
48 changed files with 1983 additions and 331 deletions
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java
@ -55,6 +55,13 @@ public class DocumentStructure implements Serializable {

    }

+    @Schema(description = "Object containing the extra field names, a duplicate paragraph has in its properties field.")
+    public static class DuplicateParagraphProperties implements Serializable {
+        /** Name of the extra properties entry of a duplicate paragraph; presumably it holds the id of the unsorted text block (TODO confirm). */
+        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";
+
+    }
+
    public static final String RECTANGLE_DELIMITER = ";";


--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingType.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingType.java
@ -1,7 +1,10 @@
 package com.knecon.fforesight.service.layoutparser.internal.api.queue;

 public enum LayoutParsingType {
-    REDACT_MANAGER,
+    REDACT_MANAGER_OLD, // renamed from REDACT_MANAGER; LayoutParsingPipeline still blockifies it with RedactManagerBlockificationService
    TAAS,
-    DOCUMINE
+    DOCUMINE,
+    // The two types below are blockified by DocstrumBlockificationService in LayoutParsingPipeline and differ only in the boolean they pass.
+    DOCSTRUM, // passes false as the third blockify argument
+    REDACT_MANAGER // passes true; NOTE(review): the name is reused, so callers still sending REDACT_MANAGER now get the docstrum path - confirm intended
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -28,6 +28,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
@ -43,6 +44,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.RulingClean
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
 import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
 import com.knecon.fforesight.service.layoutparser.processor.services.TableExtractionService;
+import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocuMineBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.RedactManagerBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.TaasBlockificationService;
@ -86,6 +88,7 @@ public class LayoutParsingPipeline {
    TaasBlockificationService taasBlockificationService;
    DocuMineBlockificationService docuMineBlockificationService;
    RedactManagerBlockificationService redactManagerBlockificationService;
+    DocstrumBlockificationService docstrumBlockificationService;
    LayoutGridService layoutGridService;
    ObservationRegistry observationRegistry;
    VisualLayoutParsingAdapter visualLayoutParsingAdapter;
@ -97,36 +100,29 @@ public class LayoutParsingPipeline {
        log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());

        File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
-        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
-                .orElse(originFile);
+        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);

        VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
-        if (layoutParsingRequest.visualLayoutParsingFileId()
-                .isPresent()) {
-            visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId()
-                                                                                                         .get());
+        if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
+            visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
        }

        ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
-        if (layoutParsingRequest.imagesFileStorageId()
-                .isPresent()) {
-            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId()
-                                                                                     .get());
+        if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
+            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
        }

        TableServiceResponse tableServiceResponse = new TableServiceResponse();
-        if (layoutParsingRequest.tablesFileStorageId()
-                .isPresent()) {
-            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId()
-                                                                                     .get());
+        if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
+            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
        }

        ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(),
-                                                                    originFile,
-                                                                    imageServiceResponse,
-                                                                    tableServiceResponse,
-                                                                    visualLayoutParsingResponse,
-                                                                    layoutParsingRequest.identifier().toString());
+                originFile,
+                imageServiceResponse,
+                tableServiceResponse,
+                visualLayoutParsingResponse,
+                layoutParsingRequest.identifier().toString());

        log.info("Building document graph for {}", layoutParsingRequest.identifier());

@ -158,25 +154,25 @@ public class LayoutParsingPipeline {
                .numberOfPages(documentGraph.getNumberOfPages())
                .duration(System.currentTimeMillis() - start)
                .message(format("""
-                                        Layout parsing has finished in %.02f s.
-                                        identifiers: %s
-                                        %s
-                                        Files have been saved with Ids:
-                                        Structure: %s
-                                        Text: %s
-                                        Positions: %s
-                                        PageData: %s
-                                        Simplified Text: %s
-                                        Viewer Doc: %s""",
-                                ((float) (System.currentTimeMillis() - start)) / 1000,
-                                layoutParsingRequest.identifier(),
-                                buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
-                                layoutParsingRequest.structureFileStorageId(),
-                                layoutParsingRequest.textBlockFileStorageId(),
-                                layoutParsingRequest.positionBlockFileStorageId(),
-                                layoutParsingRequest.pageFileStorageId(),
-                                layoutParsingRequest.simplifiedTextStorageId(),
-                                layoutParsingRequest.viewerDocumentStorageId()))
+                                Layout parsing has finished in %.02f s.
+                                identifiers: %s
+                                %s
+                                Files have been saved with Ids:
+                                Structure: %s
+                                Text: %s
+                                Positions: %s
+                                PageData: %s
+                                Simplified Text: %s
+                                Viewer Doc: %s""",
+                        ((float) (System.currentTimeMillis() - start)) / 1000,
+                        layoutParsingRequest.identifier(),
+                        buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
+                        layoutParsingRequest.structureFileStorageId(),
+                        layoutParsingRequest.textBlockFileStorageId(),
+                        layoutParsingRequest.positionBlockFileStorageId(),
+                        layoutParsingRequest.pageFileStorageId(),
+                        layoutParsingRequest.simplifiedTextStorageId(),
+                        layoutParsingRequest.viewerDocumentStorageId()))
                .build();

    }
@ -197,14 +193,14 @@ public class LayoutParsingPipeline {
    private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {

        return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
-                             numberOfPages,
-                             semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
-                             semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
-                             semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
-                             semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
-                             semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
-                             semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
-                             semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
+                numberOfPages, // every count below is reported as 0 when the map holds no value for that node type
+                semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
+                semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
+                semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
+                semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
+                semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
+                semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
+                semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
    }


@ -260,10 +256,15 @@ public class LayoutParsingPipeline {
            PDRectangle cropbox = pdPage.getCropBox();
            CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings());

+            List<Cell> emptyTableCells = tableExtractionService.findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
+
            ClassificationPage classificationPage = switch (layoutParsingType) {
-                case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
+                case REDACT_MANAGER_OLD ->
+                        redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
                case TAAS -> taasBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
                case DOCUMINE -> docuMineBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
+                case DOCSTRUM -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, false);
+                case REDACT_MANAGER -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, true);
            };
            classificationPage.setCleanRulings(cleanRulings);
            classificationPage.setRotation(rotation);
@ -289,7 +290,11 @@ public class LayoutParsingPipeline {
                }
            }

-            tableExtractionService.extractTables(cleanRulings, classificationPage);
+            tableExtractionService.extractTables(emptyTableCells, classificationPage);
+
+            if (layoutParsingType == LayoutParsingType.DOCSTRUM || layoutParsingType == LayoutParsingType.REDACT_MANAGER) {
+                docstrumBlockificationService.combineBlocks(classificationPage);
+            }

            buildPageStatistics(classificationPage);
            increaseDocumentStatistics(classificationPage, classificationDocument);
@ -305,12 +310,28 @@ public class LayoutParsingPipeline {
        switch (layoutParsingType) {
            case TAAS -> taasClassificationService.classifyDocument(classificationDocument);
            case DOCUMINE -> docuMineClassificationService.classifyDocument(classificationDocument);
+            case REDACT_MANAGER_OLD -> redactManagerClassificationService.classifyDocument(classificationDocument);
            case REDACT_MANAGER -> redactManagerClassificationService.classifyDocument(classificationDocument);
+            case DOCSTRUM -> redactManagerClassificationService.classifyDocument(classificationDocument);
        }

        log.info("Building Sections for {}", identifier);
+
+//        if (layoutParsingType == DOCSTRUM || layoutParsingType == DOCSTRUM_XY) {
+//            // Currently for debugging return paragraphs as sections, because there is a merging logic in sectionBuilder
+//            List<ClassificationSection> sections = new ArrayList<>();
+//            for (var page : classificationPages) {
+//                page.getTextBlocks().forEach(block -> {
+//                    block.setPage(page.getPageNumber());
+//                    var section = sectionsBuilderService.buildTextBlock(List.of(block), "a");
+//                    sections.add(section);
+//                });
+//            }
+//            classificationDocument.setSections(sections);
+//        } else {
        sectionsBuilderService.buildSections(classificationDocument);
        sectionsBuilderService.addImagesToSections(classificationDocument);
+//        }
        return classificationDocument;
    }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java
@ -96,7 +96,7 @@ public abstract class AbstractPageBlock extends Rectangle {

        return this.minX - threshold <= apb.getMaxX() && this.maxX + threshold >= apb.getMinX();
    }
-
+    

    public abstract boolean isEmpty();

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Document.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Document.java
@ -15,7 +15,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
@ -52,7 +51,7 @@ public class Document implements GenericSemanticNode {
    public TextBlock getTextBlock() {

        if (textBlock == null) {
-            textBlock = streamTerminalTextBlocksInOrder().collect(new TextBlockCollector());
+            textBlock = GenericSemanticNode.super.getTextBlock(); // built once through the interface default method, then reused from the textBlock field
        }
        return textBlock;
    }
@ -67,8 +66,7 @@ public class Document implements GenericSemanticNode {

    public Stream<TextBlock> streamTerminalTextBlocksInOrder() {

-        return streamAllNodes().filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getLeafTextBlock);
+        return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock); // leaves are mapped through getTextBlock(), so a leaf that overrides it supplies its own block
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/DuplicatedParagraph.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/DuplicatedParagraph.java
@ -0,0 +1,34 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
+
+import java.util.stream.Stream;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.experimental.SuperBuilder;
+
+@Data
+@EqualsAndHashCode(callSuper = true)
+@SuperBuilder
+public class DuplicatedParagraph extends Paragraph {
+    // Paragraph that carries a second leaf text block (unsortedLeafTextBlock) next to the leafTextBlock inherited from Paragraph.
+    TextBlock unsortedLeafTextBlock;
+
+    /** Returns a TextBlock collected from leafTextBlock followed by unsortedLeafTextBlock; it is rebuilt on every call, not cached. */
+    @Override
+    public TextBlock getTextBlock() {
+
+        return Stream.of(leafTextBlock, unsortedLeafTextBlock).collect(new TextBlockCollector());
+
+    }
+
+    /** Delegates to the superclass toString. NOTE(review): presumably declared explicitly so Lombok's @Data does not generate one here - confirm. */
+    @Override
+    public String toString() {
+
+        return super.toString();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Paragraph.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Paragraph.java
@ -18,11 +18,12 @@ import lombok.Builder;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.experimental.FieldDefaults;
+import lombok.experimental.SuperBuilder;

@Data
-@Builder
+@SuperBuilder
@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
+@FieldDefaults(level = AccessLevel.PROTECTED)
 public class Paragraph implements GenericSemanticNode {

    @Builder.Default
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
@ -11,7 +11,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
@ -62,9 +61,7 @@ public class Section implements GenericSemanticNode {
    public TextBlock getTextBlock() {

        if (textBlock == null) {
-            textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
-                    .map(SemanticNode::getLeafTextBlock)
-                    .collect(new TextBlockCollector());
+            textBlock = GenericSemanticNode.super.getTextBlock(); // built once through the interface default method, then reused from the textBlock field
        }
        return textBlock;
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SemanticNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SemanticNode.java
@ -20,6 +20,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.E
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
 import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;

 public interface SemanticNode {
@ -39,7 +40,10 @@ public interface SemanticNode {
     *
     * @return TextBlock containing all AtomicTextBlocks that are located under this Node.
     */
-    TextBlock getTextBlock();
+    default TextBlock getTextBlock() {
+        // Default: collects getTextBlock() of every leaf among streamAllSubNodes() into one TextBlock; implementors may override, e.g. to cache the result.
+        return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock).collect(new TextBlockCollector());
+    }


    /**
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
@ -48,7 +48,6 @@ public class Table implements SemanticNode {
    @EqualsAndHashCode.Exclude
    Map<Page, Rectangle2D> bBoxCache;

-
    /**
     * Streams all entities in this table, that appear in a row, which contains any of the provided strings.
     *
@ -332,9 +331,7 @@ public class Table implements SemanticNode {
    public TextBlock getTextBlock() {

        if (textBlock == null) {
-            textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
-                    .map(SemanticNode::getLeafTextBlock)
-                    .collect(new TextBlockCollector());
+            textBlock = SemanticNode.super.getTextBlock(); // built once through the SemanticNode default method, then reused from the textBlock field
        }
        return textBlock;
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
@ -53,6 +53,9 @@ public class TextPageBlock extends AbstractPageBlock {
    @JsonIgnore
    private PageBlockType classification;

+    @JsonIgnore
+    private boolean toDuplicate;
+

    @JsonIgnore
    public TextDirection getDir() {
@ -73,7 +76,7 @@ public class TextPageBlock extends AbstractPageBlock {

        return sequences.get(0).getPageWidth();
    }
-    
+

    public static TextPageBlock merge(List<TextPageBlock> textBlocksToMerge) {

@ -82,6 +85,7 @@ public class TextPageBlock extends AbstractPageBlock {
        return fromTextPositionSequences(sequences);
    }

+
    public static TextPageBlock fromTextPositionSequences(List<TextPositionSequence> wordBlockList) {

        TextPageBlock textBlock = null;
@ -133,7 +137,6 @@ public class TextPageBlock extends AbstractPageBlock {
    }


-
    /**
     * Returns the minX value in pdf coordinate system.
     * Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
@ -362,7 +365,22 @@ public class TextPageBlock extends AbstractPageBlock {
        }

        return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());
+    }

+
+    public int getNumberOfLines() {
+        // Estimates the line count by comparing consecutive entries of sequences; the result is at least 1, even when sequences is empty.
+        int numberOfLines = 1;
+        TextPositionSequence previous = null;
+        for (TextPositionSequence word : sequences) {
+            if (previous != null) {
+                if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight()) { // new line only when maxYDirAdj grows by more than this word's text height; smaller or negative jumps are not counted
+                    numberOfLines++;
+                }
+            }
+            previous = word;
+        }
+        return numberOfLines;
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java
@ -55,6 +55,17 @@ public class TextPositionSequence implements CharSequence {
    }


+    public TextPositionSequence(List<RedTextPosition> textPositions, int page) {
+        // Direction, rotation and page size are read from the first text position, so textPositions must not be empty; the list is stored as passed, not copied.
+        this.textPositions = textPositions;
+        this.page = page;
+        this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
+        this.rotation = textPositions.get(0).getRotation();
+        this.pageHeight = textPositions.get(0).getPageHeight();
+        this.pageWidth = textPositions.get(0).getPageWidth();
+    }
+
+
    @Override
    public int length() {

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
@ -240,7 +240,7 @@ public class SectionsBuilderService {
    }


-    private ClassificationSection buildTextBlock(List<AbstractPageBlock> wordBlockList, String lastHeadline) {
+    public ClassificationSection buildTextBlock(List<AbstractPageBlock> wordBlockList, String lastHeadline) {

        ClassificationSection section = new ClassificationSection();

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
@ -14,7 +14,6 @@ import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
@ -41,19 +40,18 @@ public class TableExtractionService {
     * <p>
     * DirAdj (Text direction adjusted) values can not be used here.
     *
-     * @param cleanRulings The lines used to build the table.
-     * @param page         Page object that contains textblocks and statistics.
+     * @param emptyCells The cells used to build the table.
+     * @param page       Page object that contains textblocks and statistics.
     */

-    public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {
+    public void extractTables(List<Cell> emptyCells, ClassificationPage page) {

-        List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
        // sort cells by size (height * width) ascending so that textBlocks are always assigned to the smallest cells that contain them
-        cells.sort(CELL_SIZE_COMPARATOR);
+        emptyCells.sort(CELL_SIZE_COMPARATOR);

        for (AbstractPageBlock abstractPageBlock : page.getTextBlocks()) {
            TextPageBlock textBlock = (TextPageBlock) abstractPageBlock;
-            for (Cell cell : cells) {
+            for (Cell cell : emptyCells) {
                if (cell.hasMinimumSize() && doesCellContainTextBlock(cell, textBlock)) {
                    cell.addTextBlock(textBlock);
                    break;
@ -61,7 +59,7 @@ public class TableExtractionService {
            }
        }

-        cells = new ArrayList<>(new HashSet<>(cells));
+        var cells = new ArrayList<>(new HashSet<>(emptyCells));
        DoubleComparisons.sort(cells, Rectangle.ILL_DEFINED_ORDER);

        List<Rectangle> spreadsheetAreas = SpreadsheetFinder.findSpreadsheetsFromCells(cells);
@ -79,9 +77,7 @@ public class TableExtractionService {
                }
            }

-            var containedCellsWithText = containedCells.stream()
-                    .filter(cell -> !cell.getTextBlocks().isEmpty())
-                    .toList();
+            var containedCellsWithText = containedCells.stream().filter(cell -> !cell.getTextBlocks().isEmpty()).toList();

            // verify if table would contain fewer cells with text than the threshold allows
            if (containedCellsWithText.size() >= MAX_TABLE_CONTAINED_CELLS_WITH_TEXT && checkIfTableCellsAreUniform(containedCells)) {
@ -101,11 +97,7 @@ public class TableExtractionService {
            if (position != -1) {
                page.getTextBlocks().add(position, table);

-                var toBeRemoved = table.getCells()
-                        .stream()
-                        .map(Cell::getTextBlocks)
-                        .flatMap(List::stream)
-                        .toList();
+                var toBeRemoved = table.getCells().stream().map(Cell::getTextBlocks).flatMap(List::stream).toList();
                // remove text blocks from the page that were also added with the table (from its contained cells)
                page.getTextBlocks().removeAll(toBeRemoved);
            }
@ -115,7 +107,7 @@ public class TableExtractionService {

    private boolean checkIfTableCellsAreUniform(List<Cell> containedCells) {

-        if(containedCells.size() <= 2) {
+        if (containedCells.size() <= 2) {
            return true;
        }

@ -139,19 +131,13 @@ public class TableExtractionService {
        }
        double x0 = cell.getX();
        double y0 = cell.getY();
-        return (x >= x0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE
-                && y >= y0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE
-                && (x + w) <= x0 + cell.getWidth() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE
-                && (y + h) <= y0 + cell.getHeight() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE);
+        return (x >= x0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE && y >= y0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE && (x + w) <= x0 + cell.getWidth() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE && (y + h) <= y0 + cell.getHeight() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE);
    }


    public static List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {

-        return RectangularIntersectionFinder.find(horizontalRulingLines, verticalRulingLines)
-                .stream()
-                .map(Cell::new)
-                .collect(Collectors.toList());
+        return RectangularIntersectionFinder.find(horizontalRulingLines, verticalRulingLines).stream().map(Cell::new).collect(Collectors.toList());
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
@ -0,0 +1,408 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
+
+import static java.util.stream.Collectors.toSet;
+
+import java.awt.geom.Point2D;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Set;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.DocstrumSegmentationService;
+import com.knecon.fforesight.service.layoutparser.processor.utils.QuickSort;
+import com.knecon.fforesight.service.layoutparser.processor.utils.RulingTextDirAdjustUtil;
+import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionSequenceComparator;
+
+import lombok.RequiredArgsConstructor;
+
+@SuppressWarnings("all")
+@Service
+@RequiredArgsConstructor
+public class DocstrumBlockificationService {
+
+    private final DocstrumSegmentationService docstrumSegmentationService;
+
+    static final float THRESHOLD = 1f;
+
+
+    /**
+     * Segments the words of a page into zones and turns every zone into one or more page blocks.
+     * <p>
+     * The outline of each cell is converted into two horizontal and two vertical rulings. Only these cell borders are
+     * used for splitting, because underline and strikethrough strokes also appear among the rulings and must not
+     * split a block.
+     *
+     * @param textPositions the words of the page
+     * @param cells         the cells whose borders act as split lines
+     * @param xyOder        handed unchanged to {@code DocstrumSegmentationService#segmentPage}
+     * @return a page holding the blocks of all zones, in the order the zones were returned by the segmentation
+     */
+    public ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Cell> cells, boolean xyOder) {
+
+        List<Ruling> horizontalCellBorders = new ArrayList<>();
+        List<Ruling> verticalCellBorders = new ArrayList<>();
+
+        for (Cell cell : cells) {
+            var x0 = cell.x;
+            var y0 = cell.y;
+            var x1 = cell.x + cell.width;
+            var y1 = cell.y + cell.height;
+
+            horizontalCellBorders.add(new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x1, y0)));
+            horizontalCellBorders.add(new Ruling(new Point2D.Float(x0, y1), new Point2D.Float(x1, y1)));
+            verticalCellBorders.add(new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x0, y1)));
+            verticalCellBorders.add(new Ruling(new Point2D.Float(x1, y0), new Point2D.Float(x1, y1)));
+        }
+
+        List<AbstractPageBlock> pageBlocks = new ArrayList<>();
+        for (var zone : docstrumSegmentationService.segmentPage(textPositions, xyOder)) {
+
+            // Flatten the zone (lines -> words) into one word list, keeping the order of lines and words.
+            List<TextPositionSequence> zoneWords = new ArrayList<>();
+            zone.getLines().forEach(line -> line.getWords().forEach(word -> zoneWords.add(new TextPositionSequence(word.getTextPositions(), word.getPage()))));
+
+            pageBlocks.addAll(splitZonesAtRulings(zoneWords, horizontalCellBorders, verticalCellBorders));
+        }
+
+        return new ClassificationPage(pageBlocks);
+    }
+
+
+    /**
+     * Merges text blocks of a page that belong together.
+     * <p>
+     * Zones are merged first via {@code mergeZones}. Afterwards the block list is walked in order and the current text
+     * block is merged into the previously visited text block as soon as one of four rules applies; table blocks are
+     * skipped. A merge removes the current block from the list and replaces the preceding list element with the
+     * rebuilt block. Finally {@code mergeZones} runs once more.
+     *
+     * @param page the page whose text block list is modified in place
+     */
+    public void combineBlocks(ClassificationPage page) {
+
+        mergeZones(page.getTextBlocks());
+
+        // Starts as an empty block so that the first text block is never merged (see the isEmpty() check below).
+        TextPageBlock previous = new TextPageBlock();
+        ListIterator<AbstractPageBlock> itty = page.getTextBlocks().listIterator();
+        while (itty.hasNext()) {
+            AbstractPageBlock block = itty.next();
+            // NOTE(review): `previous` is not reset when a table is skipped. If a merge rule fires for the text block
+            // following a table, itty.previous() below returns the table and itty.set(previous) overwrites it - confirm
+            // that a table can never sit between two mergeable text blocks.
+            if (block instanceof TablePageBlock) {
+                continue;
+            }
+            TextPageBlock current = (TextPageBlock) block;
+
+            if (previous != null && !previous.getSequences().isEmpty()) {
+
+                // Rule 1: same direction, both blocks have at least two lines, they intersect on the y-axis, no block
+                // lies between them and no other block intersects their y-range. The result is flagged toDuplicate.
+                if (current.getDir() == previous.getDir() //
+                        && previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 //
+                        && previous.intersectsY(current) //
+                        && !hasBetween(current, previous, page.getTextBlocks()) //
+                        && numberOfYIntersections(current, previous, page.getTextBlocks()) == 0) {
+
+                    previous.getSequences().addAll(current.getSequences());
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    previous.setToDuplicate(true);
+                    // Drop `current`, step back onto the preceding element, replace it with the merged block and
+                    // step forward again so the iteration continues behind the merged block.
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+                // Rule 2: same direction and the blocks (almost) intersect with zero tolerance. The toDuplicate flag
+                // of the previous block is carried over to the rebuilt block.
+                if (current.getDir() == previous.getDir() && (previous.almostIntersects(current, 0, 0))) {
+
+                    previous.getSequences().addAll(current.getSequences());
+                    boolean toDuplicate = previous.isToDuplicate();
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    previous.setToDuplicate(toDuplicate);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+                // Rule 3: same direction, max-Y or min-Y of both blocks differ by less than THRESHOLD, the line counts
+                // are (1, >= 1) or (2, 1), no block lies between them and at most four other blocks intersect their
+                // y-range.
+                // NOTE(review): rules 3 and 4 rebuild the block without carrying the toDuplicate flag over - confirm
+                // this is intended.
+                if (current.getDir() == previous.getDir() //
+                        && (Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) //
+                        && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1) //
+                        && !hasBetween(current, previous, page.getTextBlocks()) && numberOfYIntersections(current, previous, page.getTextBlocks()) <= 4) {
+
+                    previous.getSequences().addAll(current.getSequences());
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+                // Rule 4: same direction, the blocks intersect on the y-axis, the line counts are (1, >= 1) or (2, 1),
+                // no block lies between them and no other block intersects their y-range.
+                if (current.getDir() == previous.getDir() //
+                        && current.intersectsY(previous) //
+                        && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1) //
+                        && !hasBetween(current, previous, page.getTextBlocks()) //
+                        && numberOfYIntersections(current, previous, page.getTextBlocks()) <= 0) {
+                    previous.getSequences().addAll(current.getSequences());
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+            }
+            // No rule matched: the current block becomes the merge target for the next text block.
+            previous = current;
+        }
+
+        mergeZones(page.getTextBlocks());
+    }
+
+
+    /**
+     * Checks whether a third block lies between {@code other} and {@code block} along the x-axis.
+     * <p>
+     * A candidate counts when it intersects {@code other} on the y-axis, its minimum x is not smaller than the
+     * maximum x of {@code other} and its maximum x is not larger than the minimum x of {@code block}.
+     * The two blocks themselves are ignored (compared by reference).
+     *
+     * @param block     the block expected on the higher-x side
+     * @param other     the block expected on the lower-x side
+     * @param allBlocks all blocks of the page
+     * @return {@code true} if at least one such candidate exists
+     */
+    private boolean hasBetween(TextPageBlock block, TextPageBlock other, List<AbstractPageBlock> allBlocks) {
+
+        return allBlocks.stream()
+                .filter(candidate -> candidate != other && candidate != block)
+                .anyMatch(candidate -> other.intersectsY(candidate) && other.getMaxX() <= candidate.getMinX() && candidate.getMaxX() <= block.getMinX());
+    }
+
+
+    /**
+     * Counts the blocks, other than the two given ones, whose y-range overlaps the y-band derived from both blocks.
+     * The two blocks themselves are excluded by reference comparison.
+     *
+     * @param block     first block defining the band
+     * @param other     second block defining the band
+     * @param allBlocks all blocks of the page
+     * @return number of further blocks overlapping the band
+     */
+    private int numberOfYIntersections(TextPageBlock block, TextPageBlock other, List<AbstractPageBlock> allBlocks) {
+
+        double bandStart = Math.min(block.getMinY(), other.getMinY());
+        // NOTE(review): the band end is the smaller of the two max-Y values, not the larger one. This looks like it
+        // could be a copy of the line above; kept as is because the merge thresholds may be tuned to it - confirm.
+        double bandEnd = Math.min(block.getMaxY(), other.getMaxY());
+
+        long overlapping = allBlocks.stream()
+                .filter(candidate -> candidate != other && candidate != block)
+                .filter(candidate -> bandStart <= candidate.getMaxY() && bandEnd >= candidate.getMinY())
+                .count();
+
+        return (int) overlapping;
+    }
+
+
+    /**
+     * Merges text blocks that have the same direction and for which {@code almostIntersects(inner, 0, 0)} holds.
+     * <p>
+     * Table blocks and blocks flagged {@code toDuplicate} never take part. An absorbed block is only recorded in
+     * {@code toRemove} and removed from the list after the walk, so list positions stay stable while iterating.
+     *
+     * @param zones the block list, modified in place
+     */
+    private void mergeZones(List<AbstractPageBlock> zones) {
+
+        ListIterator<AbstractPageBlock> itty = zones.listIterator();
+        Set<AbstractPageBlock> toRemove = new HashSet<>();
+        while (itty.hasNext()) {
+            AbstractPageBlock block = itty.next();
+            if (block instanceof TablePageBlock) {
+                continue;
+            }
+
+            TextPageBlock current = (TextPageBlock) block;
+
+            if (current.isToDuplicate()) {
+                continue;
+            }
+            // NOTE(review): a block that was already absorbed (contained in toRemove) can still become `current`
+            // here, because only the inner loop checks toRemove - confirm this is intended.
+
+            // Compare `current` against every other block of the list, not only the following ones.
+            for (int i = 0; i < zones.size(); i++) {
+
+                if (toRemove.contains(zones.get(i))) {
+                    continue;
+                }
+                if (zones.get(i) == current) {
+                    continue;
+                }
+                if (zones.get(i) instanceof TablePageBlock) {
+                    continue;
+                }
+
+                TextPageBlock inner = (TextPageBlock) zones.get(i);
+
+                if (inner.isToDuplicate()) {
+                    continue;
+                }
+
+                if (current.getDir() == inner.getDir() && current.almostIntersects(inner, 0, 0)) {
+
+                    // Absorb `inner`: append its words, re-sort, rebuild the block and put the rebuilt block at the
+                    // list position of the element last returned by itty.next().
+                    current.getSequences().addAll(inner.getSequences());
+                    QuickSort.sort(current.getSequences(), new TextPositionSequenceComparator());
+                    current = buildTextBlock(current.getSequences(), 0);
+                    toRemove.add(inner);
+                    itty.set(current);
+                }
+            }
+        }
+        zones.removeAll(toRemove);
+    }
+
+
+    /**
+     * Splits a sequence of words into text blocks.
+     * <p>
+     * Words are collected into a chunk until either the text direction changes from one word to the next or a ruling
+     * separates the chunk collected so far from the next word; the chunk is then turned into a block and a new chunk
+     * starts with that word.
+     *
+     * @param textPositions         the words to split, in reading order
+     * @param horizontalRulingLines horizontal rulings that may separate words
+     * @param verticalRulingLines   vertical rulings that may separate words
+     * @return the blocks built from the chunks; empty if {@code textPositions} is empty
+     */
+    public List<AbstractPageBlock> splitZonesAtRulings(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
+
+        int indexOnPage = 0;
+        List<TextPositionSequence> chunkWords = new ArrayList<>();
+        List<AbstractPageBlock> chunkBlockList = new ArrayList<>();
+
+        // Extent of the current chunk. Infinite sentinels instead of fixed numbers (1000 / 0) keep the extent correct
+        // for coordinates outside [0, 1000], e.g. on pages larger than 1000 units.
+        float minX = Float.POSITIVE_INFINITY;
+        float maxX = Float.NEGATIVE_INFINITY;
+        float minY = Float.POSITIVE_INFINITY;
+        float maxY = Float.NEGATIVE_INFINITY;
+        TextPositionSequence prev = null;
+
+        for (TextPositionSequence word : textPositions) {
+
+            // The very first word has nothing to be split from, so neither test is evaluated for it.
+            boolean startsNewChunk = prev != null //
+                    && (!prev.getDir().equals(word.getDir()) || isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines));
+
+            if (startsNewChunk) {
+
+                chunkBlockList.add(buildTextBlock(chunkWords, indexOnPage));
+                indexOnPage++;
+
+                chunkWords = new ArrayList<>();
+
+                minX = Float.POSITIVE_INFINITY;
+                maxX = Float.NEGATIVE_INFINITY;
+                minY = Float.POSITIVE_INFINITY;
+                maxY = Float.NEGATIVE_INFINITY;
+            }
+
+            chunkWords.add(word);
+            prev = word;
+
+            // Grow the chunk extent by the word that was just added.
+            if (word.getMinXDirAdj() < minX) {
+                minX = word.getMinXDirAdj();
+            }
+            if (word.getMaxXDirAdj() > maxX) {
+                maxX = word.getMaxXDirAdj();
+            }
+            if (word.getMinYDirAdj() < minY) {
+                minY = word.getMinYDirAdj();
+            }
+            if (word.getMaxYDirAdj() > maxY) {
+                maxY = word.getMaxYDirAdj();
+            }
+        }
+
+        // buildTextBlock returns null for an empty chunk, which only happens when there were no words at all.
+        TextPageBlock lastBlock = buildTextBlock(chunkWords, indexOnPage);
+        if (lastBlock != null) {
+            chunkBlockList.add(lastBlock);
+        }
+
+        return chunkBlockList;
+    }
+
+
+    /**
+     * Tells whether two values differ by less than {@code THRESHOLD}.
+     */
+    private boolean equalsWithThreshold(float f1, float f2) {
+
+        float difference = f1 - f2;
+        return Math.abs(difference) < THRESHOLD;
+    }
+
+
+    /**
+     * Builds one text block spanning all given words and records the most frequent font, style, font size,
+     * text height and space width as well as the highest font size of those words.
+     * <p>
+     * If all words of the block share the same min-Y (rounded to three decimals), the words are sorted by min-X.
+     *
+     * @param wordBlockList the words of the block; the list itself is handed to the created block
+     * @param indexOnPage   not used by this method
+     * @return the block, or {@code null} if {@code wordBlockList} is empty
+     */
+    private TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
+
+        if (wordBlockList.isEmpty()) {
+            return null;
+        }
+
+        FloatFrequencyCounter heights = new FloatFrequencyCounter();
+        FloatFrequencyCounter fontSizes = new FloatFrequencyCounter();
+        FloatFrequencyCounter spaceWidths = new FloatFrequencyCounter();
+        StringFrequencyCounter fonts = new StringFrequencyCounter();
+        StringFrequencyCounter styles = new StringFrequencyCounter();
+
+        TextPageBlock result = null;
+        for (TextPositionSequence word : wordBlockList) {
+
+            heights.add(word.getTextHeight());
+            fontSizes.add(word.getFontSize());
+            spaceWidths.add(word.getSpaceWidth());
+            fonts.add(word.getFont());
+            styles.add(word.getFontStyle());
+
+            if (result == null) {
+                // The first word defines the initial extent and rotation of the block.
+                result = new TextPageBlock(word.getMinXDirAdj(), word.getMaxXDirAdj(), word.getMinYDirAdj(), word.getMaxYDirAdj(), wordBlockList, word.getRotation());
+                continue;
+            }
+
+            // Every further word enlarges the block to the union of block and word.
+            TextPageBlock enlarged = result.union(word);
+            result.resize(enlarged.getMinX(), enlarged.getMinY(), enlarged.getWidth(), enlarged.getHeight());
+        }
+
+        result.setMostPopularWordFont(fonts.getMostPopular());
+        result.setMostPopularWordStyle(styles.getMostPopular());
+        result.setMostPopularWordFontSize(fontSizes.getMostPopular());
+        result.setMostPopularWordHeight(heights.getMostPopular());
+        result.setMostPopularWordSpaceWidth(spaceWidths.getMostPopular());
+        result.setHighestFontSize(fontSizes.getHighest());
+
+        var sequences = result.getSequences();
+        if (sequences != null && sequences.stream().map(sequence -> round(sequence.getMinYDirAdj(), 3)).distinct().count() == 1) {
+            sequences.sort(Comparator.comparing(TextPositionSequence::getMinXDirAdj));
+        }
+        return result;
+    }
+
+
+    /**
+     * Checks whether a ruling separates the chunk collected so far from the next word.
+     * <p>
+     * Two connecting segments are tested, each against the vertical and the horizontal rulings:
+     * {@code (maxX, minY) -> (word minX, word minY)} and {@code (minX, minY) -> (word minX, word maxY)}.
+     *
+     * @param minX                  minimum x of the chunk
+     * @param minY                  minimum y of the chunk
+     * @param maxX                  maximum x of the chunk
+     * @param maxY                  maximum y of the chunk; not used by this method
+     * @param word                  the next word
+     * @param horizontalRulingLines horizontal rulings
+     * @param verticalRulingLines   vertical rulings
+     * @return {@code true} if any tested segment intersects a ruling
+     */
+    private boolean isSplitByRuling(float minX,
+                                    float minY,
+                                    float maxX,
+                                    float maxY,
+                                    TextPositionSequence word,
+                                    List<Ruling> horizontalRulingLines,
+                                    List<Ruling> verticalRulingLines) {
+
+        float dir = word.getDir().getDegrees();
+        float pageWidth = word.getPageWidth();
+        float pageHeight = word.getPageHeight();
+
+        float wordMinX = word.getMinXDirAdj();
+        float wordMinY = word.getMinYDirAdj();
+        float wordMaxY = word.getMaxYDirAdj();
+
+        return isSplitByRuling(maxX, minY, wordMinX, wordMinY, verticalRulingLines, dir, pageWidth, pageHeight) //
+                || isSplitByRuling(minX, minY, wordMinX, wordMaxY, horizontalRulingLines, dir, pageWidth, pageHeight) //
+                || isSplitByRuling(maxX, minY, wordMinX, wordMinY, horizontalRulingLines, dir, pageWidth, pageHeight) //
+                || isSplitByRuling(minX, minY, wordMinX, wordMaxY, verticalRulingLines, dir, pageWidth, pageHeight);
+    }
+
+
+    /**
+     * Checks whether the segment {@code (previousX2, previousY1) -> (currentX1, currentY1)} intersects any of the
+     * given rulings after each ruling was converted with {@code RulingTextDirAdjustUtil.convertToDirAdj}.
+     *
+     * @return {@code true} on the first intersecting ruling, {@code false} if there is none
+     */
+    private boolean isSplitByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines, float dir, float pageWidth, float pageHeight) {
+
+        return rulingLines.stream()
+                .map(ruling -> RulingTextDirAdjustUtil.convertToDirAdj(ruling, dir, pageWidth, pageHeight))
+                .anyMatch(adjusted -> adjusted.intersectsLine(previousX2, previousY1, currentX1, currentY1));
+    }
+
+
+    /**
+     * Rounds a value to the given number of decimal places.
+     *
+     * @param value         the value to round
+     * @param decimalPoints number of decimal places to keep
+     * @return the rounded value
+     */
+    private double round(float value, int decimalPoints) {
+
+        double scale = Math.pow(10, decimalPoints);
+        long scaled = Math.round(value * scale);
+        return scaled / scale;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java
@ -0,0 +1,59 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.LineBuilderService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.NearestNeighbourService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.ReadingOrderService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.SpacingService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.ZoneBuilderService;
+
+import lombok.RequiredArgsConstructor;
+
+/**
+ * Segments the text of a page into zones, separately for each of the four text directions.
+ */
+@Service
+@RequiredArgsConstructor
+public class DocstrumSegmentationService {
+
+    private final NearestNeighbourService nearestNeighbourService;
+    private final SpacingService spacingService;
+    private final LineBuilderService lineBuilderService;
+    private final ZoneBuilderService zoneBuilderService;
+    private final ReadingOrderService readingOrderService;
+
+
+    /**
+     * Computes the zones of every text direction and passes the combined list to the reading order service.
+     *
+     * @param textPositions the words of the page
+     * @param xyOder        handed unchanged to {@code ReadingOrderService#resolve}
+     * @return the zones as returned by the reading order service
+     */
+    public List<Zone> segmentPage(List<TextPositionSequence> textPositions, boolean xyOder) {
+
+        List<Zone> zonesOfAllDirections = new ArrayList<>();
+        for (TextDirection direction : List.of(TextDirection.ZERO, TextDirection.QUARTER_CIRCLE, TextDirection.HALF_CIRCLE, TextDirection.THREE_QUARTER_CIRCLE)) {
+            zonesOfAllDirections.addAll(computeZones(textPositions, direction));
+        }
+
+        return readingOrderService.resolve(zonesOfAllDirections, xyOder);
+    }
+
+
+    /**
+     * Builds the zones for the words of a single text direction: characters are created from the text positions,
+     * their nearest neighbours are determined, spacings are derived and lines and zones are built from them.
+     * The line spacing is capped at 20.
+     */
+    private List<Zone> computeZones(List<TextPositionSequence> textPositions, TextDirection direction) {
+
+        var characters = textPositions.stream()
+                .filter(sequence -> sequence.getDir() == direction)
+                .flatMap(sequence -> sequence.getTextPositions().stream())
+                .map(Character::new)
+                .collect(Collectors.toList());
+
+        nearestNeighbourService.findNearestNeighbors(characters);
+
+        var characterSpacing = spacingService.computeCharacterSpacing(characters);
+        var lineSpacing = Math.min(spacingService.computeLineSpacing(characters), 20);
+
+        return zoneBuilderService.buildZones(lineBuilderService.buildLines(characters, characterSpacing, lineSpacing), characterSpacing, lineSpacing);
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java
@ -0,0 +1,25 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+/**
+ * Accepts neighbors whose angle lies in the half-open range {@code [lowerAngle, upperAngle)}.
+ * <p>
+ * A lower bound below {@code -PI/2} is shifted up by {@code PI} and an upper bound of {@code PI/2} or more is shifted
+ * down by {@code PI}. If the lower bound ends up above the upper bound, the range wraps around and matches angles that
+ * are at or above the lower bound or below the upper bound.
+ */
+public class AngleFilter {
+
+    protected double lowerAngle;
+    protected double upperAngle;
+
+
+    public AngleFilter(double lowerAngle, double upperAngle) {
+
+        if (lowerAngle < -Math.PI / 2) {
+            this.lowerAngle = lowerAngle + Math.PI;
+        } else {
+            this.lowerAngle = lowerAngle;
+        }
+
+        if (upperAngle >= Math.PI / 2) {
+            this.upperAngle = upperAngle - Math.PI;
+        } else {
+            this.upperAngle = upperAngle;
+        }
+    }
+
+
+    /**
+     * @param neighbor the neighbor whose angle is tested
+     * @return {@code true} if the angle of the neighbor lies inside the (possibly wrapping) range
+     */
+    public boolean matches(Neighbor neighbor) {
+
+        boolean atOrAboveLower = lowerAngle <= neighbor.getAngle();
+        boolean belowUpper = neighbor.getAngle() < upperAngle;
+
+        // Regular range: both bounds must hold. Wrapping range: one of them is enough.
+        return lowerAngle <= upperAngle ? atOrAboveLower && belowUpper : atOrAboveLower || belowUpper;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java
@ -0,0 +1,48 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.awt.geom.Rectangle2D;
+
+import lombok.Data;
+
+/**
+ * Base class for elements described by a rectangular bounding box. All geometry accessors delegate to the box.
+ */
+@Data
+public abstract class BoundingBox {
+
+    private Rectangle2D bBox;
+
+
+    /** @return x of the bounding box */
+    public double getX() {
+
+        return bBox.getX();
+    }
+
+
+    /** @return y of the bounding box */
+    public double getY() {
+
+        return bBox.getY();
+    }
+
+
+    /** @return width of the bounding box */
+    public double getWidth() {
+
+        return bBox.getWidth();
+    }
+
+
+    /** @return height of the bounding box */
+    public double getHeight() {
+
+        return bBox.getHeight();
+    }
+
+
+    /** @return height times width of the bounding box */
+    public double getArea() {
+
+        double height = bBox.getHeight();
+        double width = bBox.getWidth();
+        return (height * width);
+    }
+
+
+    /**
+     * Checks whether the given rectangle lies inside this bounding box, allowing it to stick out by up to
+     * {@code tolerance} on every side.
+     *
+     * @param contained the rectangle to test
+     * @param tolerance the allowed overhang per side
+     * @return {@code true} if all four sides are within the tolerance
+     */
+    public boolean contains(Rectangle2D contained, double tolerance) {
+
+        boolean withinMinX = bBox.getX() <= contained.getX() + tolerance;
+        boolean withinMinY = bBox.getY() <= contained.getY() + tolerance;
+        boolean withinMaxX = bBox.getX() + bBox.getWidth() >= contained.getX() + contained.getWidth() - tolerance;
+        boolean withinMaxY = bBox.getY() + bBox.getHeight() >= contained.getY() + contained.getHeight() - tolerance;
+
+        return withinMinX && withinMinY && withinMaxX && withinMaxY;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java
@ -0,0 +1,85 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
+
+import lombok.Data;
+
+/**
+ * A character of the page as used by the docstrum segmentation. It wraps a text position and a reference point
+ * that is the direction-adjusted position of the text position shifted by half its width and half its height.
+ */
+@Data
+public class Character {
+
+    private final double x;
+    private final double y;
+    private final RedTextPosition textPosition;
+
+    private List<Neighbor> neighbors = new ArrayList<>();
+
+
+    /**
+     * @param chunk the text position this character is created from
+     */
+    public Character(RedTextPosition chunk) {
+
+        this.x = chunk.getXDirAdj() + chunk.getWidthDirAdj() / 2;
+        this.y = chunk.getYDirAdj() + chunk.getHeightDir() / 2;
+        this.textPosition = chunk;
+    }
+
+
+    /** @return the height of the underlying text position */
+    public double getHeight() {
+
+        return textPosition.getHeightDir();
+    }
+
+
+    /** @return the Euclidean distance between the reference points of both characters */
+    public double distance(Character character) {
+
+        double dx = getX() - character.getX();
+        double dy = getY() - character.getY();
+        return Math.sqrt(dx * dx + dy * dy);
+    }
+
+
+    /** @return the absolute difference of the x reference coordinates */
+    public double horizontalDistance(Character character) {
+
+        return Math.abs(getX() - character.getX());
+    }
+
+
+    /** @return the absolute difference of the y reference coordinates */
+    public double verticalDistance(Character character) {
+
+        return Math.abs(getY() - character.getY());
+    }
+
+
+    /**
+     * Compares the x-intervals {@code [x, x + width]} of both characters.
+     * <p>
+     * The magnitude is the distance between the two middle values of the four sorted interval ends, i.e. the length of
+     * the overlap if the intervals overlap and the length of the gap otherwise.
+     *
+     * @param other the character to compare with
+     * @return the overlap length (positive or zero) if the intervals overlap, otherwise the negated gap length
+     */
+    public double overlappingDistance(Character other) {
+
+        // The former rotation by the constant angle 0 (sin = 0, cos = 1) was a no-op and has been removed.
+        double start = x;
+        double end = x + textPosition.getWidthDirAdj();
+        double otherStart = other.x;
+        double otherEnd = other.x + other.textPosition.getWidthDirAdj();
+
+        boolean overlapping = end >= otherStart && otherEnd >= start;
+
+        double[] xs = {start, end, otherStart, otherEnd};
+        Arrays.sort(xs);
+        return Math.abs(xs[2] - xs[1]) * (overlapping ? 1 : -1);
+    }
+
+
+    public void setNeighbors(List<Neighbor> neighbors) {
+
+        this.neighbors = neighbors;
+    }
+
+
+    /**
+     * Angle of the line through both reference points. The direction vector always points from the character with
+     * the smaller x to the one with the larger x, so the x-component passed to {@code atan2} is never negative.
+     *
+     * @param character the other character
+     * @return the angle in radians
+     */
+    public double angle(Character character) {
+
+        if (getX() > character.getX()) {
+            return Math.atan2(getY() - character.getY(), getX() - character.getX());
+        } else {
+            return Math.atan2(character.getY() - getY(), character.getX() - getX());
+        }
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/DisjointSets.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/DisjointSets.java
@ -0,0 +1,194 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+/**
+ * Disjoint-set (union-find) structure over a fixed collection of elements.
+ * <p>
+ * Every element starts in its own set. Sets are joined with {@link #union(Object, Object)}; iterating this object
+ * yields one set view per current set. Elements are looked up in a {@link HashMap}, so they are identified by their
+ * {@code equals}/{@code hashCode}.
+ *
+ * @param <E> element type
+ */
+public class DisjointSets<E> implements Iterable<Set<E>> {
+
+    private final Map<E, Entry<E>> map = new HashMap<>();
+
+
+    /**
+     * Creates one singleton set per element of the collection.
+     */
+    public DisjointSets(Collection<? extends E> collection) {
+
+        for (E element : collection) {
+            map.put(element, new Entry<E>(element));
+        }
+    }
+
+
+    /**
+     * Tells whether both elements currently belong to the same set.
+     * Throws a {@link NullPointerException} if one of the elements was not part of the initial collection.
+     */
+    public boolean areTogether(E e1, E e2) {
+
+        return map.get(e1).findRepresentative().equals(map.get(e2).findRepresentative());
+    }
+
+
+    /**
+     * Joins the sets of both elements. The smaller set is merged into the larger one; if both have the same size,
+     * the set of {@code e1} is merged into the set of {@code e2}.
+     * Throws a {@link NullPointerException} if one of the elements was not part of the initial collection.
+     */
+    public void union(E e1, E e2) {
+
+        Entry<E> r1 = map.get(e1).findRepresentative();
+        Entry<E> r2 = map.get(e2).findRepresentative();
+        if (!r1.equals(r2)) {
+            if (r1.size <= r2.size) {
+                r2.mergeWith(r1);
+            } else {
+                r1.mergeWith(r2);
+            }
+        }
+    }
+
+
+    /**
+     * Iterates over the current sets. Only entries that are representatives produce a set; the order follows the
+     * iteration order of the underlying map values. Removal is not supported.
+     */
+    @Override
+    public Iterator<Set<E>> iterator() {
+
+        return new Iterator<>() {
+
+            private final Iterator<Entry<E>> iterator = map.values().iterator();
+            private Entry<E> nextRepresentative;
+
+            // Instance initializer: position on the first representative right away.
+            {
+                findNextRepresentative();
+            }
+
+            @Override
+            public boolean hasNext() {
+
+                return nextRepresentative != null;
+            }
+
+
+            @Override
+            public Set<E> next() {
+
+                if (nextRepresentative == null) {
+                    throw new NoSuchElementException();
+                }
+                Set<E> result = nextRepresentative.asSet();
+                findNextRepresentative();
+                return result;
+            }
+
+
+            // Advances to the next entry that is a representative, or sets the field to null when exhausted.
+            private void findNextRepresentative() {
+
+                while (iterator.hasNext()) {
+                    Entry<E> candidate = iterator.next();
+                    if (candidate.isRepresentative()) {
+                        nextRepresentative = candidate;
+                        return;
+                    }
+                }
+                nextRepresentative = null;
+            }
+
+
+            @Override
+            public void remove() {
+
+                throw new UnsupportedOperationException();
+            }
+
+        };
+    }
+
+
+    /**
+     * Node of the structure. {@code parent} forms the union-find tree (a representative is its own parent);
+     * {@code next}/{@code last} form a singly linked chain of all members of a set, starting at the representative.
+     * {@code equals} is not overridden here, so entries are compared by identity.
+     */
+    private static class Entry<E> {
+
+        private int size = 1;
+        private final E value;
+        private Entry<E> parent = this;
+        private Entry<E> next;
+        private Entry<E> last = this;
+
+
+        Entry(E value) {
+
+            this.value = value;
+        }
+
+
+        /**
+         * Merges the set of the other representative into this one: sizes are added, the member chain of the other
+         * set is appended to the chain of this set and this entry becomes the parent of the other representative.
+         */
+        void mergeWith(Entry<E> otherRepresentative) {
+
+            size += otherRepresentative.size;
+            last.next = otherRepresentative;
+            last = otherRepresentative.last;
+            otherRepresentative.parent = this;
+        }
+
+
+        /**
+         * Finds the representative of the set this entry belongs to and re-points every entry on the path to it
+         * directly at the representative (path compression).
+         */
+        Entry<E> findRepresentative() {
+
+            Entry<E> representative = parent;
+            while (!representative.parent.equals(representative)) {
+                representative = representative.parent;
+            }
+            // Second pass: shorten the path walked above.
+            for (Entry<E> entry = this; !entry.equals(representative); ) {
+                Entry<E> nextEntry = entry.parent;
+                entry.parent = representative;
+                entry = nextEntry;
+            }
+            return representative;
+        }
+
+
+        boolean isRepresentative() {
+
+            return parent.equals(this);
+        }
+
+
+        /**
+         * Returns a read-only set view of the set this entry belongs to. The view is not a copy: iteration starts at
+         * the current representative and follows the member chain, and the size is read from the representative.
+         */
+        Set<E> asSet() {
+
+            return new AbstractSet<E>() {
+
+                @Override
+                public Iterator<E> iterator() {
+
+                    return new Iterator<E>() {
+
+                        private Entry<E> nextEntry = findRepresentative();
+
+
+                        @Override
+                        public boolean hasNext() {
+
+                            return nextEntry != null;
+                        }
+
+
+                        @Override
+                        public E next() {
+
+                            if (nextEntry == null) {
+                                throw new NoSuchElementException();
+                            }
+                            E result = nextEntry.value;
+                            nextEntry = nextEntry.next;
+                            return result;
+                        }
+
+
+                        @Override
+                        public void remove() {
+
+                            throw new UnsupportedOperationException();
+                        }
+
+                    };
+                }
+
+
+                @Override
+                public int size() {
+
+                    return findRepresentative().size;
+                }
+            };
+        }
+
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Histogram.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Histogram.java
@ -0,0 +1,90 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+/**
+ * Histogram with equally wide buckets over the value range given at construction time, plus helpers for
+ * smoothing the bucket counts with a kernel and for locating the most frequent value.
+ */
+public class Histogram {
+
+    // Margin added on both sides of the range so that the boundary values fall strictly inside a bucket.
+    private static final double EPSILON = 1.0e-6;
+    // Two bucket counts closer than this are treated as belonging to the same peak plateau.
+    private static final double PLATEAU_TOLERANCE = 0.0001;
+    private final double min;
+    private final double resolution;
+    private double[] frequencies;
+
+
+    /**
+     * Creates an empty histogram.
+     *
+     * @param minValue   smallest value that will be added
+     * @param maxValue   largest value that will be added
+     * @param resolution requested bucket width; the effective width is adjusted so that a whole number of
+     *                   buckets (at least one) covers the range
+     */
+    public Histogram(double minValue, double maxValue, double resolution) {
+
+        double span = maxValue - minValue;
+        int bucketCount = Math.max(1, (int) Math.round(span / resolution));
+        this.min = minValue - EPSILON;
+        this.resolution = (span + 2 * EPSILON) / bucketCount;
+        this.frequencies = new double[bucketCount];
+    }
+
+
+    /**
+     * Replaces the bucket counts by their convolution with the given kernel. Kernel taps that would read
+     * outside the histogram are ignored.
+     *
+     * @param kernel weights, centred on index {@code (kernel.length - 1) / 2}
+     */
+    public void kernelSmooth(double[] kernel) {
+
+        int bucketCount = frequencies.length;
+        double[] smoothed = new double[bucketCount];
+        int centre = (kernel.length - 1) / 2;
+        for (int tap = 0; tap < kernel.length; tap++) {
+            int offset = tap - centre;
+            int from = Math.max(0, offset);
+            int to = Math.min(bucketCount, bucketCount + offset);
+            for (int source = from; source < to; source++) {
+                smoothed[source - offset] += kernel[tap] * frequencies[source];
+            }
+        }
+        frequencies = smoothed;
+    }
+
+
+    /**
+     * Builds a normalised Gaussian kernel expressed in buckets of this histogram.
+     *
+     * @param length       window length in value units; converted to an odd number of buckets
+     * @param stdDeviation standard deviation in value units
+     * @return kernel weights that sum to one
+     */
+    public double[] createGaussianKernel(double length, double stdDeviation) {
+
+        int radius = (int) Math.round(length / resolution) / 2;
+        double[] kernel = new double[2 * radius + 1];
+
+        double twoSigmaSquared = 2 * (stdDeviation / resolution) * (stdDeviation / resolution);
+        double amplitude = 1 / Math.sqrt(Math.PI * twoSigmaSquared);
+
+        double total = 0;
+        for (int i = 0; i < kernel.length; i++) {
+            int offset = i - radius;
+            kernel[i] = amplitude * Math.exp(-offset * offset / twoSigmaSquared);
+            total += kernel[i];
+        }
+        for (int i = 0; i < kernel.length; i++) {
+            kernel[i] /= total;
+        }
+        return kernel;
+    }
+
+
+    /**
+     * Smooths the bucket counts with a Gaussian kernel of the given window length and standard deviation.
+     */
+    public void gaussianSmooth(double windowLength, double stdDeviation) {
+
+        double[] kernel = createGaussianKernel(windowLength, stdDeviation);
+        kernelSmooth(kernel);
+    }
+
+
+    /**
+     * Counts one occurrence of the given value in the bucket it falls into.
+     */
+    public void add(double value) {
+
+        int bucket = (int) ((value - min) / resolution);
+        frequencies[bucket] += 1.0;
+    }
+
+
+    /**
+     * @return the number of buckets
+     */
+    public int getSize() {
+
+        return frequencies.length;
+    }
+
+
+    /**
+     * Finds the first bucket holding the highest count and extends it over directly following buckets whose
+     * count is practically equal.
+     *
+     * @return the value at the centre of that bucket range
+     */
+    public double getPeakValue() {
+
+        int peakStart = 0;
+        for (int i = 1; i < frequencies.length; i++) {
+            if (frequencies[i] > frequencies[peakStart]) {
+                peakStart = i;
+            }
+        }
+
+        int peakEnd = peakStart + 1;
+        while (peakEnd < frequencies.length && Math.abs(frequencies[peakEnd] - frequencies[peakStart]) < PLATEAU_TOLERANCE) {
+            peakEnd++;
+        }
+
+        return ((double) peakStart + peakEnd) / 2 * resolution + min;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java
@ -0,0 +1,168 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.awt.geom.Rectangle2D;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+
+import lombok.Data;
+
+/**
+ * A text line built from a list of characters. The line is represented by a segment ({@code x0,y0} to
+ * {@code x1,y1}) fitted through the character positions, the mean character height, the words the characters
+ * were split into and a bounding box enclosing all characters.
+ */
+@Data
+public class Line extends BoundingBox {
+
+    private static final double WORD_DISTANCE_MULTIPLIER = 0.2;
+
+    // Relative tolerance below which the regression denominator is treated as zero (all characters share one x).
+    private static final double DEGENERATE_REGRESSION_TOLERANCE = 1.0e-12;
+
+    private final double x0;
+    private final double y0;
+
+    private final double x1;
+    private final double y1;
+
+    // Mean height of the characters of this line.
+    // NOTE(review): the getter Lombok generates for this field may hide a getHeight() of BoundingBox - confirm that is intended.
+    private final double height;
+
+    private final List<Character> characters;
+    private final List<TextPositionSequence> words = new ArrayList<>();
+
+
+    /**
+     * Builds a line from its characters.
+     *
+     * @param characters  characters of the line; must not be empty. The first and last element provide the x
+     *                    coordinates of the segment end points, so callers are expected to pass them ordered by x.
+     * @param wordSpacing spacing estimate; a gap larger than {@code wordSpacing * WORD_DISTANCE_MULTIPLIER}
+     *                    between two consecutive characters starts a new word
+     */
+    public Line(List<Character> characters, double wordSpacing) {
+
+        this.characters = characters;
+
+        if (characters.size() >= 2) {
+            // least-squares regression y = a + b * x through the character positions
+            int count = characters.size();
+            double sx = 0.0;
+            double sxx = 0.0;
+            double sxy = 0.0;
+            double sy = 0.0;
+            for (Character character : characters) {
+                sx += character.getX();
+                sxx += character.getX() * character.getX();
+                sxy += character.getX() * character.getY();
+                sy += character.getY();
+            }
+            double denominator = count * sxx - sx * sx;
+            // When every character has the same x (e.g. a glyph drawn twice at one position) the denominator is
+            // zero and the slope would become NaN/Infinity, which in turn makes angle, length and all distance
+            // computations NaN. Fall back to a horizontal segment through the mean y in that case.
+            boolean degenerate = Math.abs(denominator) <= DEGENERATE_REGRESSION_TOLERANCE * Math.max(1.0, Math.abs(count * sxx));
+            double b = degenerate ? 0.0 : (count * sxy - sx * sy) / denominator;
+            double a = (sy - b * sx) / count;
+
+            this.x0 = characters.get(0).getX();
+            this.y0 = a + b * this.x0;
+            this.x1 = characters.get(count - 1).getX();
+            this.y1 = a + b * this.x1;
+        } else {
+            // single character: short horizontal segment centred on the character
+            Character character = characters.get(0);
+            double dx = character.getTextPosition().getWidthDirAdj() / 3;
+            this.x0 = character.getX() - dx;
+            this.x1 = character.getX() + dx;
+            this.y0 = character.getY();
+            this.y1 = character.getY();
+        }
+        height = computeHeight();
+        computeWords(wordSpacing * WORD_DISTANCE_MULTIPLIER);
+        buildBBox();
+    }
+
+
+    /**
+     * @return the angle of the segment from its start to its end point, in radians
+     */
+    public double getAngle() {
+
+        return Math.atan2(y1 - y0, x1 - x0);
+    }
+
+
+    /**
+     * @return the Euclidean length of the segment
+     */
+    public double getLength() {
+
+        return Math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
+    }
+
+
+    // Arithmetic mean of the character heights.
+    private double computeHeight() {
+
+        double sum = 0.0;
+        for (Character component : characters) {
+            sum += component.getHeight();
+        }
+        return sum / characters.size();
+    }
+
+
+    /**
+     * Computes the absolute difference between the angles of both lines; differences above a right angle are
+     * mapped to {@code PI - difference}.
+     *
+     * @param j the line to compare with
+     * @return the angular difference in radians
+     */
+    public double angularDifference(Line j) {
+
+        double diff = Math.abs(getAngle() - j.getAngle());
+        if (diff <= Math.PI / 2) {
+            return diff;
+        } else {
+            return Math.PI - diff;
+        }
+    }
+
+
+    /**
+     * Signed horizontal relation between the x ranges of both segments.
+     *
+     * @param other the line to compare with
+     * @return the distance between the two middle values of the four end-point x coordinates; positive when the
+     * ranges overlap (overlap width), negative when they are apart (gap width)
+     */
+    public double horizontalDistance(Line other) {
+
+        double[] xs = new double[4];
+        xs[0] = x0;
+        xs[1] = x1;
+        xs[2] = other.x0;
+        xs[3] = other.x1;
+        // must be evaluated before sorting, while the indices still identify the end points
+        boolean overlapping = xs[1] >= xs[2] && xs[3] >= xs[0];
+        Arrays.sort(xs);
+        return Math.abs(xs[2] - xs[1]) * (overlapping ? 1 : -1);
+    }
+
+
+    /**
+     * @param other the line to compare with
+     * @return the absolute difference between the y coordinates of both segment mid points
+     */
+    public double verticalDistance(Line other) {
+
+        double ym = (y0 + y1) / 2;
+        double yn = (other.y0 + other.y1) / 2;
+        return Math.abs(ym - yn);
+    }
+
+
+    // Splits the characters into words: a new word starts whenever the gap between the end of the previous
+    // character and the start of the current one exceeds the given spacing.
+    private void computeWords(double wordSpacing) {
+
+        TextPositionSequence word = new TextPositionSequence();
+        Character previous = null;
+        for (Character current : characters) {
+            if (previous != null) {
+                double dist = current.getTextPosition().getXDirAdj() - previous.getTextPosition().getXDirAdj() - previous.getTextPosition().getWidthDirAdj();
+                if (dist > wordSpacing) {
+                    words.add(word);
+                    word = new TextPositionSequence();
+                }
+            }
+            word.getTextPositions().add(current.getTextPosition());
+            previous = current;
+        }
+        words.add(word);
+    }
+
+
+    // Sets the bounding box to the smallest rectangle enclosing the text positions of all characters.
+    private void buildBBox() {
+
+        double minX = Double.POSITIVE_INFINITY;
+        double minY = Double.POSITIVE_INFINITY;
+        double maxX = Double.NEGATIVE_INFINITY;
+        double maxY = Double.NEGATIVE_INFINITY;
+
+        for (Character character : characters) {
+
+            minX = Math.min(minX, character.getTextPosition().getXDirAdj());
+            minY = Math.min(minY, character.getTextPosition().getYDirAdj());
+            maxX = Math.max(maxX, character.getTextPosition().getXDirAdj() + character.getTextPosition().getWidthDirAdj());
+            maxY = Math.max(maxY, character.getTextPosition().getYDirAdj() + character.getTextPosition().getHeightDir());
+
+        }
+
+        this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
+    }
+
+
+    /**
+     * @return the words of this line separated by single spaces
+     */
+    @Override
+    public String toString() {
+
+        StringBuilder sb = new StringBuilder();
+        words.forEach(word -> sb.append(word.toString()).append(" "));
+        return sb.toString().trim();
+    }
+
+}
+
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java
@ -0,0 +1,36 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import lombok.Getter;
+
+public class Neighbor {
+
+    @Getter
+    private final double distance;
+    @Getter
+    private final double angle;
+    private final Character originCharacter;
+    @Getter
+    private final Character character;
+
+
+    public Neighbor(Character neighbor, Character origin) {
+
+        this.distance = neighbor.distance(origin);
+        this.angle = neighbor.angle(origin);
+        this.character = neighbor;
+        this.originCharacter = origin;
+    }
+
+
+    public double getHorizontalDistance() {
+
+        return character.horizontalDistance(originCharacter);
+    }
+
+
+    public double getVerticalDistance() {
+
+        return character.verticalDistance(originCharacter);
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java
@ -0,0 +1,51 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.awt.geom.Rectangle2D;
+import java.util.Comparator;
+import java.util.List;
+
+import lombok.Data;
+
+/**
+ * A group of lines together with the bounding box enclosing them.
+ */
+@Data
+public class Zone extends BoundingBox {
+
+    private List<Line> lines;
+
+
+    /**
+     * Creates a zone from the given lines. The passed list is sorted in place by the lines' y coordinate and
+     * kept as the zone's line list.
+     *
+     * @param lines the lines of the zone; must be a modifiable list
+     */
+    @SuppressWarnings("PMD.ConstructorCallsOverridableMethod")
+    public Zone(List<Line> lines) {
+
+        this.lines = lines;
+        this.lines.sort(Comparator.comparingDouble(Line::getY));
+        buildBBox();
+    }
+
+
+    /**
+     * Recomputes the bounding box from the current lines, using each line's x, y, width and height.
+     */
+    public void buildBBox() {
+
+        double left = lines.stream()
+                .mapToDouble(line -> line.getX())
+                .min()
+                .orElse(Double.POSITIVE_INFINITY);
+        double top = lines.stream()
+                .mapToDouble(line -> line.getY())
+                .min()
+                .orElse(Double.POSITIVE_INFINITY);
+        double right = lines.stream()
+                .mapToDouble(line -> line.getX() + line.getWidth())
+                .max()
+                .orElse(Double.NEGATIVE_INFINITY);
+        double bottom = lines.stream()
+                .mapToDouble(line -> line.getY() + line.getHeight())
+                .max()
+                .orElse(Double.NEGATIVE_INFINITY);
+
+        this.setBBox(new Rectangle2D.Double(left, top, right - left, bottom - top));
+    }
+
+
+    /**
+     * @return the text of all lines, one line of text per {@link Line}
+     */
+    public String toString() {
+
+        StringBuilder text = new StringBuilder();
+        for (Line line : lines) {
+            text.append(line.toString());
+            text.append("\n");
+        }
+        return text.toString().trim();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java
@ -0,0 +1,51 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.AngleFilter;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.DisjointSets;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line;
+
+/**
+ * Groups characters into text lines by joining each character with those of its neighbours that lie close
+ * enough in a roughly horizontal direction.
+ */
+@Service
+public class LineBuilderService {
+
+    private static final double CHARACTER_SPACING_DISTANCE_MULTIPLIER = 3.5;
+    private static final double MAX_VERTICAL_CHARACTER_DISTANCE = 0.67;
+    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+
+
+    /**
+     * Builds lines from characters whose neighbours have already been assigned.
+     *
+     * @param characters       the characters of a page
+     * @param characterSpacing character spacing estimate; scaled to the horizontal semi-axis of the joining
+     *                         ellipse and also passed to every created {@link Line} as its word spacing
+     * @param lineSpacing      line spacing estimate; scaled to the vertical semi-axis of the joining ellipse
+     * @return one line per group of joined characters, each with its characters ordered by x
+     */
+    public List<Line> buildLines(List<Character> characters, double characterSpacing, double lineSpacing) {
+
+        double horizontalLimit = characterSpacing * CHARACTER_SPACING_DISTANCE_MULTIPLIER;
+        double verticalLimit = lineSpacing * MAX_VERTICAL_CHARACTER_DISTANCE;
+
+        AngleFilter horizontalFilter = new AngleFilter(-ANGLE_TOLERANCE, ANGLE_TOLERANCE);
+        DisjointSets<Character> sets = new DisjointSets<>(characters);
+
+        for (Character character : characters) {
+            for (var neighbor : character.getNeighbors()) {
+                double x = neighbor.getHorizontalDistance() / horizontalLimit;
+                double y = neighbor.getVerticalDistance() / verticalLimit;
+
+                boolean sameDirection = character.getTextPosition().getDir() == neighbor.getCharacter().getTextPosition().getDir();
+                if (!sameDirection || !horizontalFilter.matches(neighbor)) {
+                    continue;
+                }
+                // join only when the neighbour lies inside the ellipse spanned by both limits
+                if (Math.pow(x, 2) + Math.pow(y, 2) <= 1) {
+                    sets.union(character, neighbor.getCharacter());
+                }
+            }
+        }
+
+        List<Line> result = new ArrayList<>();
+        sets.forEach(group -> {
+            List<Character> ordered = new ArrayList<>(group);
+            ordered.sort(Comparator.comparingDouble(Character::getX));
+            Line line = new Line(ordered, characterSpacing);
+            result.add(line);
+        });
+        return result;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java
@ -0,0 +1,78 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Neighbor;
+
+@Service
+public class NearestNeighbourService {
+
+    private static final int NUMBER_OF_NEIGHBOURS = 8;
+    private static final double STEP = 16.0;
+
+
+    public void findNearestNeighbors(List<Character> characters) {
+
+        if (characters.isEmpty()) {
+            return;
+        }
+
+        characters.sort(Comparator.comparingDouble(Character::getX));
+
+        int maxNeighborCount = NUMBER_OF_NEIGHBOURS;
+        if (characters.size() <= NUMBER_OF_NEIGHBOURS) {
+            maxNeighborCount = characters.size() - 1;
+        }
+
+        for (int i = 0; i < characters.size(); i++) {
+
+            List<Neighbor> candidates = new ArrayList<>();
+
+            int start = i;
+            int end = i + 1;
+
+            double distance = Double.POSITIVE_INFINITY;
+
+            for (double searchDistance = 0; searchDistance < distance; ) {
+
+                searchDistance += STEP;
+                boolean newCandidatesFound = false;
+
+                while (start > 0 && characters.get(i).getX() - characters.get(start - 1).getX() < searchDistance) {
+                    start--;
+                    candidates.add(new Neighbor(characters.get(start), characters.get(i)));
+                    clearLeastDistant(candidates, maxNeighborCount);
+                    newCandidatesFound = true;
+                }
+
+                while (end < characters.size() && characters.get(end).getX() - characters.get(i).getX() < searchDistance) {
+                    candidates.add(new Neighbor(characters.get(end), characters.get(i)));
+                    clearLeastDistant(candidates, maxNeighborCount);
+                    end++;
+                    newCandidatesFound = true;
+                }
+
+                if (newCandidatesFound && candidates.size() >= maxNeighborCount) {
+                    distance = candidates.get(maxNeighborCount - 1).getDistance();
+                }
+            }
+            clearLeastDistant(candidates, maxNeighborCount);
+            characters.get(i).setNeighbors(new ArrayList<>(candidates));
+        }
+    }
+
+
+    private void clearLeastDistant(List<Neighbor> candidates, int maxNeighborCount) {
+
+        if (candidates.size() > maxNeighborCount) {
+            candidates.sort(Comparator.comparingDouble(Neighbor::getDistance));
+            candidates.remove(candidates.remove(candidates.size() - 1));
+        }
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java
@ -0,0 +1,100 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
+
+/**
+ * Brings the zones of a page into reading order.
+ */
+@Service
+public class ReadingOrderService {
+
+    private static final double THRESHOLD = 5;
+
+
+    /**
+     * Orders the given zones.
+     *
+     * @param zones   the zones of a page; sorted in place when {@code xyOrder} is set
+     * @param xyOrder {@code true} to order purely by y, then x; {@code false} to use the two-column heuristic
+     * @return the ordered zones; the input list itself when it holds at most one zone or {@code xyOrder} is set
+     */
+    public List<Zone> resolve(List<Zone> zones, boolean xyOrder) {
+
+        if (zones.size() <= 1) {
+            return zones;
+        }
+
+        if (!xyOrder) {
+            return resolveMultiColumnReadingOrder(zones);
+        }
+
+        zones.sort(byYThenX(0));
+        return zones;
+    }
+
+
+    // Compares by y in steps of THRESHOLD, then by x in steps of the given precision.
+    private static Comparator<BoundingBox> byYThenX(double xPrecision) {
+
+        return Comparator.comparing(BoundingBox::getY, (first, second) -> DoubleUtils.compareDouble(first, second, THRESHOLD))
+                .thenComparing(BoundingBox::getX, (first, second) -> DoubleUtils.compareDouble(first, second, xPrecision));
+    }
+
+
+    private List<Zone> resolveMultiColumnReadingOrder(List<Zone> zones) {
+
+        // Simple reading order resolver for multi column page layout as described here : https://pub.towardsai.net/advanced-rag-02-unveiling-pdf-parsing-b84ae866344e
+        // TODO implement a more fancy reading order resolver see https://github.com/BobLd/DocumentLayoutAnalysis/blob/master/README.md#reading-order
+
+        // horizontal extent covered by all zones
+        double leftEdge = Double.POSITIVE_INFINITY;
+        double rightEdge = Double.NEGATIVE_INFINITY;
+        for (Zone zone : zones) {
+            double zoneLeft = zone.getX();
+            double zoneRight = zone.getX() + zone.getWidth();
+            if (zoneLeft < leftEdge) {
+                leftEdge = zoneLeft;
+            }
+            if (zoneRight > rightEdge) {
+                rightEdge = zoneRight;
+            }
+        }
+        double midLineXCoordinate = (leftEdge + rightEdge) / 2;
+
+        // zones entirely on one side of the mid line form the columns, everything else spans the middle
+        List<Zone> leftColumn = new ArrayList<>();
+        List<Zone> rightColumn = new ArrayList<>();
+        List<Zone> spanning = new ArrayList<>();
+        for (Zone zone : zones) {
+            double zoneLeft = zone.getX();
+            double zoneRight = zone.getX() + zone.getWidth();
+            if (zoneLeft < midLineXCoordinate && zoneRight < midLineXCoordinate) {
+                leftColumn.add(zone);
+            } else if (zoneLeft > midLineXCoordinate && zoneRight > midLineXCoordinate) {
+                rightColumn.add(zone);
+            } else {
+                spanning.add(zone);
+            }
+        }
+
+        Comparator<BoundingBox> order = byYThenX(THRESHOLD);
+        leftColumn.sort(order);
+        rightColumn.sort(order);
+        spanning.sort(order);
+
+        List<Zone> ordered = new ArrayList<>(leftColumn);
+        ordered.addAll(rightColumn);
+
+        // each spanning zone goes in front of the first already ordered zone that starts below it;
+        // zones for which no such position exists are appended at the end
+        List<Zone> unplaced = new ArrayList<>();
+        for (Zone candidate : spanning) {
+            int insertAt = -1;
+            for (int i = 0; i < ordered.size(); i++) {
+                if (candidate.getY() < ordered.get(i).getY()) {
+                    insertAt = i;
+                    break;
+                }
+            }
+            if (insertAt >= 0) {
+                ordered.add(insertAt, candidate);
+            } else {
+                unplaced.add(candidate);
+            }
+        }
+        ordered.addAll(unplaced);
+
+        return ordered;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java
@ -0,0 +1,56 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.AngleFilter;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Histogram;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Neighbor;
+
+/**
+ * Estimates the dominant character and line spacing of a page from the distances between neighbouring
+ * characters.
+ */
+@Service
+public class SpacingService {
+
+    private static final double SPACING_HISTOGRAM_RESOLUTION = 0.5;
+    private static final double SPACING_HISTOGRAM_SMOOTHING_LENGTH = 2.5;
+    private static final double SPACING_HIST_SMOOTHING_STANDARD_DEVIATION = 0.5;
+    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+    // Result reported when no character has any neighbour, i.e. there is nothing to measure.
+    private static final double NO_SPACING = 0.0;
+
+
+    /**
+     * @param characters characters with assigned neighbours
+     * @return the most frequent distance between neighbours lying around angle 0, or 0 when there are no neighbours
+     */
+    public double computeCharacterSpacing(List<Character> characters) {
+
+        return computeSpacing(characters, 0);
+    }
+
+
+    /**
+     * @param characters characters with assigned neighbours
+     * @return the most frequent distance between neighbours lying around angle PI/2, or 0 when there are no neighbours
+     */
+    public double computeLineSpacing(List<Character> characters) {
+
+        return computeSpacing(characters, Math.PI / 2);
+    }
+
+
+    // Builds a histogram of the distances of all neighbours whose angle lies within ANGLE_TOLERANCE of the given
+    // angle, smooths it and returns the value at its peak.
+    private double computeSpacing(List<Character> characters, double angle) {
+
+        double maxDistance = Double.NEGATIVE_INFINITY;
+
+        for (Character character : characters) {
+            for (Neighbor neighbor : character.getNeighbors()) {
+                maxDistance = Math.max(maxDistance, neighbor.getDistance());
+            }
+        }
+        if (maxDistance == Double.NEGATIVE_INFINITY) {
+            // No neighbour at all: a histogram over [0, -Infinity] would be degenerate and its peak -Infinity.
+            return NO_SPACING;
+        }
+
+        Histogram histogram = new Histogram(0, maxDistance, SPACING_HISTOGRAM_RESOLUTION);
+        AngleFilter angleFilter = new AngleFilter(angle - ANGLE_TOLERANCE, angle + ANGLE_TOLERANCE);
+        for (Character character : characters) {
+            for (Neighbor neighbor : character.getNeighbors()) {
+                if (angleFilter.matches(neighbor)) {
+                    histogram.add(neighbor.getDistance());
+                }
+            }
+        }
+
+        histogram.gaussianSmooth(SPACING_HISTOGRAM_SMOOTHING_LENGTH, SPACING_HIST_SMOOTHING_STANDARD_DEVIATION);
+        return histogram.getPeakValue();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java
@ -0,0 +1,150 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.DisjointSets;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
+
+/**
+ * Groups text lines into zones (blocks of text) based on their mutual horizontal and vertical distances.
+ */
+@Service
+public class ZoneBuilderService {
+
+    private static final double MIN_HORIZONTAL_DISTANCE_MULTIPLIER = -0.5;
+    private static final double MAX_VERTICAL_DISTANCE_MULTIPLIER = 1.2;
+
+    private static final double MIN_HORIZONTAL_MERGE_DISTANCE_MULTIPLIER = -3.0;
+
+    private static final double MAX_VERTICAL_MERGE_DISTANCE_MULTIPLIER = 0.5;
+
+    private static final double MIN_LINE_SIZE_SCALE = 0.9;
+
+    private static final double MAX_LINE_SIZE_SCALE = 2.5;
+
+    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+
+    // Above this number of zones the page is collapsed into a single zone.
+    private static final int MAX_ZONES = 300;
+
+    private static final double MAX_VERTICAL_MERGE_DISTANCE = 0.5;
+
+
+    /**
+     * Builds zones by joining every pair of lines that has a similar angle and is close enough. Distances are
+     * divided by a scale derived from the smaller of both line heights relative to the mean line height, so
+     * that larger text tolerates larger gaps. {@code Line.horizontalDistance} is positive for overlapping and
+     * negative for separated x ranges, which is why the horizontal limits are lower bounds.
+     *
+     * @param lines            the lines of a page
+     * @param characterSpacing character spacing estimate, scales the horizontal limits
+     * @param lineSpacing      line spacing estimate, scales the vertical limits
+     * @return a modifiable list of zones; a single zone holding re-merged lines when more than
+     * {@code MAX_ZONES} zones were found
+     */
+    public List<Zone> buildZones(List<Line> lines, double characterSpacing, double lineSpacing) {
+
+        double minHorizontalDistance = characterSpacing * MIN_HORIZONTAL_DISTANCE_MULTIPLIER;
+        double maxVerticalDistance = lineSpacing * MAX_VERTICAL_DISTANCE_MULTIPLIER;
+        double minHorizontalMergeDistance = characterSpacing * MIN_HORIZONTAL_MERGE_DISTANCE_MULTIPLIER;
+        double maxVerticalMergeDistance = lineSpacing * MAX_VERTICAL_MERGE_DISTANCE_MULTIPLIER;
+
+        DisjointSets<Line> sets = new DisjointSets<>(lines);
+
+        double meanHeight = calculateMeanHeight(lines);
+
+        for (Line outerLine : lines) {
+            for (Line innerLine : lines) {
+
+                if (sets.areTogether(outerLine, innerLine) || outerLine.angularDifference(innerLine) > ANGLE_TOLERANCE) {
+                    continue;
+                }
+
+                double scale = Math.min(outerLine.getHeight(), innerLine.getHeight()) / meanHeight;
+                scale = Math.max(MIN_LINE_SIZE_SCALE, Math.min(scale, MAX_LINE_SIZE_SCALE));
+
+                double horizontalDistance = outerLine.horizontalDistance(innerLine) / scale;
+                double verticalDistance = outerLine.verticalDistance(innerLine) / scale;
+
+                if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance //
+                        || minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
+                    sets.union(outerLine, innerLine);
+                }
+            }
+        }
+
+        List<Zone> zones = new ArrayList<>();
+        sets.forEach(group -> {
+            zones.add(new Zone(new ArrayList<>(group)));
+        });
+
+        if (zones.size() > MAX_ZONES) {
+            List<Line> oneZoneLines = new ArrayList<>();
+            for (Zone zone : zones) {
+                oneZoneLines.addAll(zone.getLines());
+            }
+            // Return a modifiable list like the regular path does, so callers can sort or extend the result
+            // regardless of which path produced it.
+            List<Zone> singleZone = new ArrayList<>();
+            singleZone.add(mergeLinesInZone(oneZoneLines, characterSpacing, lineSpacing));
+            return singleZone;
+        }
+
+        return zones;
+    }
+
+
+    // Mean line height weighted by line length. Lines whose height or length is not a finite number are left
+    // out, because a single NaN would turn the mean - and with it every scale in buildZones - into NaN and no
+    // lines would be joined at all. When no line has a positive length the plain average height is used.
+    private double calculateMeanHeight(List<Line> lines) {
+
+        double weightedHeights = 0.0;
+        double weights = 0.0;
+        double plainHeights = 0.0;
+        int usableLines = 0;
+        for (Line line : lines) {
+            double lineHeight = line.getHeight();
+            if (!Double.isFinite(lineHeight)) {
+                continue;
+            }
+            plainHeights += lineHeight;
+            usableLines++;
+
+            double weight = line.getLength();
+            if (Double.isFinite(weight) && weight > 0) {
+                weightedHeights += lineHeight * weight;
+                weights += weight;
+            }
+        }
+        if (weights > 0) {
+            return weightedHeights / weights;
+        }
+        return usableLines > 0 ? plainHeights / usableLines : Double.NaN;
+    }
+
+
+    // Fallback for pages that produced too many zones: joins lines that lie at nearly the same height and either
+    // do not overlap horizontally, or overlap by (almost exactly) the full length of the shorter line while their
+    // characters barely overlap. Each group becomes one new line; all new lines form a single zone.
+    private Zone mergeLinesInZone(List<Line> lines, double characterSpacing, double lineSpacing) {
+
+        double maxHorizontalDistance = 0;
+        double minVerticalDistance = 0;
+        double maxVerticalDistance = lineSpacing * MAX_VERTICAL_MERGE_DISTANCE;
+
+        DisjointSets<Line> sets = new DisjointSets<>(lines);
+
+        lines.forEach(outer -> {
+
+            lines.forEach(inner -> {
+                if (inner != outer) {
+
+                    double horizontalDistance = outer.horizontalDistance(inner);
+                    double verticalDistance = outer.verticalDistance(inner);
+
+                    if (horizontalDistance <= maxHorizontalDistance && minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance) {
+                        sets.union(outer, inner);
+                    } else if (minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance && Math.abs(horizontalDistance - Math.min(outer.getLength(),
+                            inner.getLength())) < 0.1) {
+                        // one line lies horizontally within the other: join only when their characters do not
+                        // really overlap (no overlap above 2 and at most two touching character pairs)
+                        boolean characterOverlap = false;
+                        int overlappingCount = 0;
+                        for (Character outerCharacter : outer.getCharacters()) {
+                            for (Character innerCharacter : inner.getCharacters()) {
+                                double characterOverlapDistance = outerCharacter.overlappingDistance(innerCharacter);
+                                if (characterOverlapDistance > 2) {
+                                    characterOverlap = true;
+                                }
+                                if (characterOverlapDistance > 0) {
+                                    overlappingCount++;
+                                }
+                            }
+                        }
+                        if (!characterOverlap && overlappingCount <= 2) {
+                            sets.union(outer, inner);
+                        }
+                    }
+                }
+            });
+        });
+
+        List<Line> outputZone = new ArrayList<>();
+        for (Set<Line> group : sets) {
+            List<Character> components = new ArrayList<>();
+            for (Line line : group) {
+                components.addAll(line.getCharacters());
+            }
+            components.sort(Comparator.comparingDouble(Character::getX));
+
+            outputZone.add(new Line(components, characterSpacing));
+        }
+        return new Zone(outputZone);
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java
@ -0,0 +1,15 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils;
+
+/**
+ * Helpers for comparing floating point numbers.
+ */
+public class DoubleUtils {
+
+    /**
+     * Compares two numbers after dividing each by the precision and rounding to the nearest whole number, so
+     * that values falling onto the same rounded step compare as equal.
+     *
+     * @param d1        first value
+     * @param d2        second value
+     * @param precision step width; {@code 0} is treated as {@code 1}
+     * @return a negative number, zero or a positive number as the first rounded step is smaller than, equal to
+     * or larger than the second; when either value is NaN the result of {@link Double#compare(double, double)}
+     */
+    public static int compareDouble(double d1, double d2, double precision) {
+
+        boolean anyNaN = Double.isNaN(d1) || Double.isNaN(d2);
+        if (anyNaN) {
+            return Double.compare(d1, d2);
+        }
+
+        double step = precision == 0 ? 1 : precision;
+        long firstStep = Math.round(d1 / step);
+        long secondStep = Math.round(d2 / step);
+        return Long.compare(firstStep, secondStep);
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
@ -13,6 +13,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
+import java.util.stream.Collectors;

 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
@ -22,6 +23,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Header;
@ -77,6 +79,8 @@ public class DocumentGraphFactory {
        GenericSemanticNode node;
        if (originalTextBlock.isHeadline()) {
            node = Headline.builder().documentTree(context.getDocumentTree()).build();
+        } else if (originalTextBlock.isToDuplicate()) {
+            node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree()).build();
        } else {
            node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
        }
@ -87,6 +91,14 @@ public class DocumentGraphFactory {
        textBlocks.add(originalTextBlock);
        textBlocks.addAll(textBlocksToMerge);
        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
+
+        if (node instanceof DuplicatedParagraph duplicatedParagraph) {
+            AtomicTextBlock unsortedTextBlock = context.textBlockFactory.buildAtomicTextBlock(textBlocks.stream()
+                    .flatMap(tb -> tb.getSequences().stream())
+                    .collect(Collectors.toList()), node, context, page);
+            duplicatedParagraph.setUnsortedLeafTextBlock(unsortedTextBlock);
+        }
+
        List<Integer> treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node);
        node.setLeafTextBlock(textBlock);
        node.setTreeId(treeId);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
@ -11,12 +11,12 @@ import java.util.Map;
 import java.util.Set;

 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
+import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TableMergingUtility;

 import lombok.experimental.UtilityClass;
@ -171,6 +171,7 @@ public class SectionNodeFactory {
                .filter(abstractTextContainer -> abstractTextContainer.intersectsY(atc))
                .map(abstractTextContainer -> (TextPageBlock) abstractTextContainer)
                .filter(abstractTextContainer -> abstractTextContainer.getDir() == atc.getDir())
+                .filter(abstractTextContainer -> !abstractTextContainer.isToDuplicate())
                .toList();
    }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java
@ -8,8 +8,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;

-import javax.xml.parsers.DocumentBuilder;
-
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
@ -18,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Do
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
@ -33,27 +32,20 @@ public class DocumentDataMapper {
    public DocumentData toDocumentData(Document document) {

        List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
-                .flatMap(textBlock -> textBlock.getAtomicTextBlocks()
-                        .stream())
+                .flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
                .distinct()
                .map(DocumentDataMapper::toAtomicTextBlockData)
                .toList();

        List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
-                .flatMap(textBlock -> textBlock.getAtomicTextBlocks()
-                        .stream())
+                .flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
                .distinct()
                .map(DocumentDataMapper::toAtomicPositionBlockData)
                .toList();

-        Set<Long> nonEmptyTextBlocks = documentTextData.stream()
-                .mapToLong(DocumentTextData::getId).boxed()
-                .collect(Collectors.toSet());
+        Set<Long> nonEmptyTextBlocks = documentTextData.stream().mapToLong(DocumentTextData::getId).boxed().collect(Collectors.toSet());

-        List<DocumentPage> documentPageData = document.getPages()
-                .stream()
-                .map(DocumentDataMapper::toPageData)
-                .toList();
+        List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
        DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
        return DocumentData.builder()
                .documentTextData(documentTextData.toArray(new DocumentTextData[0]))
@ -84,22 +76,17 @@ public class DocumentDataMapper {
            case TABLE -> PropertiesMapper.buildTableProperties((Table) entry.getNode());
            case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCell) entry.getNode());
            case IMAGE -> PropertiesMapper.buildImageProperties((Image) entry.getNode());
+            case PARAGRAPH ->
+                    entry.getNode() instanceof DuplicatedParagraph duplicatedParagraph ? PropertiesMapper.buildDuplicateParagraphProperties(duplicatedParagraph) : new HashMap<>();
            default -> new HashMap<>();
        };

        DocumentStructure.EntryData.EntryDataBuilder documentBuilder = DocumentStructure.EntryData.builder()
                .treeId(toPrimitiveIntArray(entry.getTreeId()))
-                .children(entry.getChildren()
-                                  .stream()
-                                  .map(DocumentDataMapper::toEntryData)
-                                  .toList())
+                .children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
                .type(entry.getType())
                .atomicBlockIds(atomicTextBlocks)
-                .pageNumbers(entry.getNode().getPages()
-                                     .stream()
-                                     .map(Page::getNumber)
-                                     .map(Integer::longValue)
-                                     .toArray(Long[]::new))
+                .pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
                .properties(properties);
        if (entry.getNode() != null) {
            documentBuilder.engines(entry.getNode().getEngines());
@ -112,10 +99,7 @@ public class DocumentDataMapper {

    private Long[] toAtomicTextBlockIds(TextBlock textBlock) {

-        return textBlock.getAtomicTextBlocks()
-                .stream()
-                .map(AtomicTextBlock::getId)
-                .toArray(Long[]::new);
+        return textBlock.getAtomicTextBlocks().stream().map(AtomicTextBlock::getId).toArray(Long[]::new);
    }


@ -167,9 +151,7 @@ public class DocumentDataMapper {

    private int[] toPrimitiveIntArray(List<Integer> list) {

-        return list.stream()
-                .mapToInt(Integer::intValue)
-                .toArray();
+        return list.stream().mapToInt(Integer::intValue).toArray();
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java
@ -7,13 +7,14 @@ import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;

-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Header;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Headline;
@ -61,7 +62,7 @@ public class DocumentGraphMapper {

            SemanticNode node = switch (entryData.getType()) {
                case SECTION -> buildSection(context);
-                case PARAGRAPH -> buildParagraph(context);
+                case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
                case HEADLINE -> buildHeadline(context);
                case HEADER -> buildHeader(context);
                case FOOTER -> buildFooter(context);
@ -140,7 +141,17 @@ public class DocumentGraphMapper {
    }


-    private Paragraph buildParagraph(Context context) {
+    private Paragraph buildParagraph(Context context, Map<String, String> properties) {
+        // Entries whose properties mark them as duplicates are restored as DuplicatedParagraph; all others become plain Paragraphs.
+        if (PropertiesMapper.isDuplicateParagraph(properties)) {
+            // The node is built first because toTextBlock below takes it as an argument.
+            DuplicatedParagraph duplicatedParagraph = DuplicatedParagraph.builder().documentTree(context.documentTree).build();
+            // The ids read from the properties are handed to toTextBlock; its result becomes the duplicate's unsorted text block.
+            Long[] unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
+            duplicatedParagraph.setUnsortedLeafTextBlock(toTextBlock(unsortedTextblockIds, context, duplicatedParagraph));
+            return duplicatedParagraph;
+
+        }
 
         return Paragraph.builder().documentTree(context.documentTree).build();
     }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/PropertiesMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/PropertiesMapper.java
@ -1,17 +1,19 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.mapper;

 import java.awt.geom.Rectangle2D;
-import java.util.Collections;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;

 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;

 public class PropertiesMapper {

@ -76,6 +78,32 @@ public class PropertiesMapper {
    }


+    public static Map<String, String> buildDuplicateParagraphProperties(DuplicatedParagraph duplicatedParagraph) {
+        // The ids of the unsorted text block's atomic blocks are stored in Arrays.toString form, e.g. "[1, 2]".
+        String serializedIds = Arrays.toString(toAtomicTextBlockIds(duplicatedParagraph.getUnsortedLeafTextBlock()));
+        // Copy into a HashMap so the returned map stays mutable, as before.
+        return new HashMap<>(Map.of(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID, serializedIds));
+    }
+
+
+    public static boolean isDuplicateParagraph(Map<String, String> properties) {
+        // A paragraph counts as a duplicate iff its properties carry the unsorted text block ids; a null map (NOTE(review): presumably possible for entries stored without properties, confirm) means a plain paragraph.
+        return properties != null && properties.containsKey(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
+    }
+
+
+    public static Long[] getUnsortedTextblockIds(Map<String, String> properties) {
+        String serializedIds = properties.get(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID); // text form of the id list; null if the key is absent
+        return toLongArray(serializedIds); // decode the text form back into ids
+    }
+
+
+    public static Long[] toLongArray(String ids) {
+        // Inverse of Arrays.toString(Long[]) ("[1, 2, 3]"): strip the brackets, split on ',', trim every token (Long.valueOf rejects the space after each comma) and skip blank tokens so "[]" yields an empty array.
+        return Arrays.stream(ids.substring(1, ids.length() - 1).split(",")).map(String::trim).filter(token -> !token.isEmpty()).map(Long::valueOf).toArray(Long[]::new);
+    }
+
+
    private static ImageType parseImageType(String imageType) {

        return switch (imageType) {
@ -101,4 +129,10 @@ public class PropertiesMapper {
                rectangle2D.getHeight());
    }

+
+    private static Long[] toAtomicTextBlockIds(TextBlock textBlock) {
+        // Collect the id of every atomic text block of the given text block, in stream order.
+        return textBlock.getAtomicTextBlocks().stream().map(atomicTextBlock -> atomicTextBlock.getId()).toArray(Long[]::new);
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java
@ -237,8 +237,13 @@ public class PDFLinesTextStripper extends PDFTextStripper {
        int startIndex = 0;
        RedTextPosition previous = null;

+        float direction = -1;
        for (int i = 0; i <= textPositions.size() - 1; i++) {

+            if (direction == -1) {
+                direction = textPositions.get(i).getDir();
+            }
+
            if (!textPositionSequences.isEmpty()) {
                previous = textPositionSequences.get(textPositionSequences.size() - 1)
                        .getTextPositions()
@ -250,6 +255,13 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                continue;
            }

+            if (textPositions.get(i).getDir() != direction && startIndex != i) {
+                List<TextPosition> sublist = textPositions.subList(startIndex, i);
+                textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
+                startIndex = i;
+                direction = textPositions.get(i).getDir();
+            }
+
            // Strange but sometimes this is happening, for example: Metolachlor2.pdf
            if (checkIfCurrentPositionIsToTheRightOfPreviousPosition(i, textPositions)) {
                List<TextPosition> sublist = textPositions.subList(startIndex, i);
@ -329,6 +341,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < maximumGapSize;
    }

+
    @Override
    public String getText(PDDocument doc) throws IOException {

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java
@ -20,6 +20,7 @@ import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
@ -53,6 +54,8 @@ public class LayoutGridService {

    static Color INNER_LINES_COLOR = new Color(255, 175, 175);
    static Color PARAGRAPH_COLOR = new Color(70, 130, 180);
+
+    static Color DUPLICATE_PARAGRAPH_COLOR = new Color(70, 180, 101);
    static Color TABLE_COLOR = new Color(102, 205, 170);
    static Color SECTION_COLOR = new Color(50, 50, 50);
    static Color HEADLINE_COLOR = new Color(162, 56, 56);
@ -100,6 +103,11 @@ public class LayoutGridService {
                        case IMAGE -> IMAGE_COLOR;
                        default -> null;
                    };
+
+                    if (semanticNode instanceof DuplicatedParagraph) {
+                        color = DUPLICATE_PARAGRAPH_COLOR;
+                    }
+
                    if (isNotSectionOrTableCellOrDocument(semanticNode)) {
                        addAsRectangle(semanticNode, layoutGrid, color);
                    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionOperations.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionOperations.java
@ -3,7 +3,6 @@ package com.knecon.fforesight.service.layoutparser.processor.utils;
 import java.util.List;
 import java.util.stream.Collectors;

-
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;

--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java
@ -95,7 +95,7 @@ public class HeadlinesGoldStandardIntegrationTest {
        goldStandardLog.getRedactionLogEntry().removeIf(r -> !r.isRedacted() || r.getChanges().get(r.getChanges().size() - 1).getType().equals(ChangeType.REMOVED));
        goldStandardLog.getRedactionLogEntry().forEach(e -> goldStandardHeadlines.add(new Headline(e.getPositions().get(0).getPage(), e.getValue())));

-        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
                pdfFileResource.getFile(),
                new ImageServiceResponse(),
                new TableServiceResponse(),
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java
@ -26,7 +26,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
    public void testLayoutParserEndToEnd() {

        prepareStorage("files/bdr/Wie weiter bei Kristeneinrichtungen.pdf");
-        LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
        LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
        Arrays.stream(finishedEvent.message().split("\n")).forEach(log::info);
    }
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java
@ -55,7 +55,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
    @SneakyThrows
    private void writeJsons(Path filename) {

-        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
                filename.toFile(),
                new ImageServiceResponse(),
                new TableServiceResponse(),
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
@ -26,7 +26,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
    @SneakyThrows
    public void testViewerDocument() {

-        String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf";
+        String fileName = "files/new/270 rotated text on non rotated pages.pdf";
        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";

        var documentFile = new ClassPathResource(fileName).getFile();
@ -54,10 +54,11 @@ public class ViewerDocumentTest extends BuildDocumentTest {
        var documentFile = new ClassPathResource(fileName).getFile();

        var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
-                                                                       documentFile,
-                                                                       new ImageServiceResponse(),
-                                                                       tableResponse,
-                                                                       new VisualLayoutParsingResponse(),Path.of(fileName).getFileName().toFile().toString());
+                documentFile,
+                new ImageServiceResponse(),
+                tableResponse,
+                new VisualLayoutParsingResponse(),
+                Path.of(fileName).getFileName().toFile().toString());
        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
        LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
        Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
@ -56,12 +56,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @SneakyThrows
    public ClassificationDocument buildClassificationDocument(File originDocument, TableServiceResponse tableServiceResponse) {

-        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                                                                                          originDocument,
-                                                                                          new ImageServiceResponse(),
-                                                                                          tableServiceResponse,
-                                                                                          new VisualLayoutParsingResponse(),
-                                                                                          "document");
+        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                originDocument,
+                new ImageServiceResponse(),
+                tableServiceResponse,
+                new VisualLayoutParsingResponse(),
+                "document");

        redactManagerClassificationService.classifyDocument(classificationDocument);

@ -112,16 +112,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        var tables = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList();
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();

        // Quality of the table parsing is not good, because the file is rotated at scanning.
        // We only asset that the table border is not the page border.
@ -143,12 +135,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        imageServiceResponse.getData()
                .forEach(imageMetadata -> images.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
                        .add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
-                                                                        imageMetadata.getPosition().getY1(),
-                                                                        imageMetadata.getGeometry().getWidth(),
-                                                                        imageMetadata.getGeometry().getHeight()),
-                                                 ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
-                                                 imageMetadata.isAlpha(),
-                                                 imageMetadata.getPosition().getPageNumber())));
+                                imageMetadata.getPosition().getY1(),
+                                imageMetadata.getGeometry().getWidth(),
+                                imageMetadata.getGeometry().getHeight()),
+                                ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
+                                imageMetadata.isAlpha(),
+                                imageMetadata.getPosition().getPageNumber())));

        System.out.println("object");
    }
@ -160,22 +152,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock table = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(table.getColCount()).isEqualTo(6);
        assertThat(table.getRowCount()).isEqualTo(13);
-        assertThat(table.getRows()
-                           .stream()
-                           .mapToInt(List::size).sum()).isEqualTo(6 * 13);
+        assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
    }


@ -185,37 +166,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(1);
+        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(2);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
-                .get(0)
-                .stream()
-                .map(Collections::singletonList)
-                .collect(Collectors.toList());
-        assertThat(secondTable.getRows()
-                           .stream()
-                           .allMatch(row -> row.stream()
-                                   .map(Cell::getHeaderCells)
-                                   .toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
+        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -225,37 +184,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(firstTable.getColCount()).isEqualTo(9);
        assertThat(firstTable.getRowCount()).isEqualTo(5);
-        TablePageBlock secondTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(1);
+        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
        assertThat(secondTable.getColCount()).isEqualTo(9);
        assertThat(secondTable.getRowCount()).isEqualTo(6);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
-                .get(firstTable.getRowCount() - 1)
-                .stream()
-                .map(Cell::getHeaderCells)
-                .collect(Collectors.toList());
-        assertThat(secondTable.getRows()
-                           .stream()
-                           .allMatch(row -> row.stream()
-                                   .map(Cell::getHeaderCells)
-                                   .toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
+        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -265,37 +202,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(1);
+        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(6);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
-                .get(0)
-                .stream()
-                .map(Collections::singletonList)
-                .collect(Collectors.toList());
-        assertThat(secondTable.getRows()
-                           .stream()
-                           .allMatch(row -> row.stream()
-                                   .map(Cell::getHeaderCells)
-                                   .toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
+        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -345,30 +260,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        validateTable(document, 0, 8, 8, 0, 0);

        List<List<String>> values = Arrays.asList(Arrays.asList("Annex point Reference within DAR/RAR",
-                                                                "Author, date",
-                                                                "Study title",
-                                                                "Analytical method Author, date, No.",
-                                                                "Technique, LOQ of the method, validated working range",
-                                                                "Method meets analytical validation criteria",
-                                                                "Remarks (in case validation criteria are not met)",
-                                                                "Acceptability of the method"),
-                                                  Arrays.asList(
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
-                                                  Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
-                                                                "Evans P.G. 2001 TMJ4569B, VV-323245",
-                                                                "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
-                                                                "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
-                                                                "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
-                                                                "Y",
-                                                                "N/A",
-                                                                "Y"));
+                        "Author, date",
+                        "Study title",
+                        "Analytical method Author, date, No.",
+                        "Technique, LOQ of the method, validated working range",
+                        "Method meets analytical validation criteria",
+                        "Remarks (in case validation criteria are not met)",
+                        "Acceptability of the method"),
+                Arrays.asList("Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
+                Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
+                        "Evans P.G. 2001 TMJ4569B, VV-323245",
+                        "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
+                        "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
+                        "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
+                        "Y",
+                        "N/A",
+                        "Y"));

        validateTable(document, 0, values);

@ -757,11 +671,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @SneakyThrows
    private void toHtml(ClassificationDocument document, String filename) {

-        var tables = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList();
+        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
        StringBuilder sb = new StringBuilder();

        int currentPage = 1;
@ -782,19 +692,9 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {

-        TablePageBlock table = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(tableIndex);
+        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
        List<List<Cell>> rows = table.getRows();
-        int emptyCellsFoundFound = rows.stream()
-                .flatMap(List::stream)
-                .toList()
-                .stream()
-                .filter(f -> f.toString().isEmpty())
-                .toList().size();
+        int emptyCellsFoundFound = rows.stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().isEmpty()).toList().size();

        for (List<Cell> row : table.getRows()) {
            row.forEach(r -> System.out.println(r.toString()));
@ -809,20 +709,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {

-        TablePageBlock table = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(tableIndex);
+        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
        List<List<Cell>> rows = table.getRows();

-        List<Cell> rowsFlattened = rows.stream()
-                .flatMap(List::stream)
-                .toList();
-        List<String> valuesFlattened = values.stream()
-                .flatMap(List::stream)
-                .toList();
+        List<Cell> rowsFlattened = rows.stream().flatMap(List::stream).toList();
+        List<String> valuesFlattened = values.stream().flatMap(List::stream).toList();

        for (int i = 0; i < valuesFlattened.size(); i++) {
            Cell cell = rowsFlattened.get(i);
@ -835,11 +726,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTableSize(ClassificationDocument document, int tableSize) {

-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .toList().size()).isEqualTo(tableSize);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().size()).isEqualTo(tableSize);

    }

--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
@ -74,7 +74,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
            cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
        }
        var cleanRulings = cleanRulingsPerPage.stream().map(CleanRulings::getVertical).collect(Collectors.toList());
-        PdfDraw.drawLinesPerPage(fileName,  cleanRulings, lineFileName);
+        PdfDraw.drawLinesPerPage(fileName, cleanRulings, lineFileName);

    }

@ -99,13 +99,13 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
    @SneakyThrows
    private void writeJsons(Path filename) {

-        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
                filename.toFile(),
                new ImageServiceResponse(),
                new TableServiceResponse(),
                new VisualLayoutParsingResponse(),
                filename.toFile().toString()));
-        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
                filename.toFile(),
                new ImageServiceResponse(),
                new TableServiceResponse(),
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java
@ -20,7 +20,6 @@ import org.springframework.context.annotation.Import;
 import org.springframework.context.annotation.Primary;
 import org.springframework.core.io.ClassPathResource;
 import org.springframework.test.context.junit.jupiter.SpringExtension;
-import org.xmlunit.builder.Input;

 import com.iqser.red.commons.jackson.ObjectMapperFactory;
 import com.iqser.red.storage.commons.service.StorageService;
@ -68,7 +67,7 @@ public abstract class AbstractTest {
    protected LayoutParsingRequest buildStandardLayoutParsingRequest() {

        return LayoutParsingRequest.builder()
-                .layoutParsingType(LayoutParsingType.REDACT_MANAGER)
+                .layoutParsingType(LayoutParsingType.REDACT_MANAGER_OLD)
                .originFileStorageId(ORIGIN_FILE_ID)
                .tablesFileStorageId(Optional.of(TABLE_FILE_ID))
                .imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
@ -99,7 +98,7 @@ public abstract class AbstractTest {
    @SneakyThrows
    protected LayoutParsingRequest prepareStorage(String file) {

-        return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json","visual_layout_parsing_response/empty.json");
+        return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json", "visual_layout_parsing_response/empty.json");
    }


@ -107,7 +106,7 @@ public abstract class AbstractTest {
    protected LayoutParsingRequest prepareStorage(InputStream fileInputStream) {

        storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileInputStream);
-        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
    }


@ -140,6 +139,7 @@ public abstract class AbstractTest {
        return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream());
    }

+
    @SneakyThrows
    protected LayoutParsingRequest prepareStorage(String file, String cvServiceResponseFile, String imageInfoFile, String visualLayoutParsingResponseFile) {

@ -148,9 +148,13 @@ public abstract class AbstractTest {
        ClassPathResource imageInfoFileResource = new ClassPathResource(imageInfoFile);
        ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource(visualLayoutParsingResponseFile);

-        return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream(), visualLayoutParsingResponseResource.getInputStream());
+        return prepareStorage(pdfFileResource.getInputStream(),
+                cvServiceResponseFileResource.getInputStream(),
+                imageInfoFileResource.getInputStream(),
+                visualLayoutParsingResponseResource.getInputStream());
    }

+
    @SneakyThrows
    protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream) {

@ -158,18 +162,22 @@ public abstract class AbstractTest {
        storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream);
        storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);

-        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
    }

+
    @SneakyThrows
-    protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream, InputStream visualLayoutParsingResponseFileStream) {
+    protected LayoutParsingRequest prepareStorage(InputStream fileStream,
+                                                  InputStream cvServiceResponseFileStream,
+                                                  InputStream imageInfoStream,
+                                                  InputStream visualLayoutParsingResponseFileStream) {

        storageService.storeObject(TenantContext.getTenantId(), IMAGE_FILE_ID, imageInfoStream);
        storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream);
        storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);
-        storageService.storeObject(TenantContext.getTenantId(),VISUAL_LAYOUT_FILE,visualLayoutParsingResponseFileStream );
+        storageService.storeObject(TenantContext.getTenantId(), VISUAL_LAYOUT_FILE, visualLayoutParsingResponseFileStream);

-        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
    }


--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java
@ -26,14 +26,19 @@ public abstract class BuildDocumentTest extends AbstractTest {

        File fileResource = new ClassPathResource(filename).getFile();
        prepareStorage(filename);
-        return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new VisualLayoutParsingResponse(),filename);
+        return layoutParsingPipeline.parseLayout(layoutParsingType,
+                fileResource,
+                layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
+                new TableServiceResponse(),
+                new VisualLayoutParsingResponse(),
+                filename);
    }


    @SneakyThrows
    protected Document buildGraph(String filename) {

-        return buildGraph(filename, LayoutParsingType.REDACT_MANAGER);
+        return buildGraph(filename, LayoutParsingType.REDACT_MANAGER_OLD);
    }


--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/brokenTableOnOcr_ocred
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/brokenTableOnOcr_ocred
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/wrongOrder
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/wrongOrder