RED-7141: Implemented docstrum layout parsing

2024-02-22 11:02:50 +01:00 · 2024-02-22 11:02:50 +01:00 · 79239b751d
commit 79239b751d
parent f146beeb44
57 changed files with 1998 additions and 850 deletions
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java
@ -55,6 +55,13 @@ public class DocumentStructure implements Serializable {

    }

+    /**
+     * Holder for the names of the additional entries a duplicate paragraph carries in its properties field.
+     * Contains constants only.
+     */
+    @Schema(description = "Object containing the extra field names, a duplicate paragraph has in its properties field.")
+    public static class DuplicateParagraphProperties implements Serializable {
+
+        /**
+         * Property key {@code "utbid"}.
+         * NOTE(review): judging by the constant name this holds the id of the unsorted text block - confirm against the code that writes it.
+         */
+        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";
+
+    }
+
    public static final String RECTANGLE_DELIMITER = ";";


--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingType.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingType.java
@ -2,6 +2,9 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;

+/**
+ * Identifies the layout parsing variant requested for a document.
+ * LayoutParsingPipeline switches on this value to pick the blockification,
+ * classification and section-building steps it runs.
+ */
 public enum LayoutParsingType {
    REDACT_MANAGER,
-    TAAS,
-    DOCUMINE
+    REDACT_MANAGER_OLD,
+    REDACT_MANAGER_PARAGRAPH_DEBUG,
+    DOCUMINE,
+    CLARIFYND,
+    CLARIFYND_PARAGRAPH_DEBUG
 }
--- a/layoutparser-service/layoutparser-service-processor/build.gradle.kts
+++ b/layoutparser-service/layoutparser-service-processor/build.gradle.kts
@ -24,4 +24,5 @@ dependencies {
    implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
    implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
    implementation("org.springframework.boot:spring-boot-starter-web:3.1.3")
+    implementation("org.jgrapht:jgrapht-core:1.5.2")
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -28,6 +28,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
@ -43,12 +44,11 @@ import com.knecon.fforesight.service.layoutparser.processor.services.RulingClean
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
 import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
 import com.knecon.fforesight.service.layoutparser.processor.services.TableExtractionService;
+import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocuMineBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.RedactManagerBlockificationService;
-import com.knecon.fforesight.service.layoutparser.processor.services.blockification.TaasBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.classification.DocuMineClassificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
-import com.knecon.fforesight.service.layoutparser.processor.services.classification.TaasClassificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
 import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
 import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
@ -76,16 +76,15 @@ public class LayoutParsingPipeline {
    CvTableParsingAdapter cvTableParsingAdapter;
    LayoutParsingStorageService layoutParsingStorageService;
    SectionsBuilderService sectionsBuilderService;
-    TaasClassificationService taasClassificationService;
    RedactManagerClassificationService redactManagerClassificationService;
    DocuMineClassificationService docuMineClassificationService;
    SimplifiedSectionTextService simplifiedSectionTextService;
    BodyTextFrameService bodyTextFrameService;
    RulingCleaningService rulingCleaningService;
    TableExtractionService tableExtractionService;
-    TaasBlockificationService taasBlockificationService;
    DocuMineBlockificationService docuMineBlockificationService;
    RedactManagerBlockificationService redactManagerBlockificationService;
+    DocstrumBlockificationService docstrumBlockificationService;
    LayoutGridService layoutGridService;
    ObservationRegistry observationRegistry;
    VisualLayoutParsingAdapter visualLayoutParsingAdapter;
@ -97,40 +96,33 @@ public class LayoutParsingPipeline {
        log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());

        File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
-        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
-                .orElse(originFile);
+        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);

        VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
-        if (layoutParsingRequest.visualLayoutParsingFileId()
-                .isPresent()) {
-            visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId()
-                                                                                                         .get());
+        if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
+            visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
        }

        ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
-        if (layoutParsingRequest.imagesFileStorageId()
-                .isPresent()) {
-            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId()
-                                                                                     .get());
+        if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
+            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
        }

        TableServiceResponse tableServiceResponse = new TableServiceResponse();
-        if (layoutParsingRequest.tablesFileStorageId()
-                .isPresent()) {
-            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId()
-                                                                                     .get());
+        if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
+            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
        }

        ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(),
-                                                                    originFile,
-                                                                    imageServiceResponse,
-                                                                    tableServiceResponse,
-                                                                    visualLayoutParsingResponse,
-                                                                    layoutParsingRequest.identifier().toString());
+                originFile,
+                imageServiceResponse,
+                tableServiceResponse,
+                visualLayoutParsingResponse,
+                layoutParsingRequest.identifier().toString());

        log.info("Building document graph for {}", layoutParsingRequest.identifier());

-        Document documentGraph = observeBuildDocumentGraph(classificationDocument);
+        Document documentGraph = observeBuildDocumentGraph(layoutParsingRequest.layoutParsingType(), classificationDocument);

        log.info("Creating viewer document for {}", layoutParsingRequest.identifier());

@ -142,7 +134,7 @@ public class LayoutParsingPipeline {
        layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
        layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);

-        if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
+        if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.CLARIFYND)) {
            log.info("Building research document data for {}", layoutParsingRequest.identifier());
            var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
            layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
@ -158,37 +150,37 @@ public class LayoutParsingPipeline {
                .numberOfPages(documentGraph.getNumberOfPages())
                .duration(System.currentTimeMillis() - start)
                .message(format("""
-                                        Layout parsing has finished in %.02f s.
-                                        identifiers: %s
-                                        %s
-                                        Files have been saved with Ids:
-                                        Structure: %s
-                                        Text: %s
-                                        Positions: %s
-                                        PageData: %s
-                                        Simplified Text: %s
-                                        Viewer Doc: %s""",
-                                ((float) (System.currentTimeMillis() - start)) / 1000,
-                                layoutParsingRequest.identifier(),
-                                buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
-                                layoutParsingRequest.structureFileStorageId(),
-                                layoutParsingRequest.textBlockFileStorageId(),
-                                layoutParsingRequest.positionBlockFileStorageId(),
-                                layoutParsingRequest.pageFileStorageId(),
-                                layoutParsingRequest.simplifiedTextStorageId(),
-                                layoutParsingRequest.viewerDocumentStorageId()))
+                                Layout parsing has finished in %.02f s.
+                                identifiers: %s
+                                %s
+                                Files have been saved with Ids:
+                                Structure: %s
+                                Text: %s
+                                Positions: %s
+                                PageData: %s
+                                Simplified Text: %s
+                                Viewer Doc: %s""",
+                        ((float) (System.currentTimeMillis() - start)) / 1000,
+                        layoutParsingRequest.identifier(),
+                        buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
+                        layoutParsingRequest.structureFileStorageId(),
+                        layoutParsingRequest.textBlockFileStorageId(),
+                        layoutParsingRequest.positionBlockFileStorageId(),
+                        layoutParsingRequest.pageFileStorageId(),
+                        layoutParsingRequest.simplifiedTextStorageId(),
+                        layoutParsingRequest.viewerDocumentStorageId()))
                .build();

    }


-    private Document observeBuildDocumentGraph(ClassificationDocument classificationDocument) {
+    /**
+     * Builds the document graph inside an observation named "LayoutParsingPipeline"
+     * (contextual name "build-document-graph").
+     * The observed lambda cannot return the result directly here, so it is handed out through an AtomicReference.
+     *
+     * @param layoutParsingType      passed through to DocumentGraphFactory.buildDocumentGraph
+     * @param classificationDocument the classified document the graph is built from
+     * @return the document produced by DocumentGraphFactory.buildDocumentGraph
+     */
+    private Document observeBuildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument) {

        AtomicReference<Document> documentReference = new AtomicReference<>();

        Observation.createNotStarted("LayoutParsingPipeline", observationRegistry)
                .contextualName("build-document-graph")
-                .observe(() -> documentReference.set(DocumentGraphFactory.buildDocumentGraph(classificationDocument)));
+                .observe(() -> documentReference.set(DocumentGraphFactory.buildDocumentGraph(layoutParsingType, classificationDocument)));

        return documentReference.get();
    }
@ -197,14 +189,14 @@ public class LayoutParsingPipeline {
    private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {

        return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
-                             numberOfPages,
-                             semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
-                             semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
-                             semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
-                             semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
-                             semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
-                             semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
-                             semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
+                numberOfPages,
+                semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
+                semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
+                semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
+                semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
+                semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
+                semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
+                semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
    }


@ -260,11 +252,16 @@ public class LayoutParsingPipeline {
            PDRectangle cropbox = pdPage.getCropBox();
            CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings());

+            List<Cell> emptyTableCells = TableExtractionService.findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
+
            ClassificationPage classificationPage = switch (layoutParsingType) {
-                case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
-                case TAAS -> taasBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
+                case REDACT_MANAGER_OLD ->
+                        redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
                case DOCUMINE -> docuMineBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
+                case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, true);
+                case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG -> docstrumBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells, false);
            };
+
            classificationPage.setCleanRulings(cleanRulings);
            classificationPage.setRotation(rotation);
            classificationPage.setLandscape(isLandscape);
@ -289,7 +286,13 @@ public class LayoutParsingPipeline {
                }
            }

-            tableExtractionService.extractTables(cleanRulings, classificationPage);
+            tableExtractionService.extractTables(emptyTableCells, classificationPage);
+
+            if (layoutParsingType == LayoutParsingType.REDACT_MANAGER) {
+                docstrumBlockificationService.combineBlocks(classificationPage);
+            } else if (layoutParsingType == LayoutParsingType.CLARIFYND) {
+                docstrumBlockificationService.mergeZones(classificationPage.getTextBlocks());
+            }

            buildPageStatistics(classificationPage);
            increaseDocumentStatistics(classificationPage, classificationDocument);
@ -303,14 +306,21 @@ public class LayoutParsingPipeline {
        bodyTextFrameService.setBodyTextFrames(classificationDocument, layoutParsingType);
        log.info("Classify TextBlocks for {}", identifier);
        switch (layoutParsingType) {
-            case TAAS -> taasClassificationService.classifyDocument(classificationDocument);
+            case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_OLD, CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG ->
+                    redactManagerClassificationService.classifyDocument(classificationDocument);
            case DOCUMINE -> docuMineClassificationService.classifyDocument(classificationDocument);
-            case REDACT_MANAGER -> redactManagerClassificationService.classifyDocument(classificationDocument);
        }

        log.info("Building Sections for {}", identifier);
-        sectionsBuilderService.buildSections(classificationDocument);
-        sectionsBuilderService.addImagesToSections(classificationDocument);
+
+        switch (layoutParsingType) {
+            case CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_PARAGRAPH_DEBUG -> sectionsBuilderService.buildParagraphDebugSections(classificationDocument);
+            default -> {
+                sectionsBuilderService.buildSections(classificationDocument);
+                sectionsBuilderService.addImagesToSections(classificationDocument);
+            }
+        }
+
        return classificationDocument;
    }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java
@ -96,7 +96,7 @@ public abstract class AbstractPageBlock extends Rectangle {

        return this.minX - threshold <= apb.getMaxX() && this.maxX + threshold >= apb.getMinX();
    }
-
+    

    public abstract boolean isEmpty();

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Document.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Document.java
@ -15,7 +15,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
@ -52,7 +51,7 @@ public class Document implements GenericSemanticNode {
    public TextBlock getTextBlock() {

        if (textBlock == null) {
-            textBlock = streamTerminalTextBlocksInOrder().collect(new TextBlockCollector());
+            textBlock = GenericSemanticNode.super.getTextBlock();
        }
        return textBlock;
    }
@ -67,8 +66,7 @@ public class Document implements GenericSemanticNode {

+    /**
+     * Streams the text block of every leaf node of this document.
+     * The order is the one produced by {@code streamAllNodes()}; non-leaf nodes are filtered out.
+     *
+     * @return a stream with one text block per leaf node, obtained through {@code getTextBlock()}
+     */
    public Stream<TextBlock> streamTerminalTextBlocksInOrder() {

-        return streamAllNodes().filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getLeafTextBlock);
+        return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock);
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/DuplicatedParagraph.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/DuplicatedParagraph.java
@ -0,0 +1,34 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
+
+import java.util.stream.Stream;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.experimental.SuperBuilder;
+
+/**
+ * A {@link Paragraph} that carries a second text block, {@code unsortedLeafTextBlock},
+ * next to the {@code leafTextBlock} it inherits.
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+@SuperBuilder
+public class DuplicatedParagraph extends Paragraph {
+
+    // Second text block of this paragraph; combined with the inherited leafTextBlock in getTextBlock().
+    TextBlock unsortedLeafTextBlock;
+
+
+    /**
+     * Combines both text blocks of this paragraph into one.
+     * The inherited {@code leafTextBlock} comes first, followed by {@code unsortedLeafTextBlock};
+     * the result is collected anew on every call and is not cached.
+     *
+     * @return the text block produced by {@link TextBlockCollector} from the two blocks
+     */
+    @Override
+    public TextBlock getTextBlock() {
+
+        return Stream.of(leafTextBlock, unsortedLeafTextBlock).collect(new TextBlockCollector());
+
+    }
+
+
+    /**
+     * Delegates to the superclass implementation.
+     * Because this method is declared explicitly, Lombok's {@code @Data} does not generate a {@code toString} for this class.
+     */
+    @Override
+    public String toString() {
+
+        return super.toString();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Paragraph.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Paragraph.java
@ -18,11 +18,12 @@ import lombok.Builder;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.experimental.FieldDefaults;
+import lombok.experimental.SuperBuilder;

@Data
-@Builder
+@SuperBuilder
@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
+@FieldDefaults(level = AccessLevel.PROTECTED)
 public class Paragraph implements GenericSemanticNode {

    @Builder.Default
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
@ -11,7 +11,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
@ -62,9 +61,7 @@ public class Section implements GenericSemanticNode {
    public TextBlock getTextBlock() {

        if (textBlock == null) {
-            textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
-                    .map(SemanticNode::getLeafTextBlock)
-                    .collect(new TextBlockCollector());
+            textBlock = GenericSemanticNode.super.getTextBlock();
        }
        return textBlock;
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SemanticNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SemanticNode.java
@ -20,6 +20,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.E
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
 import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;

 public interface SemanticNode {
@ -39,7 +40,10 @@ public interface SemanticNode {
     *
     * @return TextBlock containing all AtomicTextBlocks that are located under this Node.
     */
-    TextBlock getTextBlock();
+    default TextBlock getTextBlock() {
+
+        return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock).collect(new TextBlockCollector());
+    }


    /**
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
@ -48,7 +48,6 @@ public class Table implements SemanticNode {
    @EqualsAndHashCode.Exclude
    Map<Page, Rectangle2D> bBoxCache;

-
    /**
     * Streams all entities in this table, that appear in a row, which contains any of the provided strings.
     *
@ -332,9 +331,7 @@ public class Table implements SemanticNode {
    public TextBlock getTextBlock() {

        if (textBlock == null) {
-            textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
-                    .map(SemanticNode::getLeafTextBlock)
-                    .collect(new TextBlockCollector());
+            textBlock = SemanticNode.super.getTextBlock();
        }
        return textBlock;
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
@ -53,6 +53,9 @@ public class TextPageBlock extends AbstractPageBlock {
    @JsonIgnore
    private PageBlockType classification;

+    @JsonIgnore
+    private boolean toDuplicate;
+

    @JsonIgnore
    public TextDirection getDir() {
@ -73,7 +76,7 @@ public class TextPageBlock extends AbstractPageBlock {

        return sequences.get(0).getPageWidth();
    }
-    
+

    public static TextPageBlock merge(List<TextPageBlock> textBlocksToMerge) {

@ -82,6 +85,7 @@ public class TextPageBlock extends AbstractPageBlock {
        return fromTextPositionSequences(sequences);
    }

+
    public static TextPageBlock fromTextPositionSequences(List<TextPositionSequence> wordBlockList) {

        TextPageBlock textBlock = null;
@ -133,7 +137,6 @@ public class TextPageBlock extends AbstractPageBlock {
    }


-
    /**
     * Returns the minX value in pdf coordinate system.
     * Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
@ -362,7 +365,22 @@ public class TextPageBlock extends AbstractPageBlock {
        }

        return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());
+    }

+
+    /**
+     * Estimates how many text lines this block spans.
+     * <p>
+     * The words in {@code sequences} are visited in list order. A new line is counted whenever a word's
+     * {@code maxYDirAdj} exceeds that of the word before it by more than the word's own text height.
+     * Counting starts at 1, so a block without any words also reports 1.
+     *
+     * @return the estimated number of lines, never less than 1
+     */
+    public int getNumberOfLines() {
+
+        int lineCount = 1;
+        TextPositionSequence precedingWord = null;
+        for (TextPositionSequence currentWord : sequences) {
+            // The first word has no predecessor and therefore never opens a new line.
+            boolean startsNewLine = precedingWord != null && currentWord.getMaxYDirAdj() - precedingWord.getMaxYDirAdj() > currentWord.getTextHeight();
+            if (startsNewLine) {
+                lineCount++;
+            }
+            precedingWord = currentWord;
+        }
+        return lineCount;
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java
@ -55,6 +55,17 @@ public class TextPositionSequence implements CharSequence {
    }


+    /**
+     * Creates a sequence for the given page from already collected text positions.
+     * Direction, rotation and page dimensions are taken from the first text position.
+     *
+     * @param textPositions the text positions forming this sequence; the first element is read, so the list must not be empty
+     * @param page          the page value stored on this sequence
+     */
+    public TextPositionSequence(List<RedTextPosition> textPositions, int page) {
+
+        // All page-level attributes are derived from the first position only.
+        RedTextPosition firstPosition = textPositions.get(0);
+
+        this.textPositions = textPositions;
+        this.page = page;
+        this.dir = TextDirection.fromDegrees(firstPosition.getDir());
+        this.rotation = firstPosition.getRotation();
+        this.pageHeight = firstPosition.getPageHeight();
+        this.pageWidth = firstPosition.getPageWidth();
+    }
+
+
    @Override
    public int length() {

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/BodyTextFrameService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/BodyTextFrameService.java
@ -25,6 +25,7 @@ public class BodyTextFrameService {
    private static final float RULING_HEIGHT_THRESHOLD = 0.15f; // multiplied with page height. Header/Footer Rulings must be within that border of the page.
    private static final float RULING_WIDTH_THRESHOLD = 0.75f; // multiplied with page width. Header/Footer Rulings must be at least that wide.

+
    public void setBodyTextFrames(ClassificationDocument classificationDocument, LayoutParsingType layoutParsingType) {

        Rectangle bodyTextFrame = calculateBodyTextFrame(classificationDocument.getPages(), classificationDocument.getFontSizeCounter(), false, layoutParsingType);
@ -132,12 +133,7 @@ public class BodyTextFrameService {
                                               boolean landscape,
                                               LayoutParsingType layoutParsingType) {

-        float approximateHeaderLineCount;
-        if (layoutParsingType.equals(LayoutParsingType.TAAS)) {
-            approximateHeaderLineCount = 3.3f;
-        } else {
-            approximateHeaderLineCount = 2.9f;
-        }
+        float approximateHeaderLineCount = 2.9f;

        BodyTextFrameExpansionsRectangle expansionsRectangle = new BodyTextFrameExpansionsRectangle();

@ -155,8 +151,9 @@ public class BodyTextFrameService {
                        continue;
                    }

-                    if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER)
-                            || MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER)) {
+                    if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER) || MarkedContentUtils.intersects(textBlock,
+                            page.getMarkedContentBboxPerType(),
+                            MarkedContentUtils.FOOTER)) {
                        continue;
                    }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
@ -7,6 +7,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;

+import org.apache.logging.log4j.util.Strings;
 import org.springframework.stereotype.Service;

 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
@ -110,6 +111,20 @@ public class SectionsBuilderService {
    }


+    /**
+     * Replaces the sections of the document with one section per text block, collected page by page.
+     * Each block first receives the page number of the page it belongs to and is then wrapped on its own
+     * by buildTextBlock, called with an empty string as second argument.
+     *
+     * @param document the document whose pages are read and whose sections are overwritten
+     */
+    public void buildParagraphDebugSections(ClassificationDocument document) {
+
+        List<ClassificationSection> debugSections = new ArrayList<>();
+        for (var page : document.getPages()) {
+            for (var block : page.getTextBlocks()) {
+                block.setPage(page.getPageNumber());
+                debugSections.add(buildTextBlock(List.of(block), Strings.EMPTY));
+            }
+        }
+        document.setSections(debugSections);
+    }
+
+
    public void addImagesToSections(ClassificationDocument document) {

        Map<Integer, List<ClassificationSection>> sectionMap = new HashMap<>();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java
@ -14,7 +14,6 @@ import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
@ -41,19 +40,18 @@ public class TableExtractionService {
     * <p>
     * DirAdj (Text direction adjusted) values can not be used here.
     *
-     * @param cleanRulings The lines used to build the table.
-     * @param page         Page object that contains textblocks and statistics.
+     * @param emptyCells The cells used to build the table.
+     * @param page       Page object that contains textblocks and statistics.
     */

-    public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {
+    public void extractTables(List<Cell> emptyCells, ClassificationPage page) {

-        List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
        // sort cells by size (height * width) ascending so that textBlocks are always assigned to the smallest cells that contain them
-        cells.sort(CELL_SIZE_COMPARATOR);
+        emptyCells.sort(CELL_SIZE_COMPARATOR);

        for (AbstractPageBlock abstractPageBlock : page.getTextBlocks()) {
            TextPageBlock textBlock = (TextPageBlock) abstractPageBlock;
-            for (Cell cell : cells) {
+            for (Cell cell : emptyCells) {
                if (cell.hasMinimumSize() && doesCellContainTextBlock(cell, textBlock)) {
                    cell.addTextBlock(textBlock);
                    break;
@ -61,7 +59,7 @@ public class TableExtractionService {
            }
        }

-        cells = new ArrayList<>(new HashSet<>(cells));
+        var cells = new ArrayList<>(new HashSet<>(emptyCells));
        DoubleComparisons.sort(cells, Rectangle.ILL_DEFINED_ORDER);

        List<Rectangle> spreadsheetAreas = SpreadsheetFinder.findSpreadsheetsFromCells(cells);
@ -79,9 +77,7 @@ public class TableExtractionService {
                }
            }

-            var containedCellsWithText = containedCells.stream()
-                    .filter(cell -> !cell.getTextBlocks().isEmpty())
-                    .toList();
+            var containedCellsWithText = containedCells.stream().filter(cell -> !cell.getTextBlocks().isEmpty()).toList();

            // verify if table would contain fewer cells with text than the threshold allows
            if (containedCellsWithText.size() >= MAX_TABLE_CONTAINED_CELLS_WITH_TEXT && checkIfTableCellsAreUniform(containedCells)) {
@ -101,11 +97,7 @@ public class TableExtractionService {
            if (position != -1) {
                page.getTextBlocks().add(position, table);

-                var toBeRemoved = table.getCells()
-                        .stream()
-                        .map(Cell::getTextBlocks)
-                        .flatMap(List::stream)
-                        .toList();
+                var toBeRemoved = table.getCells().stream().map(Cell::getTextBlocks).flatMap(List::stream).toList();
                // remove text blocks from the page that were also added with the table (from its contained cells)
                page.getTextBlocks().removeAll(toBeRemoved);
            }
@ -115,7 +107,7 @@ public class TableExtractionService {

    private boolean checkIfTableCellsAreUniform(List<Cell> containedCells) {

-        if(containedCells.size() <= 2) {
+        if (containedCells.size() <= 2) {
            return true;
        }

@ -139,19 +131,13 @@ public class TableExtractionService {
        }
        double x0 = cell.getX();
        double y0 = cell.getY();
-        return (x >= x0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE
-                && y >= y0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE
-                && (x + w) <= x0 + cell.getWidth() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE
-                && (y + h) <= y0 + cell.getHeight() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE);
+        return (x >= x0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE && y >= y0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE && (x + w) <= x0 + cell.getWidth() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE && (y + h) <= y0 + cell.getHeight() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE);
    }


    public static List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {

-        return RectangularIntersectionFinder.find(horizontalRulingLines, verticalRulingLines)
-                .stream()
-                .map(Cell::new)
-                .collect(Collectors.toList());
+        return RectangularIntersectionFinder.find(horizontalRulingLines, verticalRulingLines).stream().map(Cell::new).collect(Collectors.toList());
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
@ -0,0 +1,408 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
+
+import static java.util.stream.Collectors.toSet;
+
+import java.awt.geom.Point2D;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Set;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.DocstrumSegmentationService;
+import com.knecon.fforesight.service.layoutparser.processor.utils.QuickSort;
+import com.knecon.fforesight.service.layoutparser.processor.utils.RulingTextDirAdjustUtil;
+import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionSequenceComparator;
+
+import lombok.RequiredArgsConstructor;
+
+@SuppressWarnings("all")
+@Service
+@RequiredArgsConstructor
+public class DocstrumBlockificationService {
+
+    private final DocstrumSegmentationService docstrumSegmentationService;
+
+    static final float THRESHOLD = 1f;
+
+
+    /**
+     * Builds the blocks of one page: the words are grouped into zones by the docstrum segmentation service,
+     * and the words of every zone are then split into blocks wherever a cell border separates them.
+     *
+     * @param textPositions the words handed to the segmentation service
+     * @param cells         the cells whose four borders serve as splitting lines
+     * @param xyOrder       forwarded unchanged to the segmentation service
+     * @return a new page created from the resulting blocks
+     */
+    public ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Cell> cells, boolean xyOrder) {
+
+        // Underlines and strikethroughs are rulings as well, but they must not split blocks, so only the borders of the cells are used.
+        List<Ruling> horizontalBorders = new ArrayList<>();
+        List<Ruling> verticalBorders = new ArrayList<>();
+
+        for (Cell cell : cells) {
+            var x0 = cell.x;
+            var y0 = cell.y;
+            var x1 = cell.x + cell.width;
+            var y1 = cell.y + cell.height;
+
+            horizontalBorders.add(new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x1, y0)));
+            horizontalBorders.add(new Ruling(new Point2D.Float(x0, y1), new Point2D.Float(x1, y1)));
+            verticalBorders.add(new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x0, y1)));
+            verticalBorders.add(new Ruling(new Point2D.Float(x1, y0), new Point2D.Float(x1, y1)));
+        }
+
+        List<AbstractPageBlock> pageBlocks = new ArrayList<>();
+        docstrumSegmentationService.segmentPage(textPositions, xyOrder).forEach(zone -> {
+
+            // Flatten the zone (lines -> words) into fresh sequences before splitting it at the cell borders.
+            List<TextPositionSequence> zoneWords = new ArrayList<>();
+            zone.getLines().forEach(line -> line.getWords().forEach(word -> zoneWords.add(new TextPositionSequence(word.getTextPositions(), word.getPage()))));
+
+            pageBlocks.addAll(splitZonesAtRulings(zoneWords, horizontalBorders, verticalBorders));
+        });
+
+        return new ClassificationPage(pageBlocks);
+    }
+
+
+    /**
+     * Merges text blocks of a page that belong together, modifying the page's block list in place.
+     * <p>
+     * mergeZones is run on the list first, then the list is walked once and every text block is compared with the
+     * text block handled before it. If one of four rules matches, the sequences of both blocks are combined into a
+     * rebuilt block that takes the place of the earlier one, and the later one is removed. Finally mergeZones runs again.
+     * Table blocks are skipped and never merged.
+     *
+     * @param page the page whose text blocks are merged
+     */
+    public void combineBlocks(ClassificationPage page) {
+
+        mergeZones(page.getTextBlocks());
+
+        // Placeholder for "no previous block yet"; presumably it has no sequences, so the first text block is never merged.
+        TextPageBlock previous = new TextPageBlock();
+        ListIterator<AbstractPageBlock> itty = page.getTextBlocks().listIterator();
+        while (itty.hasNext()) {
+            AbstractPageBlock block = itty.next();
+            if (block instanceof TablePageBlock) {
+                // Tables are skipped; 'previous' keeps pointing at the last text block seen before the table.
+                continue;
+            }
+            TextPageBlock current = (TextPageBlock) block;
+
+            if (previous != null && !previous.getSequences().isEmpty()) {
+
+                // All merge branches below end with the same iterator sequence: remove 'current', step back one
+                // element, overwrite that element with the merged block and step forward again.
+                // NOTE(review): this assumes the element directly before 'current' is 'previous'. If a TablePageBlock
+                // sits between them, itty.previous() returns the table and itty.set(...) would overwrite it - confirm
+                // whether tables can be in the list at this point.
+
+                // Rule 1: same direction, both blocks have at least two lines and overlap in Y, with no block between
+                // them and no other block in their Y range. The merged block is flagged toDuplicate.
+                if (current.getDir() == previous.getDir() //
+                        && previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 //
+                        && previous.intersectsY(current) //
+                        && !hasBetween(current, previous, page.getTextBlocks()) //
+                        && numberOfYIntersections(current, previous, page.getTextBlocks()) == 0) {
+
+                    previous.getSequences().addAll(current.getSequences());
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    previous.setToDuplicate(true);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+                // Rule 2: same direction and the blocks (almost) intersect with zero tolerance. The toDuplicate flag of
+                // the previous block is carried over to the rebuilt block.
+                if (current.getDir() == previous.getDir() && (previous.almostIntersects(current, 0, 0))) {
+
+                    previous.getSequences().addAll(current.getSequences());
+                    boolean toDuplicate = previous.isToDuplicate();
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    previous.setToDuplicate(toDuplicate);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+                // Rule 3: same direction, maxY or minY of both blocks differ by less than THRESHOLD, the previous block
+                // has one line (or two lines while the current one has exactly one), no block lies between them and at
+                // most four other blocks fall into their Y range.
+                if (current.getDir() == previous.getDir() //
+                        && (Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) //
+                        && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1) //
+                        && !hasBetween(current, previous, page.getTextBlocks()) && numberOfYIntersections(current, previous, page.getTextBlocks()) <= 4) {
+
+                    previous.getSequences().addAll(current.getSequences());
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+                // Rule 4: same line-count condition as rule 3, but the blocks must overlap in Y and no other block may
+                // fall into their Y range.
+                if (current.getDir() == previous.getDir() //
+                        && current.intersectsY(previous) //
+                        && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1) //
+                        && !hasBetween(current, previous, page.getTextBlocks()) //
+                        && numberOfYIntersections(current, previous, page.getTextBlocks()) <= 0) {
+                    previous.getSequences().addAll(current.getSequences());
+                    previous = buildTextBlock(previous.getSequences(), 0);
+                    itty.remove();
+                    itty.previous();
+                    itty.set(previous);
+                    itty.next();
+                    continue;
+                }
+
+            }
+            // No rule matched: the current block becomes the reference for the next text block.
+            previous = current;
+        }
+
+        mergeZones(page.getTextBlocks());
+
+    }
+
+
+    /**
+     * Checks whether a third block sits between the two given blocks in X direction.
+     * A block counts as "between" when it overlaps {@code other} in Y, starts at or after the maxX of {@code other}
+     * and ends at or before the minX of {@code block}.
+     *
+     * @param block     the block expected on the higher-X side
+     * @param other     the block expected on the lower-X side
+     * @param allBlocks all blocks to test; the two given blocks themselves are ignored
+     * @return {@code true} if at least one such block exists
+     */
+    private boolean hasBetween(TextPageBlock block, TextPageBlock other, List<AbstractPageBlock> allBlocks) {
+
+        return allBlocks.stream()
+                .filter(candidate -> candidate != other && candidate != block)
+                .anyMatch(candidate -> other.intersectsY(candidate) //
+                        && other.getMaxX() <= candidate.getMinX() //
+                        && candidate.getMaxX() <= block.getMinX());
+    }
+
+
+    /**
+     * Counts the blocks, apart from the two given ones, whose Y extent overlaps the vertical band covered by
+     * {@code block} and {@code other} together. The band reaches from the smaller minY to the larger maxY of the pair.
+     *
+     * @param block     first block of the pair
+     * @param other     second block of the pair
+     * @param allBlocks all blocks to test; the two given blocks themselves are ignored
+     * @return the number of other blocks overlapping the band
+     */
+    private int numberOfYIntersections(TextPageBlock block, TextPageBlock other, List<AbstractPageBlock> allBlocks) {
+
+        double minY = Math.min(block.getMinY(), other.getMinY());
+        // The upper bound must be the larger maxY. Using the smaller one (as before) cut the band short, so blocks
+        // next to the taller of the two blocks were not counted.
+        double maxY = Math.max(block.getMaxY(), other.getMaxY());
+
+        int numberOfYIntersections = 0;
+        for (AbstractPageBlock current : allBlocks) {
+
+            if (current == other || current == block) {
+                continue;
+            }
+
+            if (minY <= current.getMaxY() && maxY >= current.getMinY()) {
+                numberOfYIntersections++;
+            }
+        }
+
+        return numberOfYIntersections;
+    }
+
+
+    /**
+     * Merges text blocks of the same direction for which {@code almostIntersects(inner, 0, 0)} holds.
+     * <p>
+     * The list is modified in place: the absorbing block is replaced by a rebuilt block containing the sorted
+     * sequences of both blocks, and absorbed blocks are removed once the walk is finished. Table blocks and blocks
+     * flagged toDuplicate are never merged.
+     *
+     * @param zones the blocks to merge
+     */
+    public void mergeZones(List<AbstractPageBlock> zones) {
+
+        ListIterator<AbstractPageBlock> itty = zones.listIterator();
+        Set<AbstractPageBlock> toRemove = new HashSet<>();
+        while (itty.hasNext()) {
+            AbstractPageBlock block = itty.next();
+            if (block instanceof TablePageBlock) {
+                continue;
+            }
+            // Absorbed blocks stay in the list until the end of the walk. Without this check such a block would
+            // become 'current' itself and absorb the block that already contains its words, duplicating them.
+            if (toRemove.contains(block)) {
+                continue;
+            }
+
+            TextPageBlock current = (TextPageBlock) block;
+
+            if (current.isToDuplicate()) {
+                continue;
+            }
+
+            for (int i = 0; i < zones.size(); i++) {
+
+                AbstractPageBlock candidate = zones.get(i);
+                if (toRemove.contains(candidate)) {
+                    continue;
+                }
+                if (candidate == current) {
+                    continue;
+                }
+                if (candidate instanceof TablePageBlock) {
+                    continue;
+                }
+
+                TextPageBlock inner = (TextPageBlock) candidate;
+
+                if (inner.isToDuplicate()) {
+                    continue;
+                }
+
+                if (current.getDir() == inner.getDir() && current.almostIntersects(inner, 0, 0)) {
+
+                    current.getSequences().addAll(inner.getSequences());
+                    QuickSort.sort(current.getSequences(), new TextPositionSequenceComparator());
+                    current = buildTextBlock(current.getSequences(), 0);
+                    toRemove.add(inner);
+                    // Put the rebuilt block at the position of the outer iterator so later comparisons see it.
+                    itty.set(current);
+                }
+            }
+        }
+        zones.removeAll(toRemove);
+    }
+
+
+    /**
+     * Splits a run of words into text blocks. A new block is started whenever the text direction changes or a
+     * ruling separates the next word from the words collected so far.
+     * <p>
+     * The bounds of the current chunk start out as infinities instead of fixed page-size guesses, so coordinates
+     * of any magnitude are tracked correctly.
+     *
+     * @param textPositions         the words to split, in reading order
+     * @param horizontalRulingLines horizontal rulings that may separate words
+     * @param verticalRulingLines   vertical rulings that may separate words
+     * @return the resulting blocks; empty if no words were given
+     */
+    public List<AbstractPageBlock> splitZonesAtRulings(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
+
+        int indexOnPage = 0;
+        List<TextPositionSequence> chunkWords = new ArrayList<>();
+        List<AbstractPageBlock> chunkBlockList = new ArrayList<>();
+
+        float minX = Float.POSITIVE_INFINITY;
+        float maxX = Float.NEGATIVE_INFINITY;
+        float minY = Float.POSITIVE_INFINITY;
+        float maxY = Float.NEGATIVE_INFINITY;
+        TextPositionSequence prev = null;
+
+        for (TextPositionSequence word : textPositions) {
+
+            // The first word of a chunk can never cause a split, so the ruling test is only run once bounds exist.
+            boolean startsNewChunk = prev != null //
+                    && (!prev.getDir().equals(word.getDir()) || isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines));
+
+            if (startsNewChunk) {
+
+                chunkBlockList.add(buildTextBlock(chunkWords, indexOnPage));
+                indexOnPage++;
+                chunkWords = new ArrayList<>();
+
+                minX = Float.POSITIVE_INFINITY;
+                maxX = Float.NEGATIVE_INFINITY;
+                minY = Float.POSITIVE_INFINITY;
+                maxY = Float.NEGATIVE_INFINITY;
+            }
+
+            chunkWords.add(word);
+            prev = word;
+
+            minX = Math.min(minX, word.getMinXDirAdj());
+            maxX = Math.max(maxX, word.getMaxXDirAdj());
+            minY = Math.min(minY, word.getMinYDirAdj());
+            maxY = Math.max(maxY, word.getMaxYDirAdj());
+        }
+
+        // buildTextBlock returns null when no words are left, which only happens for an empty input.
+        TextPageBlock lastBlock = buildTextBlock(chunkWords, indexOnPage);
+        if (lastBlock != null) {
+            chunkBlockList.add(lastBlock);
+        }
+
+        return chunkBlockList;
+    }
+
+
+    /**
+     * Compares two values with a tolerance.
+     *
+     * @return {@code true} if the values differ by less than THRESHOLD
+     */
+    private boolean equalsWithThreshold(float f1, float f2) {
+
+        float distance = Math.abs(f1 - f2);
+        return distance < THRESHOLD;
+    }
+
+
+    /**
+     * Creates a text block that spans all given words and records the most frequent font, style, font size,
+     * word height and space width as well as the highest font size.
+     * If all sequences of the block share the same minY (rounded to three decimals), they are sorted by minX.
+     *
+     * @param wordBlockList the words of the block; the list itself is passed to the TextPageBlock constructor
+     * @param indexOnPage   not evaluated by this method
+     * @return the new block, or {@code null} if the list is empty
+     */
+    private TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
+
+        if (wordBlockList.isEmpty()) {
+            return null;
+        }
+
+        FloatFrequencyCounter heights = new FloatFrequencyCounter();
+        FloatFrequencyCounter fontSizes = new FloatFrequencyCounter();
+        FloatFrequencyCounter spaceWidths = new FloatFrequencyCounter();
+        StringFrequencyCounter fonts = new StringFrequencyCounter();
+        StringFrequencyCounter styles = new StringFrequencyCounter();
+
+        TextPageBlock result = null;
+        for (TextPositionSequence word : wordBlockList) {
+
+            heights.add(word.getTextHeight());
+            fontSizes.add(word.getFontSize());
+            spaceWidths.add(word.getSpaceWidth());
+            fonts.add(word.getFont());
+            styles.add(word.getFontStyle());
+
+            if (result != null) {
+                // Grow the block so that it also covers this word.
+                TextPageBlock grown = result.union(word);
+                result.resize(grown.getMinX(), grown.getMinY(), grown.getWidth(), grown.getHeight());
+                continue;
+            }
+            // The first word defines the initial bounds and the rotation of the block.
+            result = new TextPageBlock(word.getMinXDirAdj(), word.getMaxXDirAdj(), word.getMinYDirAdj(), word.getMaxYDirAdj(), wordBlockList, word.getRotation());
+        }
+
+        result.setMostPopularWordFont(fonts.getMostPopular());
+        result.setMostPopularWordStyle(styles.getMostPopular());
+        result.setMostPopularWordFontSize(fontSizes.getMostPopular());
+        result.setMostPopularWordHeight(heights.getMostPopular());
+        result.setMostPopularWordSpaceWidth(spaceWidths.getMostPopular());
+        result.setHighestFontSize(fontSizes.getHighest());
+
+        List<TextPositionSequence> sequences = result.getSequences();
+        if (sequences != null && sequences.stream().map(sequence -> round(sequence.getMinYDirAdj(), 3)).collect(toSet()).size() == 1) {
+            sequences.sort(Comparator.comparing(TextPositionSequence::getMinXDirAdj));
+        }
+        return result;
+    }
+
+
+    /**
+     * Checks whether a ruling lies between the chunk collected so far and the next word.
+     * Two connecting segments are tested, each against both the vertical and the horizontal rulings:
+     * from (maxX, minY) of the chunk to (minX, minY) of the word, and from (minX, minY) of the chunk to
+     * (minX, maxY) of the word. The maxY of the chunk is not used.
+     *
+     * @return {@code true} if any ruling intersects one of the two segments
+     */
+    private boolean isSplitByRuling(float minX, float minY, float maxX, float maxY, TextPositionSequence word, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
+
+        var degrees = word.getDir().getDegrees();
+        var pageWidth = word.getPageWidth();
+        var pageHeight = word.getPageHeight();
+        var wordMinX = word.getMinXDirAdj();
+        var wordMinY = word.getMinYDirAdj();
+        var wordMaxY = word.getMaxYDirAdj();
+
+        if (isSplitByRuling(maxX, minY, wordMinX, wordMinY, verticalRulingLines, degrees, pageWidth, pageHeight)) {
+            return true;
+        }
+        if (isSplitByRuling(minX, minY, wordMinX, wordMaxY, horizontalRulingLines, degrees, pageWidth, pageHeight)) {
+            return true;
+        }
+        if (isSplitByRuling(maxX, minY, wordMinX, wordMinY, horizontalRulingLines, degrees, pageWidth, pageHeight)) {
+            return true;
+        }
+        return isSplitByRuling(minX, minY, wordMinX, wordMaxY, verticalRulingLines, degrees, pageWidth, pageHeight);
+    }
+
+
+    /**
+     * Checks whether any of the given rulings, converted via RulingTextDirAdjustUtil.convertToDirAdj for the given
+     * direction and page size, intersects the segment from (previousX2, previousY1) to (currentX1, currentY1).
+     *
+     * @return {@code true} as soon as one converted ruling intersects the segment
+     */
+    private boolean isSplitByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines, float dir, float pageWidth, float pageHeight) {
+
+        return rulingLines.stream()
+                .map(ruling -> RulingTextDirAdjustUtil.convertToDirAdj(ruling, dir, pageWidth, pageHeight))
+                .anyMatch(line -> line.intersectsLine(previousX2, previousY1, currentX1, currentY1));
+    }
+
+
+    /**
+     * Rounds a value to the given number of decimal places.
+     *
+     * @param value         the value to round
+     * @param decimalPoints the number of decimal places to keep
+     * @return the rounded value
+     */
+    private double round(float value, int decimalPoints) {
+
+        double factor = Math.pow(10, decimalPoints);
+        long scaled = Math.round(value * factor);
+        return scaled / factor;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java
@ -1,330 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
-
-
-// TODO: figure out, why this fails the build
-// import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
-import com.knecon.fforesight.service.layoutparser.processor.model.Orientation;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
-import com.knecon.fforesight.service.layoutparser.processor.utils.RulingTextDirAdjustUtil;
-import org.springframework.stereotype.Service;
-
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Stream;
-
-@Service
-@SuppressWarnings("all")
-public class TaasBlockificationService {
-
-    private static final float THRESHOLD = 1f;
-    private static final float Y_GAP_SPLIT_HEIGHT_MODIFIER = 1.25f; // multiplied with text height
-    private static final float INTERSECTS_Y_THRESHOLD = 4;// 2 * HEIGHT_PADDING // This is exactly 2 times our position height padding. This is required to find boxes that are visually intersecting.
-    private static final int X_GAP_SPLIT_CONSTANT = 50;
-    public static final int X_ALIGNMENT_THRESHOLD = 1;
-    public static final int NEGATIVE_X_GAP_THRESHOLD = -5;
-
-    private Pattern listIdentifier = Pattern.compile("^(?:(?:[1-9]|1\\d|20|[ivxlc]|[a-z])\\s*(?:[.)]))|\\uF0B7", Pattern.CASE_INSENSITIVE);
-
-
-    /**
-     * This method is building blocks by expanding the minX/maxX and minY/maxY value on each word that is not split by the conditions.
-     * This method must use text direction adjusted postions (DirAdj). Where {0,0} is on the upper left. Never try to change this!
-     * Rulings (Table lines) must be adjusted to the text directions as well, when checking if a block is split by a ruling.
-     *
-     * @param textPositions         The words of a page.
-     * @param horizontalRulingLines Horizontal table lines.
-     * @param verticalRulingLines   Vertical table lines.
-     * @return ClassificationPage object that contains the Textblock and text statistics.
-     */
-    public ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
-
-        List<TextPageBlock> classificationTextBlocks = constructFineGranularTextPageBlocks(textPositions, horizontalRulingLines, verticalRulingLines);
-        classificationTextBlocks = mergeTextPageBlocksAligningX(classificationTextBlocks);
-        classificationTextBlocks = mergeIntersectingTextBlocksUntilConvergence(classificationTextBlocks);
-
-        return new ClassificationPage(new ArrayList<>(classificationTextBlocks.stream().map(classificationTextBlock -> (AbstractPageBlock) classificationTextBlock).toList()));
-    }
-
-
-    private List<TextPageBlock> mergeIntersectingTextBlocksUntilConvergence(List<TextPageBlock> classificationTextBlocks) {
-
-        int currentSize = classificationTextBlocks.size();
-        while (true) {
-            classificationTextBlocks = mergeTextPageBlocksAlmostIntersecting(classificationTextBlocks);
-            if (classificationTextBlocks.size() == currentSize) {
-                break;
-            }
-            currentSize = classificationTextBlocks.size();
-        }
-        return classificationTextBlocks;
-    }
-
-
-    private List<TextPageBlock> mergeTextPageBlocksAligningX(List<TextPageBlock> classificationTextBlocks) {
-
-        if (classificationTextBlocks.isEmpty()) {
-            return new ArrayList<>();
-        }
-        List<List<TextPageBlock>> textBlocksToMerge = new LinkedList<>();
-        List<TextPageBlock> currentTextBlocksToMerge = new LinkedList<>();
-        textBlocksToMerge.add(currentTextBlocksToMerge);
-        TextPageBlock previousTextBlock = null;
-        Float lastLineGap = null;
-        for (TextPageBlock currentTextBlock : classificationTextBlocks) {
-            if (previousTextBlock == null) {
-                currentTextBlocksToMerge.add(currentTextBlock);
-                previousTextBlock = currentTextBlock;
-                continue;
-            }
-
-
-            Matcher listIdentifierPattern = listIdentifier.matcher(currentTextBlock.getText());
-            boolean isListIdentifier = listIdentifierPattern.find();
-
-            boolean yGap = Math.abs(currentTextBlock.getPdfMaxY() - previousTextBlock.getPdfMinY()) < previousTextBlock.getMostPopularWordHeight() * Y_GAP_SPLIT_HEIGHT_MODIFIER;
-
-            boolean sameFont = previousTextBlock.getMostPopularWordFont().equals(currentTextBlock.getMostPopularWordFont()) && previousTextBlock.getMostPopularWordFontSize() == currentTextBlock.getMostPopularWordFontSize();
-//            boolean yGap = previousTextBlock != null && currentTextBlock.getMinYDirAdj() - maxY > Math.min(word.getHeight(), prev.getHeight()) * Y_GAP_SPLIT_HEIGHT_MODIFIER;
-
-            boolean alignsXRight = Math.abs(currentTextBlock.getPdfMaxX() - previousTextBlock.getPdfMaxX()) < X_ALIGNMENT_THRESHOLD;
-            boolean alignsXLeft = Math.abs(currentTextBlock.getPdfMinX() - previousTextBlock.getPdfMinX()) < X_ALIGNMENT_THRESHOLD;
-//            boolean smallYGap = Math.abs(currentTextBlock.getPdfMaxY() - previousTextBlock.getPdfMinY()) < yGap;
-            if (yGap && sameFont && !isListIdentifier) {
-                currentTextBlocksToMerge.add(currentTextBlock);
-
-            } else {
-                currentTextBlocksToMerge = new LinkedList<>();
-                currentTextBlocksToMerge.add(currentTextBlock);
-                textBlocksToMerge.add(currentTextBlocksToMerge);
-            }
-            previousTextBlock = currentTextBlock;
-        }
-        return textBlocksToMerge.stream().map(TextPageBlock::merge).toList();
-    }
-
-
-    private List<TextPageBlock> mergeTextPageBlocksAlmostIntersecting(List<TextPageBlock> textPageBlocks) {
-
-        Set<TextPageBlock> alreadyMerged = new HashSet<>();
-        List<List<TextPageBlock>> textBlocksToMerge = new LinkedList<>();
-        for (TextPageBlock textPageBlock : textPageBlocks) {
-            if (alreadyMerged.contains(textPageBlock)) {
-                continue;
-            }
-            alreadyMerged.add(textPageBlock);
-            textBlocksToMerge.add(Stream.concat(Stream.of(textPageBlock),
-                            textPageBlocks.stream().filter(textPageBlock2 -> textPageBlock.almostIntersects(textPageBlock2, INTERSECTS_Y_THRESHOLD, 0) && !alreadyMerged.contains(textPageBlock2)).peek(alreadyMerged::add))
-                    .toList());
-        }
-        return textBlocksToMerge.stream().map(TextPageBlock::merge).toList();
-    }
-
-
-    private void assignOrientations(List<TextPageBlock> classificationTextBlocks) {
-
-        Iterator<TextPageBlock> itty = classificationTextBlocks.iterator();
-
-        TextPageBlock previousLeft = null;
-        TextPageBlock previousRight = null;
-        while (itty.hasNext()) {
-            TextPageBlock block = (TextPageBlock) itty.next();
-
-            if (previousLeft != null && block.getOrientation().equals(Orientation.LEFT)) {
-                if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()) {
-                    previousLeft.add(block);
-                    itty.remove();
-                    continue;
-                }
-            }
-
-            if (previousRight != null && block.getOrientation().equals(Orientation.RIGHT)) {
-                if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()) {
-                    previousRight.add(block);
-                    itty.remove();
-                    continue;
-                }
-            }
-
-            if (block.getOrientation().equals(Orientation.LEFT)) {
-                previousLeft = block;
-            } else if (block.getOrientation().equals(Orientation.RIGHT)) {
-                previousRight = block;
-            }
-        }
-
-        itty = classificationTextBlocks.iterator();
-        TextPageBlock previous = null;
-        while (itty.hasNext()) {
-            TextPageBlock block = (TextPageBlock) itty.next();
-
-            if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(
-                    block.getMaxY(),
-                    previous.getMaxY()) || previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
-                    .equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) {
-                previous.add(block);
-                itty.remove();
-                continue;
-            }
-
-            previous = block;
-        }
-    }
-
-
-    private List<TextPageBlock> constructFineGranularTextPageBlocks(List<TextPositionSequence> textPositions,
-                                                                    List<Ruling> horizontalRulingLines,
-                                                                    List<Ruling> verticalRulingLines) {
-
-        int indexOnPage = 0;
-        List<TextPositionSequence> wordClusterToCombine = new ArrayList<>();
-        List<TextPageBlock> classificationTextBlocks = new ArrayList<>();
-
-        float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
-        TextPositionSequence prev = null;
-        // TODO: make static final constant
-
-
-        boolean wasSplitted = false;
-        Float splitX1 = null;
-        for (TextPositionSequence word : textPositions) {
-
-            Matcher listIdentifierPattern = listIdentifier.matcher(word.toString());
-
-            boolean yGap = prev != null && word.getMinYDirAdj() - maxY > Math.min(word.getHeight(), prev.getHeight()) * Y_GAP_SPLIT_HEIGHT_MODIFIER;
-            boolean sameLine = prev != null && equalsWithThreshold(prev.getMinYDirAdj(), word.getMinYDirAdj());
-            boolean positiveXGapInline = prev != null && maxX + X_GAP_SPLIT_CONSTANT < word.getMinXDirAdj() && sameLine;
-            boolean negativeXGap = prev != null && word.getMinXDirAdj() - minX < NEGATIVE_X_GAP_THRESHOLD;
-            boolean startFromTop = prev != null && word.getMinYDirAdj() < prev.getMinYDirAdj() - prev.getTextHeight();
-            boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
-            boolean splitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
-            boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
-            boolean fontChange = prev != null && (!word.getFont().equals(prev.getFont()) || !word.getFontStyle()
-                    .equals(prev.getFontStyle()) || word.getFontSize() != prev.getFontSize());
-            boolean newline = prev != null && Math.abs(word.getMinYDirAdj() - prev.getMinYDirAdj()) > word.getHeight();
-            boolean isListIdentifier = listIdentifierPattern.matches();
-
-            if (prev != null && (prev.isParagraphStart() || negativeXGap || positiveXGapInline || yGap || startFromTop || splitByRuling || (newline && (fontChange || isListIdentifier)))) {
-//            if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) {
-
-                Orientation prevOrientation = null;
-                if (!classificationTextBlocks.isEmpty()) {
-                    prevOrientation = classificationTextBlocks.get(classificationTextBlocks.size() - X_ALIGNMENT_THRESHOLD).getOrientation();
-                }
-
-                TextPageBlock classificationTextBlock = TextPageBlock.fromTextPositionSequences(wordClusterToCombine);
-
-                classificationTextBlocks.add(classificationTextBlock);
-                wordClusterToCombine = new ArrayList<>();
-
-                if (positiveXGapInline && !splitByRuling) {
-                    wasSplitted = true;
-                    classificationTextBlock.setOrientation(Orientation.LEFT);
-                    splitX1 = word.getMinXDirAdj();
-                } else if (newLineAfterSplit && !splitByRuling) {
-                    wasSplitted = false;
-                    classificationTextBlock.setOrientation(Orientation.RIGHT);
-                    splitX1 = null;
-                } else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (yGap || !startFromTop || !positiveXGapInline || !newLineAfterSplit || !splitByRuling)) {
-                    classificationTextBlock.setOrientation(Orientation.LEFT);
-                }
-
-                minX = 1000;
-                maxX = 0;
-                minY = 1000;
-                maxY = 0;
-                prev = null;
-            }
-
-            wordClusterToCombine.add(word);
-
-            prev = word;
-            if (word.getMinXDirAdj() < minX) {
-                minX = word.getMinXDirAdj();
-            }
-            if (word.getMaxXDirAdj() > maxX) {
-                maxX = word.getMaxXDirAdj();
-            }
-            if (word.getMinYDirAdj() < minY) {
-                minY = word.getMinYDirAdj();
-            }
-            if (word.getMaxYDirAdj() > maxY) {
-                maxY = word.getMaxYDirAdj();
-            }
-        }
-
-        TextPageBlock classificationTextBlock = TextPageBlock.fromTextPositionSequences(wordClusterToCombine);
-        if (classificationTextBlock != null) {
-            classificationTextBlocks.add(classificationTextBlock);
-        }
-        return classificationTextBlocks;
-    }
-
-
-    private boolean equalsWithThreshold(float f1, float f2) {
-
-        return Math.abs(f1 - f2) < THRESHOLD;
-    }
-
-
-    private boolean isSplitByRuling(float minX,
-                                    float minY,
-                                    float maxX,
-                                    float maxY,
-                                    TextPositionSequence word,
-                                    List<Ruling> horizontalRulingLines,
-                                    List<Ruling> verticalRulingLines) {
-
-        return isSplitByRuling(maxX,
-                minY,
-                word.getMinXDirAdj(),
-                word.getMinYDirAdj(),
-                verticalRulingLines,
-                word.getDir().getDegrees(),
-                word.getPageWidth(),
-                word.getPageHeight()) //
-                || isSplitByRuling(minX,
-                minY,
-                word.getMinXDirAdj(),
-                word.getMaxYDirAdj(),
-                horizontalRulingLines,
-                word.getDir().getDegrees(),
-                word.getPageWidth(),
-                word.getPageHeight()) //
-                || isSplitByRuling(maxX,
-                minY,
-                word.getMinXDirAdj(),
-                word.getMinYDirAdj(),
-                horizontalRulingLines,
-                word.getDir().getDegrees(),
-                word.getPageWidth(),
-                word.getPageHeight()) //
-                || isSplitByRuling(minX,
-                minY,
-                word.getMinXDirAdj(),
-                word.getMaxYDirAdj(),
-                verticalRulingLines,
-                word.getDir().getDegrees(),
-                word.getPageWidth(),
-                word.getPageHeight()); //
-    }
-
-
-    private boolean isSplitByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines, float dir, float pageWidth, float pageHeight) {
-
-        for (Ruling ruling : rulingLines) {
-            var line = RulingTextDirAdjustUtil.convertToDirAdj(ruling, dir, pageWidth, pageHeight);
-            if (line.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-}
-
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/TaasClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/TaasClassificationService.java
@ -1,114 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.services.classification;
-
-import java.util.List;
-import java.util.regex.Pattern;
-
-import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
-import org.springframework.stereotype.Service;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
-import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
-import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
-import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;
-
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-
-@Slf4j
-@Service
-@RequiredArgsConstructor
-public class TaasClassificationService {
-
-    private final BodyTextFrameService bodyTextFrameService;
-
-
-    public void classifyDocument(ClassificationDocument document) {
-
-
-        List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
-
-        log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
-
-        for (ClassificationPage page : document.getPages()) {
-
-            classifyPage(page, document, headlineFontSizes);
-        }
-    }
-
-
-    public void classifyPage(ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
-
-        for (AbstractPageBlock textBlock : page.getTextBlocks()) {
-            if (textBlock instanceof TextPageBlock) {
-                classifyBlock((TextPageBlock) textBlock, page, document, headlineFontSizes);
-            }
-        }
-    }
-
-
-    public void classifyBlock(TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
-
-        var bodyTextFrame = page.getBodyTextFrame();
-
-        if (document.getFontSizeCounter().getMostPopular() == null) {
-            textBlock.setClassification(PageBlockType.OTHER);
-            return;
-        }
-        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER)
-                || PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.getRotation())) {
-            textBlock.setClassification(PageBlockType.HEADER);
-        } else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER)
-                || PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock, page.getRotation())) {
-            textBlock.setClassification(PageBlockType.FOOTER);
-        } else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
-                document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
-                .size() == 1)) {
-            if (!Pattern.matches("[0-9]+", textBlock.toString())) {
-                textBlock.setClassification(PageBlockType.TITLE);
-            }
-        } else if (textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
-                .getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle().equals("bold") || !document.getFontStyleCounter()
-                .getCountPerValue()
-                .containsKey("bold") && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1) && textBlock.getSequences()
-                .get(0)
-                .getTextPositions()
-                .get(0)
-                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
-
-            for (int i = 1; i <= headlineFontSizes.size(); i++) {
-                if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
-                    textBlock.setClassification(PageBlockType.getHeadlineType(i));
-                    document.setHeadlines(true);
-                }
-            }
-        } else if (!textBlock.getText().startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordStyle()
-                .equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences()
-                .get(0)
-                .getTextPositions()
-                .get(0)
-                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
-            textBlock.setClassification(PageBlockType.getHeadlineType(headlineFontSizes.size() + 1));
-            document.setHeadlines(true);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
-                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH_BOLD);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFont()
-                .equals(document.getFontCounter().getMostPopular()) && textBlock.getMostPopularWordStyle()
-                .equals(document.getFontStyleCounter().getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
-                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter()
-                .getMostPopular()
-                .equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH_ITALIC);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH_UNKNOWN);
-        } else {
-            textBlock.setClassification(PageBlockType.OTHER);
-        }
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java
@ -0,0 +1,59 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.LineBuilderService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.NearestNeighbourService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.ReadingOrderService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.SpacingService;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.ZoneBuilderService;
+
+import lombok.RequiredArgsConstructor;
+
+/**
+ * Splits the text of one page into {@link Zone}s using the docstrum services.
+ * <p>
+ * The words are processed separately for each {@link TextDirection}; the zones of all directions are then
+ * handed to the {@link ReadingOrderService} to be put into reading order.
+ */
+@Service
+@RequiredArgsConstructor
+public class DocstrumSegmentationService {
+
+    /** Upper bound applied to the line spacing reported by the {@link SpacingService}. */
+    private static final int MAX_LINE_SPACING = 20;
+
+    /** Directions that are segmented independently, in the order in which their zones are collected. */
+    private static final List<TextDirection> DIRECTIONS = List.of(TextDirection.ZERO,
+            TextDirection.QUARTER_CIRCLE,
+            TextDirection.HALF_CIRCLE,
+            TextDirection.THREE_QUARTER_CIRCLE);
+
+    private final NearestNeighbourService nearestNeighbourService;
+    private final SpacingService spacingService;
+    private final LineBuilderService lineBuilderService;
+    private final ZoneBuilderService zoneBuilderService;
+    private final ReadingOrderService readingOrderService;
+
+
+    /**
+     * Builds the zones of a page and resolves their reading order.
+     *
+     * @param textPositions the words of the page
+     * @param xyOrder       flag passed through unchanged to {@link ReadingOrderService#resolve}
+     * @return the zones of all text directions as returned by the reading order service
+     */
+    public List<Zone> segmentPage(List<TextPositionSequence> textPositions, boolean xyOrder) {
+
+        List<Zone> zones = new ArrayList<>();
+        for (TextDirection direction : DIRECTIONS) {
+            zones.addAll(computeZones(textPositions, direction));
+        }
+
+        return readingOrderService.resolve(zones, xyOrder);
+    }
+
+
+    /**
+     * Runs the docstrum pipeline (nearest neighbours, spacing estimation, line building, zone building)
+     * on the words that have the given text direction.
+     *
+     * @param textPositions the words of the page, in any direction
+     * @param direction     the only direction to consider in this run
+     * @return the zones built for that direction; empty if the page has no text in that direction
+     */
+    private List<Zone> computeZones(List<TextPositionSequence> textPositions, TextDirection direction) {
+
+        // Collected into a mutable list, as before, because the services receive and may modify it.
+        List<Character> characters = textPositions.stream()
+                .filter(t -> t.getDir() == direction)
+                .map(TextPositionSequence::getTextPositions)
+                .flatMap(List::stream)
+                .map(Character::new)
+                .collect(Collectors.toList());
+
+        // Nothing to segment for this direction: skip the pipeline instead of
+        // estimating spacings from an empty input.
+        if (characters.isEmpty()) {
+            return List.of();
+        }
+
+        nearestNeighbourService.findNearestNeighbors(characters);
+
+        var characterSpacing = spacingService.computeCharacterSpacing(characters);
+        var lineSpacing = Math.min(spacingService.computeLineSpacing(characters), MAX_LINE_SPACING);
+
+        var lines = lineBuilderService.buildLines(characters, characterSpacing, lineSpacing);
+        return zoneBuilderService.buildZones(lines, characterSpacing, lineSpacing);
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java
@ -0,0 +1,25 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+/**
+ * Selects neighbors whose angle lies in the half-open range from {@code lowerAngle} (inclusive) to {@code upperAngle} (exclusive).
+ * <p>
+ * The bounds are normalised on construction: a lower bound below -PI/2 is shifted up by PI and an upper bound
+ * at or above PI/2 is shifted down by PI. If this leaves the lower bound above the upper bound, the range is
+ * treated as wrapping around.
+ */
+public class AngleFilter {
+
+    protected double lowerAngle;
+    protected double upperAngle;
+
+
+    /**
+     * @param lowerAngle inclusive lower bound, normalised as described on the class
+     * @param upperAngle exclusive upper bound, normalised as described on the class
+     */
+    public AngleFilter(double lowerAngle, double upperAngle) {
+
+        if (lowerAngle < -Math.PI / 2) {
+            this.lowerAngle = lowerAngle + Math.PI;
+        } else {
+            this.lowerAngle = lowerAngle;
+        }
+
+        if (upperAngle >= Math.PI / 2) {
+            this.upperAngle = upperAngle - Math.PI;
+        } else {
+            this.upperAngle = upperAngle;
+        }
+    }
+
+
+    /**
+     * @param neighbor the neighbor whose angle is tested
+     * @return {@code true} if the neighbor's angle lies inside the (possibly wrapping) range
+     */
+    public boolean matches(Neighbor neighbor) {
+
+        final double angle = neighbor.getAngle();
+        final boolean atOrAboveLower = lowerAngle <= angle;
+        final boolean belowUpper = angle < upperAngle;
+
+        // Ordinary range: both bounds must hold. Wrapping range: either side of the wrap is accepted.
+        return lowerAngle <= upperAngle ? atOrAboveLower && belowUpper : atOrAboveLower || belowUpper;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java
@ -0,0 +1,57 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.awt.geom.Rectangle2D;
+
+import lombok.Data;
+
+/**
+ * Base class for elements described by a rectangular bounding box.
+ * The geometry accessors delegate to the wrapped {@link Rectangle2D}; Lombok's {@code @Data} generates the accessors for {@code bBox} itself.
+ */
+@Data
+public abstract class BoundingBox {
+
+    private Rectangle2D bBox;
+
+
+    /** @return the x coordinate of the bounding box */
+    public double getX() {
+
+        return bBox.getX();
+    }
+
+
+    /** @return the y coordinate of the bounding box */
+    public double getY() {
+
+        return bBox.getY();
+    }
+
+
+    /** @return the width of the bounding box */
+    public double getWidth() {
+
+        return bBox.getWidth();
+    }
+
+
+    /** @return the height of the bounding box */
+    public double getHeight() {
+
+        return bBox.getHeight();
+    }
+
+
+    /** @return width multiplied by height of the bounding box */
+    public double getArea() {
+
+        return bBox.getWidth() * bBox.getHeight();
+    }
+
+
+    /**
+     * Checks whether a rectangle lies inside this bounding box, allowing each edge to stick out by at most {@code tolerance}.
+     *
+     * @param contained the rectangle to test
+     * @param tolerance the distance by which each edge of {@code contained} may exceed this bounding box
+     * @return {@code true} if all four edges are within the tolerance
+     */
+    public boolean contains(Rectangle2D contained, double tolerance) {
+
+        final boolean minXInside = bBox.getX() <= contained.getX() + tolerance;
+        final boolean minYInside = bBox.getY() <= contained.getY() + tolerance;
+        final boolean maxXInside = bBox.getX() + bBox.getWidth() >= contained.getX() + contained.getWidth() - tolerance;
+        final boolean maxYInside = bBox.getY() + bBox.getHeight() >= contained.getY() + contained.getHeight() - tolerance;
+
+        return minXInside && minYInside && maxXInside && maxYInside;
+    }
+
+
+    /**
+     * @param other the bounding box to compare with
+     * @return {@code true} if the y ranges of both bounding boxes overlap or touch
+     */
+    public boolean intersectsY(BoundingBox other) {
+
+        final Rectangle2D own = this.getBBox();
+        final Rectangle2D theirs = other.getBBox();
+
+        return own.getMinY() <= theirs.getMaxY() && own.getMaxY() >= theirs.getMinY();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java
@ -0,0 +1,85 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
+
+import lombok.Data;
+
+/**
+ * A single text position as used by the docstrum services, represented by one reference point and its list of neighbors.
+ * The reference point is the direction-adjusted position of the {@link RedTextPosition} shifted by half its width and half its height.
+ */
+@Data
+public class Character {
+
+    private final double x;
+    private final double y;
+    private final RedTextPosition textPosition;
+
+    private List<Neighbor> neighbors = new ArrayList<>();
+
+
+    /**
+     * @param chunk the text position this character wraps
+     */
+    public Character(RedTextPosition chunk) {
+
+        this.textPosition = chunk;
+        this.x = chunk.getXDirAdj() + chunk.getWidthDirAdj() / 2;
+        this.y = chunk.getYDirAdj() + chunk.getHeightDir() / 2;
+    }
+
+
+    /** @return the height of the wrapped text position */
+    public double getHeight() {
+
+        return textPosition.getHeightDir();
+    }
+
+
+    /**
+     * @param character the other character
+     * @return the Euclidean distance between the reference points of both characters
+     */
+    public double distance(Character character) {
+
+        final double deltaX = getX() - character.getX();
+        final double deltaY = getY() - character.getY();
+        return Math.sqrt(deltaX * deltaX + deltaY * deltaY);
+    }
+
+
+    /**
+     * @param character the other character
+     * @return the absolute difference of the x coordinates
+     */
+    public double horizontalDistance(Character character) {
+
+        final double deltaX = getX() - character.getX();
+        return Math.abs(deltaX);
+    }
+
+
+    /**
+     * @param character the other character
+     * @return the absolute difference of the y coordinates
+     */
+    public double verticalDistance(Character character) {
+
+        final double deltaY = getY() - character.getY();
+        return Math.abs(deltaY);
+    }
+
+
+    /**
+     * Compares the extents of two characters after projecting them with a rotation angle that is fixed at 0.
+     * Each character contributes the range from its reference point to the reference point shifted by its width and height.
+     *
+     * @param other the other character
+     * @return the distance between the two middle values of the four sorted range ends, positive if the ranges overlap and negative if they do not
+     */
+    public double overlappingDistance(Character other) {
+
+        final double sin = Math.sin(-0);
+        final double cos = Math.cos(-0);
+
+        final double ownStart = cos * x - sin * y;
+        final double ownEnd = cos * (x + textPosition.getWidthDirAdj()) - sin * (y + textPosition.getHeightDir());
+        final double otherStart = cos * other.x - sin * other.y;
+        final double otherEnd = cos * (other.x + other.textPosition.getWidthDirAdj()) - sin * (other.y + other.textPosition.getHeightDir());
+
+        final boolean overlapping = ownEnd >= otherStart && otherEnd >= ownStart;
+
+        // After sorting, the two inner values delimit either the shared part or the gap.
+        final double[] ends = {ownStart, ownEnd, otherStart, otherEnd};
+        Arrays.sort(ends);
+        final double magnitude = Math.abs(ends[2] - ends[1]);
+
+        return overlapping ? magnitude : -magnitude;
+    }
+
+
+    /**
+     * @param neighbors the new list of neighbors
+     */
+    public void setNeighbors(List<Neighbor> neighbors) {
+
+        this.neighbors = neighbors;
+    }
+
+
+    /**
+     * Computes the angle of the line through both reference points.
+     * The line is always directed from the character with the smaller x coordinate to the one with the larger x coordinate.
+     *
+     * @param character the other character
+     * @return the result of {@link Math#atan2(double, double)} for that direction
+     */
+    public double angle(Character character) {
+
+        final boolean thisIsFurtherRight = getX() > character.getX();
+        final Character from = thisIsFurtherRight ? character : this;
+        final Character to = thisIsFurtherRight ? this : character;
+
+        return Math.atan2(to.getY() - from.getY(), to.getX() - from.getX());
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Histogram.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Histogram.java
@ -0,0 +1,90 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+public class Histogram {
+
+    private static final double EPSILON = 1.0e-6;
+    private final double min;
+    private final double resolution;
+    private double[] frequencies;
+
+
+    public Histogram(double minValue, double maxValue, double resolution) {
+
+        this.min = minValue - EPSILON;
+        double delta = maxValue - minValue + 2 * EPSILON;
+        int size = Math.max(1, (int) Math.round((maxValue - minValue) / resolution));
+        this.resolution = delta / size;
+        this.frequencies = new double[size];
+    }
+
+
+    public void kernelSmooth(double[] kernel) {
+
+        double[] newFrequencies = new double[frequencies.length];
+        int shift = (kernel.length - 1) / 2;
+        for (int i = 0; i < kernel.length; i++) {
+            int jStart = Math.max(0, i - shift);
+            int jEnd = Math.min(frequencies.length, frequencies.length + i - shift);
+            for (int j = jStart; j < jEnd; j++) {
+                newFrequencies[j - i + shift] += kernel[i] * frequencies[j];
+            }
+        }
+        frequencies = newFrequencies;
+    }
+
+
+    public double[] createGaussianKernel(double length, double stdDeviation) {
+
+        int r = (int) Math.round(length / resolution) / 2;
+
+        int size = 2 * r + 1;
+        double[] kernel = new double[size];
+        double sum = 0;
+        double b = 2 * (stdDeviation / resolution) * (stdDeviation / resolution);
+        double a = 1 / Math.sqrt(Math.PI * b);
+        for (int i = 0; i < size; i++) {
+            kernel[i] = a * Math.exp(-(i - r) * (i - r) / b);
+            sum += kernel[i];
+        }
+        for (int i = 0; i < size; i++) {
+            kernel[i] /= sum;
+        }
+        return kernel;
+    }
+
+
+    public void gaussianSmooth(double windowLength, double stdDeviation) {
+
+        kernelSmooth(createGaussianKernel(windowLength, stdDeviation));
+    }
+
+
+    public void add(double value) {
+
+        frequencies[(int) ((value - min) / resolution)] += 1.0;
+    }
+
+
+    public int getSize() {
+
+        return frequencies.length;
+    }
+
+
+    public double getPeakValue() {
+
+        int peakIndex = 0;
+        for (int i = 1; i < frequencies.length; i++) {
+            if (frequencies[i] > frequencies[peakIndex]) {
+                peakIndex = i;
+            }
+        }
+        int peakEndIndex = peakIndex + 1;
+        final double EPS = 0.0001;
+        while (peakEndIndex < frequencies.length && Math.abs(frequencies[peakEndIndex] - frequencies[peakIndex]) < EPS) {
+            peakEndIndex++;
+        }
+        return ((double) peakIndex + peakEndIndex) / 2 * resolution + min;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java
@ -0,0 +1,164 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.awt.geom.Rectangle2D;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+
+import lombok.Data;
+
+/**
+ * A text line: characters ordered from left to right, the straight segment fitted through their positions, the words
+ * the characters form and the bounding box enclosing all characters.
+ */
+@Data
+public class Line extends BoundingBox {
+
+    private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
+
+    /**
+     * Relative threshold below which the spread of the x positions is treated as zero and no slope is fitted.
+     */
+    private static final double DEGENERATE_SPREAD_RATIO = 1.0e-9;
+
+    private final double x0;
+    private final double y0;
+
+    private final double x1;
+    private final double y1;
+
+    private final double height;
+
+    private final List<Character> characters;
+    private final List<TextPositionSequence> words = new ArrayList<>();
+
+
+    /**
+     * Builds a line from its characters.
+     * <p>
+     * For two or more characters the segment is the least-squares regression line through the character positions,
+     * evaluated at the x position of the first and the last character. When all characters share (practically) the
+     * same x position no slope can be fitted; the segment is then placed horizontally at the mean y position instead
+     * of producing NaN or infinite end points. A single character yields a short horizontal segment around its
+     * position.
+     *
+     * @param characters  the characters of the line, ordered by x position; must not be empty
+     * @param wordSpacing reference spacing; gaps larger than this value times {@code WORD_DISTANCE_MULTIPLIER} start a
+     *                    new word
+     * @throws IndexOutOfBoundsException if {@code characters} is empty
+     */
+    public Line(List<Character> characters, double wordSpacing) {
+
+        this.characters = characters;
+
+        if (characters.size() >= 2) {
+            // linear regression
+            double n = characters.size();
+            double sx = 0.0;
+            double sxx = 0.0;
+            double sxy = 0.0;
+            double sy = 0.0;
+            for (Character character : characters) {
+                sx += character.getX();
+                sxx += character.getX() * character.getX();
+                sxy += character.getX() * character.getY();
+                sy += character.getY();
+            }
+
+            double denominator = n * sxx - sx * sx;
+            double a;
+            double b;
+            if (Math.abs(denominator) <= DEGENERATE_SPREAD_RATIO * n * sxx) {
+                // All x positions coincide: dividing by the (near) zero denominator would yield NaN or a slope made of
+                // rounding noise, so fall back to a horizontal segment through the mean y position.
+                b = 0.0;
+                a = sy / n;
+            } else {
+                b = (n * sxy - sx * sy) / denominator;
+                a = (sy - b * sx) / n;
+            }
+
+            this.x0 = characters.get(0).getX();
+            this.y0 = a + b * this.x0;
+            this.x1 = characters.get(characters.size() - 1).getX();
+            this.y1 = a + b * this.x1;
+        } else {
+            Character character = characters.get(0);
+            // A third of the character width to each side keeps the segment horizontal and gives it a non-zero length.
+            double dx = character.getTextPosition().getWidthDirAdj() / 3;
+            this.x0 = character.getX() - dx;
+            this.x1 = character.getX() + dx;
+            this.y0 = character.getY();
+            this.y1 = character.getY();
+        }
+        height = computeHeight();
+        computeWords(wordSpacing * WORD_DISTANCE_MULTIPLIER);
+        buildBBox();
+    }
+
+
+    /**
+     * Returns the direction of the fitted segment.
+     *
+     * @return the angle of the segment from its start point to its end point, in radians
+     */
+    public double getAngle() {
+
+        return Math.atan2(y1 - y0, x1 - x0);
+    }
+
+
+    /**
+     * Returns the length of the fitted segment.
+     *
+     * @return the Euclidean distance between start point and end point
+     */
+    public double getLength() {
+
+        return Math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
+    }
+
+
+    /**
+     * Averages the heights of all characters of this line.
+     */
+    private double computeHeight() {
+
+        return characters.stream().map(Character::getHeight).reduce(0d, Double::sum) / characters.size();
+    }
+
+
+    /**
+     * Computes how much the direction of this line deviates from the direction of another line, ignoring orientation.
+     *
+     * @param j the line to compare with
+     * @return the absolute angle difference, folded back when it exceeds a quarter turn
+     */
+    public double angularDifference(Line j) {
+
+        double diff = Math.abs(getAngle() - j.getAngle());
+        if (diff <= Math.PI / 2) {
+            return diff;
+        } else {
+            return Math.PI - diff;
+        }
+    }
+
+
+    /**
+     * Measures how far the horizontal extents of the fitted segments of this line and another line overlap or are
+     * apart.
+     *
+     * @param other the line to compare with
+     * @return the distance between the two middle bounds; positive when the extents overlap, negative when there is a
+     *         gap between them
+     */
+    public double horizontalDistance(Line other) {
+
+        double[] xs = new double[4];
+        xs[0] = x0;
+        xs[1] = x1;
+        xs[2] = other.x0;
+        xs[3] = other.x1;
+        // Decide on the overlap before sorting loses track of which bound belongs to which line.
+        boolean overlapping = xs[1] >= xs[2] && xs[3] >= xs[0];
+        Arrays.sort(xs);
+        return Math.abs(xs[2] - xs[1]) * (overlapping ? 1 : -1);
+    }
+
+
+    /**
+     * Measures the vertical distance between the mid points of the fitted segments of this line and another line.
+     *
+     * @param other the line to compare with
+     * @return the non-negative difference of the mid point y positions
+     */
+    public double verticalDistance(Line other) {
+
+        double ym = (y0 + y1) / 2;
+        double yn = (other.y0 + other.y1) / 2;
+        return Math.abs(ym - yn);
+    }
+
+
+    /**
+     * Splits the characters into words: a new word starts whenever the gap between the right edge of the previous
+     * character and the left edge of the current one exceeds the given spacing.
+     */
+    private void computeWords(double wordSpacing) {
+
+        TextPositionSequence word = new TextPositionSequence();
+        Character previous = null;
+        for (Character current : characters) {
+            if (previous != null) {
+                double dist = current.getTextPosition().getXDirAdj() - previous.getTextPosition().getXDirAdj() - previous.getTextPosition().getWidthDirAdj();
+                if (dist > wordSpacing) {
+                    words.add(word);
+                    word = new TextPositionSequence();
+                }
+            }
+            word.getTextPositions().add(current.getTextPosition());
+            previous = current;
+        }
+        words.add(word);
+    }
+
+
+    /**
+     * Sets the bounding box to the smallest rectangle enclosing the text positions of all characters.
+     */
+    private void buildBBox() {
+
+        double minX = Double.POSITIVE_INFINITY;
+        double minY = Double.POSITIVE_INFINITY;
+        double maxX = Double.NEGATIVE_INFINITY;
+        double maxY = Double.NEGATIVE_INFINITY;
+
+        for (Character character : characters) {
+
+            minX = Math.min(minX, character.getTextPosition().getXDirAdj());
+            minY = Math.min(minY, character.getTextPosition().getYDirAdj());
+            maxX = Math.max(maxX, character.getTextPosition().getXDirAdj() + character.getTextPosition().getWidthDirAdj());
+            maxY = Math.max(maxY, character.getTextPosition().getYDirAdj() + character.getTextPosition().getHeightDir());
+
+        }
+
+        this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
+    }
+
+
+    /**
+     * Returns the words of this line separated by single spaces.
+     */
+    @Override
+    public String toString() {
+
+        StringBuilder sb = new StringBuilder();
+        words.forEach(word -> sb.append(word.toString()).append(" "));
+        return sb.toString().trim();
+    }
+
+}
+
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java
@ -0,0 +1,36 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import lombok.Getter;
+
+public class Neighbor {
+
+    @Getter
+    private final double distance;
+    @Getter
+    private final double angle;
+    private final Character originCharacter;
+    @Getter
+    private final Character character;
+
+
+    public Neighbor(Character neighbor, Character origin) {
+
+        this.distance = neighbor.distance(origin);
+        this.angle = neighbor.angle(origin);
+        this.character = neighbor;
+        this.originCharacter = origin;
+    }
+
+
+    public double getHorizontalDistance() {
+
+        return character.horizontalDistance(originCharacter);
+    }
+
+
+    public double getVerticalDistance() {
+
+        return character.verticalDistance(originCharacter);
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/UnionFind.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/UnionFind.java
@ -0,0 +1,31 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Extension of the JGraphT union-find structure that can list the current disjoint sets.
+ *
+ * @param <T> the element type
+ */
+public class UnionFind<T> extends org.jgrapht.alg.util.UnionFind<T> {
+
+    /**
+     * Creates the structure with every element in a set of its own.
+     *
+     * @param elements the elements to manage
+     */
+    public UnionFind(Set<T> elements) {
+
+        super(elements);
+    }
+
+
+    /**
+     * Collects the elements into their current sets.
+     *
+     * @return one set per representative; sets and their members appear in the iteration order of the parent map
+     */
+    public Collection<Set<T>> getGroups() {
+
+        Map<T, Set<T>> membersByRepresentative = new LinkedHashMap<>();
+        for (T element : getParentMap().keySet()) {
+            membersByRepresentative.computeIfAbsent(find(element), representative -> new LinkedHashSet<>()).add(element);
+        }
+        return membersByRepresentative.values();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java
@ -0,0 +1,51 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
+
+import java.awt.geom.Rectangle2D;
+import java.util.Comparator;
+import java.util.List;
+
+import lombok.Data;
+
+/**
+ * A block of text made of lines, kept in top-to-bottom order, together with the bounding box enclosing them.
+ */
+@Data
+public class Zone extends BoundingBox {
+
+    private List<Line> lines;
+
+
+    /**
+     * Creates a zone from the given lines.
+     *
+     * @param lines the lines of the zone; the passed list is sorted in place by the y position of the lines and then
+     *              kept by reference
+     */
+    @SuppressWarnings("PMD.ConstructorCallsOverridableMethod")
+    public Zone(List<Line> lines) {
+
+        Comparator<Line> topToBottom = Comparator.comparingDouble(Line::getY);
+        lines.sort(topToBottom);
+        this.lines = lines;
+        buildBBox();
+    }
+
+
+    /**
+     * Recomputes the bounding box from the position, width and height reported by each line.
+     */
+    public void buildBBox() {
+
+        double left = Double.POSITIVE_INFINITY;
+        double top = Double.POSITIVE_INFINITY;
+        double right = Double.NEGATIVE_INFINITY;
+        double bottom = Double.NEGATIVE_INFINITY;
+
+        for (Line member : lines) {
+            left = Math.min(left, member.getX());
+            right = Math.max(right, member.getX() + member.getWidth());
+            top = Math.min(top, member.getY());
+            bottom = Math.max(bottom, member.getY() + member.getHeight());
+        }
+
+        Rectangle2D enclosing = new Rectangle2D.Double(left, top, right - left, bottom - top);
+        this.setBBox(enclosing);
+    }
+
+
+    /**
+     * Returns the text of all lines, one line of text per row.
+     */
+    public String toString() {
+
+        StringBuilder text = new StringBuilder();
+        for (Line member : lines) {
+            text.append(member.toString());
+            text.append("\n");
+        }
+        return text.toString().trim();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java
@ -0,0 +1,53 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.AngleFilter;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.UnionFind;
+
+/**
+ * Groups characters into text lines by joining every character with those of its neighbours that lie close enough in
+ * a roughly horizontal direction.
+ */
+@Service
+public class LineBuilderService {
+
+    private static final double CHARACTER_SPACING_DISTANCE_MULTIPLIER = 3.5;
+    private static final double MAX_VERTICAL_CHARACTER_DISTANCE = 0.67;
+    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+
+
+    /**
+     * Builds lines from characters whose neighbours have already been determined.
+     *
+     * @param characters       the characters to group; each one must carry its neighbour list
+     * @param characterSpacing reference character spacing; scales the allowed horizontal distance and is handed to
+     *                         each created line as its word spacing reference
+     * @param lineSpacing      reference line spacing; scales the allowed vertical distance
+     * @return one line per group of connected characters, each with its characters ordered by x position
+     */
+    public List<Line> buildLines(List<Character> characters, double characterSpacing, double lineSpacing) {
+
+        double horizontalLimit = characterSpacing * CHARACTER_SPACING_DISTANCE_MULTIPLIER;
+        double verticalLimit = lineSpacing * MAX_VERTICAL_CHARACTER_DISTANCE;
+        AngleFilter horizontalDirections = new AngleFilter(-ANGLE_TOLERANCE, ANGLE_TOLERANCE);
+
+        UnionFind<Character> lineGroups = new UnionFind<>(new HashSet<>(characters));
+
+        for (Character character : characters) {
+            for (var neighbor : character.getNeighbors()) {
+                if (character.getTextPosition().getDir() != neighbor.getCharacter().getTextPosition().getDir()) {
+                    continue;
+                }
+                if (!horizontalDirections.matches(neighbor)) {
+                    continue;
+                }
+                // Accept the neighbour only inside the ellipse spanned by the horizontal and the vertical limit.
+                double relativeX = neighbor.getHorizontalDistance() / horizontalLimit;
+                double relativeY = neighbor.getVerticalDistance() / verticalLimit;
+                if (Math.pow(relativeX, 2) + Math.pow(relativeY, 2) <= 1) {
+                    lineGroups.union(character, neighbor.getCharacter());
+                }
+            }
+        }
+
+        List<Line> lines = new ArrayList<>();
+        for (var group : lineGroups.getGroups()) {
+            List<Character> ordered = new ArrayList<>(group);
+            ordered.sort(Comparator.comparingDouble(Character::getX));
+            lines.add(new Line(ordered, characterSpacing));
+        }
+        return lines;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java
@ -0,0 +1,78 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Neighbor;
+
+@Service
+public class NearestNeighbourService {
+
+    private static final int NUMBER_OF_NEIGHBOURS = 8;
+    private static final double STEP = 16.0;
+
+
+    public void findNearestNeighbors(List<Character> characters) {
+
+        if (characters.isEmpty() || characters.size() == 1) {
+            return;
+        }
+
+        characters.sort(Comparator.comparingDouble(Character::getX));
+
+        int maxNeighborCount = NUMBER_OF_NEIGHBOURS;
+        if (characters.size() <= NUMBER_OF_NEIGHBOURS) {
+            maxNeighborCount = characters.size() - 1;
+        }
+
+        for (int i = 0; i < characters.size(); i++) {
+
+            List<Neighbor> candidates = new ArrayList<>();
+
+            int start = i;
+            int end = i + 1;
+
+            double distance = Double.POSITIVE_INFINITY;
+
+            for (double searchDistance = 0; searchDistance < distance; ) {
+
+                searchDistance += STEP;
+                boolean newCandidatesFound = false;
+
+                while (start > 0 && characters.get(i).getX() - characters.get(start - 1).getX() < searchDistance) {
+                    start--;
+                    candidates.add(new Neighbor(characters.get(start), characters.get(i)));
+                    clearLeastDistant(candidates, maxNeighborCount);
+                    newCandidatesFound = true;
+                }
+
+                while (end < characters.size() && characters.get(end).getX() - characters.get(i).getX() < searchDistance) {
+                    candidates.add(new Neighbor(characters.get(end), characters.get(i)));
+                    clearLeastDistant(candidates, maxNeighborCount);
+                    end++;
+                    newCandidatesFound = true;
+                }
+
+                if (newCandidatesFound && candidates.size() >= maxNeighborCount) {
+                    distance = candidates.get(maxNeighborCount - 1).getDistance();
+                }
+            }
+            clearLeastDistant(candidates, maxNeighborCount);
+            characters.get(i).setNeighbors(new ArrayList<>(candidates));
+        }
+    }
+
+
+    private void clearLeastDistant(List<Neighbor> candidates, int maxNeighborCount) {
+
+        if (candidates.size() > maxNeighborCount) {
+            candidates.sort(Comparator.comparingDouble(Neighbor::getDistance));
+            candidates.remove(candidates.remove(candidates.size() - 1));
+        }
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java
@ -0,0 +1,165 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
+
+@Service
+public class ReadingOrderService {
+
+    private static final double THRESHOLD = 5;
+    public static final double MULTI_COLUMN_DETECTION_THRESHOLD = 1.5;
+
+
+    public List<Zone> resolve(List<Zone> zones, boolean xyReadingOrder) {
+
+        if (zones.isEmpty() || zones.size() == 1) {
+            return zones;
+        }
+
+        if (xyReadingOrder) {
+            return resolveSingleColumnReadingOrder(zones);
+        }
+
+        Map<Long, Integer> histogram = new HashMap<>();
+        for (Zone zone : zones) {
+            long minY = Math.round(zone.getBBox().getMinY());
+            long maxY = Math.round(zone.getBBox().getMaxY());
+            for (long i = minY; i <= maxY; i++) {
+                histogram.put(i, histogram.getOrDefault(i, 0) + 1);
+            }
+        }
+
+        if (histogram.values().stream().mapToInt(Integer::intValue).average().orElse(1) < MULTI_COLUMN_DETECTION_THRESHOLD) {
+            return resolveSingleColumnReadingOrder(zones);
+        } else {
+
+            return resolveMultiColumnReadingOder(zones);
+        }
+
+    }
+
+
+    private static List<Zone> resolveSingleColumnReadingOrder(List<Zone> zones) {
+
+        zones.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
+                .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
+        return zones;
+    }
+
+
+    private List<Zone> resolveMultiColumnReadingOder(List<Zone> zones) {
+
+        // Simple reading order resolver for multi column page layout as described here : https://pub.towardsai.net/advanced-rag-02-unveiling-pdf-parsing-b84ae866344e
+        // TODO implement a more fancy reading order resolver see https://github.com/BobLd/DocumentLayoutAnalysis/blob/master/README.md#reading-order
+
+        double minX = Double.POSITIVE_INFINITY;
+        double maxX = Double.NEGATIVE_INFINITY;
+
+        for (Zone zone : zones) {
+            if (zone.getX() < minX) {
+                minX = zone.getX();
+            }
+            if (zone.getX() + zone.getWidth() > maxX) {
+                maxX = zone.getX() + zone.getWidth();
+            }
+        }
+
+        double midLineXCoordinate = (minX + maxX) / 2;
+
+        List<Zone> leftOf = new ArrayList<>();
+        List<Zone> rightOf = new ArrayList<>();
+        List<Zone> middle = new ArrayList<>();
+        for (Zone zone : zones) {
+            if (zone.getX() < midLineXCoordinate && zone.getX() + zone.getWidth() < midLineXCoordinate) {
+                leftOf.add(zone);
+            } else if (zone.getX() > midLineXCoordinate && zone.getX() + zone.getWidth() > midLineXCoordinate) {
+                rightOf.add(zone);
+            } else {
+                middle.add(zone);
+            }
+        }
+
+        leftOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
+                .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
+
+        rightOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
+                .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
+
+        middle.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
+                .thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
+
+        List<Zone> leftNotIntersecting = new ArrayList<>();
+        for (Zone leftZone : leftOf) {
+            boolean intersects = false;
+            for (Zone rightZone : rightOf) {
+                if (leftZone.intersectsY(rightZone)) {
+                    intersects = true;
+                    break;
+                }
+                // early stopping
+                if (rightZone.getBBox().getMinY() > leftZone.getBBox().getMaxY()) {
+                    break;
+                }
+            }
+            if (!intersects) {
+                leftNotIntersecting.add(leftZone);
+            }
+        }
+
+        List<Zone> rightNotIntersecting = new ArrayList<>();
+        for (Zone rightZone : rightOf) {
+            boolean intersects = false;
+            for (Zone leftZone : leftOf) {
+                if (rightZone.intersectsY(leftZone)) {
+                    intersects = true;
+                    break;
+                }
+                // early stopping
+                if (leftZone.getBBox().getMinY() > rightZone.getBBox().getMaxY()) {
+                    break;
+                }
+            }
+            if (!intersects) {
+                rightNotIntersecting.add(rightZone);
+            }
+        }
+
+        leftOf.removeAll(leftNotIntersecting);
+        rightOf.removeAll(rightNotIntersecting);
+
+        middle.addAll(leftNotIntersecting);
+        middle.addAll(rightNotIntersecting);
+
+        List<Zone> sortedZones = new ArrayList<>();
+        sortedZones.addAll(leftOf);
+        sortedZones.addAll(rightOf);
+
+        ListIterator<Zone> itty = middle.listIterator();
+
+        while (itty.hasNext()) {
+            Zone current = itty.next();
+            for (int i = 0; i < sortedZones.size(); i++) {
+                if (current.getY() < sortedZones.get(i).getY()) {
+                    sortedZones.add(i, current);
+                    itty.remove();
+                    break;
+                }
+            }
+        }
+
+        sortedZones.addAll(middle);
+
+        return sortedZones;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java
@ -0,0 +1,56 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.AngleFilter;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Histogram;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Neighbor;
+
+/**
+ * Estimates the typical spacing between characters and between lines from the distances of neighbouring characters.
+ */
+@Service
+public class SpacingService {
+
+    private static final double SPACING_HISTOGRAM_RESOLUTION = 0.5;
+    private static final double SPACING_HISTOGRAM_SMOOTHING_LENGTH = 2.5;
+    private static final double SPACING_HIST_SMOOTHING_STANDARD_DEVIATION = 0.5;
+    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+
+
+    /**
+     * Estimates the spacing between characters of the same line, using neighbours in horizontal direction.
+     *
+     * @param characters characters carrying their neighbour lists
+     * @return the most frequent neighbour distance around angle 0, or 0 when no neighbour relation exists
+     */
+    public double computeCharacterSpacing(List<Character> characters) {
+
+        return computeSpacing(characters, 0);
+    }
+
+
+    /**
+     * Estimates the spacing between lines, using neighbours in vertical direction.
+     *
+     * @param characters characters carrying their neighbour lists
+     * @return the most frequent neighbour distance around angle π/2, or 0 when no neighbour relation exists
+     */
+    public double computeLineSpacing(List<Character> characters) {
+
+        return computeSpacing(characters, Math.PI / 2);
+    }
+
+
+    /**
+     * Builds a histogram of the distances of all neighbours accepted by an angle filter around {@code angle}, smooths
+     * it and returns its peak.
+     */
+    private double computeSpacing(List<Character> characters, double angle) {
+
+        double maxDistance = Double.NEGATIVE_INFINITY;
+
+        for (Character character : characters) {
+            for (Neighbor neighbor : character.getNeighbors()) {
+                maxDistance = Math.max(maxDistance, neighbor.getDistance());
+            }
+        }
+        if (maxDistance == Double.NEGATIVE_INFINITY) {
+            // No neighbour relation at all (no characters, or none with neighbours): a histogram over the range
+            // [0, -Infinity] would produce a NaN kernel and a spacing of -Infinity, so report no spacing instead.
+            return 0.0;
+        }
+
+        Histogram histogram = new Histogram(0, maxDistance, SPACING_HISTOGRAM_RESOLUTION);
+        AngleFilter angleFilter = new AngleFilter(angle - ANGLE_TOLERANCE, angle + ANGLE_TOLERANCE);
+        for (Character character : characters) {
+            for (Neighbor neighbor : character.getNeighbors()) {
+                if (angleFilter.matches(neighbor)) {
+                    histogram.add(neighbor.getDistance());
+                }
+            }
+        }
+
+        histogram.gaussianSmooth(SPACING_HISTOGRAM_SMOOTHING_LENGTH, SPACING_HIST_SMOOTHING_STANDARD_DEVIATION);
+        return histogram.getPeakValue();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java
@ -0,0 +1,152 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.UnionFind;
+import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
+
+/**
+ * Groups text lines into zones (text blocks) based on their mutual horizontal overlap, vertical distance and
+ * direction.
+ */
+@Service
+public class ZoneBuilderService {
+
+    private static final double MIN_HORIZONTAL_DISTANCE_MULTIPLIER = -0.5;
+    private static final double MAX_VERTICAL_DISTANCE_MULTIPLIER = 1.2;
+
+    private static final double MIN_HORIZONTAL_MERGE_DISTANCE_MULTIPLIER = -3.0;
+
+    private static final double MAX_VERTICAL_MERGE_DISTANCE_MULTIPLIER = 0.5;
+
+    private static final double MIN_LINE_SIZE_SCALE = 0.9;
+
+    private static final double MAX_LINE_SIZE_SCALE = 2.5;
+
+    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+
+    private static final int MAX_ZONES = 300;
+
+    private static final double MAX_VERTICAL_MERGE_DISTANCE = 0.5;
+
+
+    /**
+     * Builds zones from lines.
+     * <p>
+     * Two lines end up in the same zone when their directions differ by at most {@code ANGLE_TOLERANCE} and their
+     * distances, scaled by the line height relative to the mean line height, satisfy one of two threshold pairs
+     * derived from the character and the line spacing. When more than {@code MAX_ZONES} zones result, all lines are
+     * merged into a single zone instead.
+     *
+     * @param lines            the lines of a page
+     * @param characterSpacing reference character spacing used for the horizontal thresholds
+     * @param lineSpacing      reference line spacing used for the vertical thresholds
+     * @return a modifiable list of zones
+     */
+    public List<Zone> buildZones(List<Line> lines, double characterSpacing, double lineSpacing) {
+
+        double minHorizontalDistance = characterSpacing * MIN_HORIZONTAL_DISTANCE_MULTIPLIER;
+        double maxVerticalDistance = lineSpacing * MAX_VERTICAL_DISTANCE_MULTIPLIER;
+        double minHorizontalMergeDistance = characterSpacing * MIN_HORIZONTAL_MERGE_DISTANCE_MULTIPLIER;
+        double maxVerticalMergeDistance = lineSpacing * MAX_VERTICAL_MERGE_DISTANCE_MULTIPLIER;
+
+        UnionFind<Line> unionFind = new UnionFind<>(new HashSet<>(lines));
+
+        double meanHeight = calculateMeanHeight(lines);
+
+        for (Line outerLine : lines) {
+            for (Line innerLine : lines) {
+                if (!unionFind.inSameSet(outerLine, innerLine) && outerLine.angularDifference(innerLine) <= ANGLE_TOLERANCE) {
+
+                    // The scale is only needed for pairs that are still candidates for a merge.
+                    double scale = Math.min(outerLine.getHeight(), innerLine.getHeight()) / meanHeight;
+                    scale = Math.max(MIN_LINE_SIZE_SCALE, Math.min(scale, MAX_LINE_SIZE_SCALE));
+
+                    double horizontalDistance = outerLine.horizontalDistance(innerLine) / scale;
+                    double verticalDistance = outerLine.verticalDistance(innerLine) / scale;
+
+                    boolean closeLines = minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance;
+                    boolean sameRowParts = minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance;
+                    if (closeLines || sameRowParts) {
+                        unionFind.union(outerLine, innerLine);
+                    }
+                }
+            }
+        }
+
+        List<Zone> zones = new ArrayList<>();
+        for (Set<Line> group : unionFind.getGroups()) {
+            zones.add(new Zone(new ArrayList<>(group)));
+        }
+
+        if (zones.size() > MAX_ZONES) {
+            List<Line> oneZoneLines = new ArrayList<>();
+            for (Zone zone : zones) {
+                oneZoneLines.addAll(zone.getLines());
+            }
+            // Return a modifiable list here as well, so callers can sort or extend the result on every path.
+            List<Zone> singleZone = new ArrayList<>();
+            singleZone.add(mergeLinesInZone(oneZoneLines, characterSpacing, lineSpacing));
+            return singleZone;
+        }
+
+        return zones;
+    }
+
+
+    /**
+     * Computes the mean line height weighted by line length. When the weights do not add up to a positive value
+     * (for example because every line has length zero) the plain average of the heights is used, so the result does
+     * not become NaN through a division by zero.
+     */
+    private double calculateMeanHeight(List<Line> lines) {
+
+        double weightedHeights = 0.0;
+        double weights = 0.0;
+        for (Line line : lines) {
+            double weight = line.getLength();
+            weightedHeights += line.getHeight() * weight;
+            weights += weight;
+        }
+        if (weights > 0) {
+            return weightedHeights / weights;
+        }
+        return lines.stream().mapToDouble(Line::getHeight).average().orElse(Double.NaN);
+    }
+
+
+    /**
+     * Puts all given lines into one zone. Lines lying in the same row (vertical distance of at most
+     * {@code MAX_VERTICAL_MERGE_DISTANCE} line spacings) are joined into a single line when their horizontal extents
+     * do not overlap, or when one extent covers the other while their characters overlap only marginally.
+     */
+    private Zone mergeLinesInZone(List<Line> lines, double characterSpacing, double lineSpacing) {
+
+        double maxHorizontalDistance = 0;
+        double minVerticalDistance = 0;
+        double maxVerticalDistance = lineSpacing * MAX_VERTICAL_MERGE_DISTANCE;
+
+        UnionFind<Line> unionFind = new UnionFind<>(new HashSet<>(lines));
+
+        for (Line outer : lines) {
+            for (Line inner : lines) {
+                if (inner == outer) {
+                    continue;
+                }
+
+                double horizontalDistance = outer.horizontalDistance(inner);
+                double verticalDistance = outer.verticalDistance(inner);
+
+                boolean sameRow = minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance;
+                if (!sameRow) {
+                    continue;
+                }
+
+                if (horizontalDistance <= maxHorizontalDistance) {
+                    unionFind.union(outer, inner);
+                } else if (Math.abs(horizontalDistance - Math.min(outer.getLength(), inner.getLength())) < 0.1 && overlapsOnlyMarginally(outer, inner)) {
+                    // The overlap equals the length of the shorter line, i.e. the shorter line lies within the
+                    // extent of the longer one.
+                    unionFind.union(outer, inner);
+                }
+            }
+        }
+
+        List<Line> outputZone = new ArrayList<>();
+        for (Set<Line> group : unionFind.getGroups()) {
+            List<Character> characters = new ArrayList<>();
+            for (Line line : group) {
+                characters.addAll(line.getCharacters());
+            }
+            characters.sort(Comparator.comparingDouble(Character::getX));
+
+            outputZone.add(new Line(characters, characterSpacing));
+        }
+
+        return new Zone(outputZone);
+    }
+
+
+    /**
+     * Checks that no character pair of the two lines overlaps by more than 2 and that at most two character pairs
+     * overlap at all.
+     */
+    private boolean overlapsOnlyMarginally(Line outer, Line inner) {
+
+        int overlappingCount = 0;
+        for (Character outerCharacter : outer.getCharacters()) {
+            for (Character innerCharacter : inner.getCharacters()) {
+                double characterOverlapDistance = outerCharacter.overlappingDistance(innerCharacter);
+                if (characterOverlapDistance > 2) {
+                    return false;
+                }
+                if (characterOverlapDistance > 0 && ++overlappingCount > 2) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java
@ -0,0 +1,15 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils;
+
+/**
+ * Helpers for comparing floating point values with a tolerance.
+ */
+public class DoubleUtils {
+
+    /**
+     * Compares two doubles after snapping each of them onto a grid with the given step width.
+     * <p>
+     * Both values are divided by the step and rounded with {@link Math#round(double)}; the resulting grid indices are
+     * compared. Values that round to the same index are reported as equal. If either value is NaN the result of
+     * {@link Double#compare(double, double)} is returned unchanged.
+     *
+     * @param d1        the first value
+     * @param d2        the second value
+     * @param precision the grid step width; its sign is ignored and {@code 0} is treated as {@code 1}
+     * @return a negative number, zero or a positive number if {@code d1} is less than, equal to or greater than
+     * {@code d2} on the grid
+     */
+    public static int compareDouble(double d1, double d2, double precision) {
+
+        if (Double.isNaN(d1) || Double.isNaN(d2)) {
+            return Double.compare(d1, d2);
+        }
+        // A negative step would negate both quotients and thereby reverse the ordering, so only its magnitude is used.
+        double step = precision == 0 ? 1 : Math.abs(precision);
+        return Long.compare(Math.round(d1 / step), Math.round(d2 / step));
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
@ -13,8 +13,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
+import java.util.stream.Collectors;

 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
@ -22,6 +24,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Header;
@ -46,14 +49,14 @@ import lombok.experimental.UtilityClass;
@UtilityClass
 public class DocumentGraphFactory {

-    public Document buildDocumentGraph(ClassificationDocument document) {
+    public Document buildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument document) {

        Document documentGraph = new Document();
        Context context = new Context(documentGraph);

        document.getPages().forEach(context::buildAndAddPageWithCounter);
        document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.getImages().add(image));
-        addSections(document, context);
+        addSections(layoutParsingType, document, context);
        addHeaderAndFooterToEachPage(document, context);

        documentGraph.setNumberOfPages(context.pages.size());
@ -64,9 +67,9 @@ public class DocumentGraphFactory {
    }


-    private void addSections(ClassificationDocument document, Context context) {
+    private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument document, Context context) {

-        document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context));
+        document.getSections().forEach(section -> SectionNodeFactory.addSection(layoutParsingType, null, section.getNonEmptyPageBlocks(), section.getImages(), context));
    }


@ -77,6 +80,8 @@ public class DocumentGraphFactory {
        GenericSemanticNode node;
        if (originalTextBlock.isHeadline()) {
            node = Headline.builder().documentTree(context.getDocumentTree()).build();
+        } else if (originalTextBlock.isToDuplicate()) {
+            node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree()).build();
        } else {
            node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
        }
@ -86,7 +91,16 @@ public class DocumentGraphFactory {
        List<TextPageBlock> textBlocks = new ArrayList<>();
        textBlocks.add(originalTextBlock);
        textBlocks.addAll(textBlocksToMerge);
+
        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), node, context, page);
+
+        if (node instanceof DuplicatedParagraph duplicatedParagraph) {
+            AtomicTextBlock unsortedTextBlock = context.textBlockFactory.buildAtomicTextBlock(textBlocks.stream()
+                    .flatMap(tb -> tb.getSequences().stream())
+                    .collect(Collectors.toList()), node, context, page);
+            duplicatedParagraph.setUnsortedLeafTextBlock(unsortedTextBlock);
+        }
+
        List<Integer> treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node);
        node.setLeafTextBlock(textBlock);
        node.setTreeId(treeId);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
@ -4,19 +4,21 @@ import static java.lang.String.format;
 import static java.util.Collections.emptyList;
 import static java.util.stream.Collectors.groupingBy;

+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
+import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TableMergingUtility;

 import lombok.experimental.UtilityClass;
@ -24,7 +26,11 @@ import lombok.experimental.UtilityClass;
@UtilityClass
 public class SectionNodeFactory {

-    public void addSection(GenericSemanticNode parentNode, List<AbstractPageBlock> pageBlocks, List<ClassifiedImage> images, DocumentGraphFactory.Context context) {
+    public void addSection(LayoutParsingType layoutParsingType,
+                           GenericSemanticNode parentNode,
+                           List<AbstractPageBlock> pageBlocks,
+                           List<ClassifiedImage> images,
+                           DocumentGraphFactory.Context context) {

        if (pageBlocks.isEmpty()) {
            return;
@ -37,11 +43,11 @@ public class SectionNodeFactory {

        section.setTreeId(getTreeId(parentNode, context, section));

-        addFirstHeadlineDirectlyToSection(pageBlocks, context, section);
+        addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section);
        if (containsTablesAndTextBlocks(pageBlocks)) {
-            splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(section, subSectionPageBlocks, emptyList(), context));
+            splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType, section, subSectionPageBlocks, emptyList(), context));
        } else {
-            addTablesAndParagraphsAndHeadlinesToSection(pageBlocks, context, section);
+            addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section);
        }

        images.stream().distinct().forEach(image -> DocumentGraphFactory.addImage(section, image, context));
@ -58,16 +64,19 @@ public class SectionNodeFactory {
    }


-    private void addFirstHeadlineDirectlyToSection(List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section) {
+    private void addFirstHeadlineDirectlyToSection(LayoutParsingType layoutParsingType, List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section) {

        if (pageBlocks.get(0).isHeadline()) {
-            addTablesAndParagraphsAndHeadlinesToSection(List.of(pageBlocks.get(0)), context, section);
+            addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, List.of(pageBlocks.get(0)), context, section);
            pageBlocks.remove(0);
        }
    }


-    private void addTablesAndParagraphsAndHeadlinesToSection(List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section) {
+    private void addTablesAndParagraphsAndHeadlinesToSection(LayoutParsingType layoutParsingType,
+                                                             List<AbstractPageBlock> pageBlocks,
+                                                             DocumentGraphFactory.Context context,
+                                                             Section section) {

        Set<AbstractPageBlock> alreadyMerged = new HashSet<>();
        List<AbstractPageBlock> remainingBlocks = new LinkedList<>(pageBlocks);
@ -80,13 +89,23 @@ public class SectionNodeFactory {
            remainingBlocks.removeAll(alreadyMerged);

            if (abstractPageBlock instanceof TextPageBlock) {
-                List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsY((TextPageBlock) abstractPageBlock, remainingBlocks);
-                alreadyMerged.addAll(textBlocks);
-                DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocks);
+
+                switch (layoutParsingType) {
+                    case CLARIFYND, CLARIFYND_PARAGRAPH_DEBUG -> {
+                        alreadyMerged.add(abstractPageBlock);
+                        remainingBlocks.remove(abstractPageBlock);
+                        DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, new ArrayList<>());
+                    }
+                    default -> {
+                        List<TextPageBlock> textBlocks = findTextBlocksWithSameClassificationAndAlignsY((TextPageBlock) abstractPageBlock, remainingBlocks);
+                        alreadyMerged.addAll(textBlocks);
+                        DocumentGraphFactory.addParagraphOrHeadline(section, (TextPageBlock) abstractPageBlock, context, textBlocks);
+                    }
+                }
            } else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) {
                List<TablePageBlock> tablesToMerge = TableMergingUtility.findConsecutiveTablesWithSameColCountAndSameHeaders(tablePageBlock, remainingBlocks);
                alreadyMerged.addAll(tablesToMerge);
-                TableNodeFactory.addTable(section, tablesToMerge, context);
+                TableNodeFactory.addTable(layoutParsingType, section, tablesToMerge, context);
            } else {
                throw new RuntimeException(format("Unhandled AbstractPageBlockType %s!", abstractPageBlock.getClass()));
            }
@ -171,6 +190,7 @@ public class SectionNodeFactory {
                .filter(abstractTextContainer -> abstractTextContainer.intersectsY(atc))
                .map(abstractTextContainer -> (TextPageBlock) abstractTextContainer)
                .filter(abstractTextContainer -> abstractTextContainer.getDir() == atc.getDir())
+                .filter(abstractTextContainer -> !abstractTextContainer.isToDuplicate())
                .toList();
    }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/TableNodeFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/TableNodeFactory.java
@ -7,16 +7,17 @@ import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;

+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;

 import lombok.experimental.UtilityClass;
@ -27,7 +28,7 @@ public class TableNodeFactory {
    public final double TABLE_CELL_MERGE_CONTENTS_SIZE_THRESHOLD = 0.05;


-    public void addTable(GenericSemanticNode parentNode, List<TablePageBlock> tablesToMerge, DocumentGraphFactory.Context context) {
+    public void addTable(LayoutParsingType layoutParsingType, GenericSemanticNode parentNode, List<TablePageBlock> tablesToMerge, DocumentGraphFactory.Context context) {

        setPageNumberInCells(tablesToMerge);
        Set<Page> pages = tablesToMerge.stream().map(AbstractPageBlock::getPage).map(context::getPage).collect(Collectors.toSet());
@ -43,7 +44,7 @@ public class TableNodeFactory {

        List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(parentNode, table);
        table.setTreeId(treeId);
-        addTableCells(mergedRows, table, context);
+        addTableCells(layoutParsingType, mergedRows, table, context);

        ifTableHasNoHeadersSetFirstRowAsHeaders(table);
    }
@ -88,18 +89,18 @@ public class TableNodeFactory {
    }


-    private void addTableCells(List<List<Cell>> rows, Table table, DocumentGraphFactory.Context context) {
+    private void addTableCells(LayoutParsingType layoutParsingType, List<List<Cell>> rows, Table table, DocumentGraphFactory.Context context) {

        for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
            for (int colIndex = 0; colIndex < rows.get(rowIndex).size(); colIndex++) {
-                addTableCell(rows.get(rowIndex).get(colIndex), rowIndex, colIndex, table, context);
+                addTableCell(layoutParsingType, rows.get(rowIndex).get(colIndex), rowIndex, colIndex, table, context);
            }
        }
    }


    @SuppressWarnings("PMD.UnusedPrivateMethod") // PMD actually flags this wrong
-    private void addTableCell(Cell cell, int rowIndex, int colIndex, Table tableNode, DocumentGraphFactory.Context context) {
+    private void addTableCell(LayoutParsingType layoutParsingType, Cell cell, int rowIndex, int colIndex, Table tableNode, DocumentGraphFactory.Context context) {

        Page page = context.getPage(cell.getPageNumber());

@ -116,7 +117,7 @@ public class TableNodeFactory {
            textBlock = context.getTextBlockFactory().buildAtomicTextBlock(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page);
            tableCell.setLeafTextBlock(textBlock);
        } else if (firstTextBlockIsHeadline(cell)) {
-            SectionNodeFactory.addSection(tableCell, cell.getTextBlocks().stream().map(tb -> (AbstractPageBlock) tb).toList(), emptyList(), context);
+            SectionNodeFactory.addSection(layoutParsingType, tableCell, cell.getTextBlocks().stream().map(tb -> (AbstractPageBlock) tb).toList(), emptyList(), context);
        } else if (cellAreaIsSmallerThanPageAreaTimesThreshold(cell, page)) {
            List<TextPositionSequence> sequences = TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(cell.getTextBlocks());
            textBlock = context.getTextBlockFactory().buildAtomicTextBlock(sequences, tableCell, context, page);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java
@ -8,8 +8,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;

-import javax.xml.parsers.DocumentBuilder;
-
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
@ -18,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.Do
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
@ -33,27 +32,20 @@ public class DocumentDataMapper {
    public DocumentData toDocumentData(Document document) {

        List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
-                .flatMap(textBlock -> textBlock.getAtomicTextBlocks()
-                        .stream())
+                .flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
                .distinct()
                .map(DocumentDataMapper::toAtomicTextBlockData)
                .toList();

        List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
-                .flatMap(textBlock -> textBlock.getAtomicTextBlocks()
-                        .stream())
+                .flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
                .distinct()
                .map(DocumentDataMapper::toAtomicPositionBlockData)
                .toList();

-        Set<Long> nonEmptyTextBlocks = documentTextData.stream()
-                .mapToLong(DocumentTextData::getId).boxed()
-                .collect(Collectors.toSet());
+        Set<Long> nonEmptyTextBlocks = documentTextData.stream().mapToLong(DocumentTextData::getId).boxed().collect(Collectors.toSet());

-        List<DocumentPage> documentPageData = document.getPages()
-                .stream()
-                .map(DocumentDataMapper::toPageData)
-                .toList();
+        List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
        DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
        return DocumentData.builder()
                .documentTextData(documentTextData.toArray(new DocumentTextData[0]))
@ -84,22 +76,17 @@ public class DocumentDataMapper {
            case TABLE -> PropertiesMapper.buildTableProperties((Table) entry.getNode());
            case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCell) entry.getNode());
            case IMAGE -> PropertiesMapper.buildImageProperties((Image) entry.getNode());
+            case PARAGRAPH ->
+                    entry.getNode() instanceof DuplicatedParagraph duplicatedParagraph ? PropertiesMapper.buildDuplicateParagraphProperties(duplicatedParagraph) : new HashMap<>();
            default -> new HashMap<>();
        };

        DocumentStructure.EntryData.EntryDataBuilder documentBuilder = DocumentStructure.EntryData.builder()
                .treeId(toPrimitiveIntArray(entry.getTreeId()))
-                .children(entry.getChildren()
-                                  .stream()
-                                  .map(DocumentDataMapper::toEntryData)
-                                  .toList())
+                .children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
                .type(entry.getType())
                .atomicBlockIds(atomicTextBlocks)
-                .pageNumbers(entry.getNode().getPages()
-                                     .stream()
-                                     .map(Page::getNumber)
-                                     .map(Integer::longValue)
-                                     .toArray(Long[]::new))
+                .pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
                .properties(properties);
        if (entry.getNode() != null) {
            documentBuilder.engines(entry.getNode().getEngines());
@ -112,10 +99,7 @@ public class DocumentDataMapper {

    private Long[] toAtomicTextBlockIds(TextBlock textBlock) {

-        return textBlock.getAtomicTextBlocks()
-                .stream()
-                .map(AtomicTextBlock::getId)
-                .toArray(Long[]::new);
+        return textBlock.getAtomicTextBlocks().stream().map(AtomicTextBlock::getId).toArray(Long[]::new);
    }


@ -167,9 +151,7 @@ public class DocumentDataMapper {

    private int[] toPrimitiveIntArray(List<Integer> list) {

-        return list.stream()
-                .mapToInt(Integer::intValue)
-                .toArray();
+        return list.stream().mapToInt(Integer::intValue).toArray();
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java
@ -7,13 +7,14 @@ import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;

-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Header;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Headline;
@ -61,7 +62,7 @@ public class DocumentGraphMapper {

            SemanticNode node = switch (entryData.getType()) {
                case SECTION -> buildSection(context);
-                case PARAGRAPH -> buildParagraph(context);
+                case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
                case HEADLINE -> buildHeadline(context);
                case HEADER -> buildHeader(context);
                case FOOTER -> buildFooter(context);
@ -140,7 +141,17 @@ public class DocumentGraphMapper {
    }


-    private Paragraph buildParagraph(Context context) {
+    private Paragraph buildParagraph(Context context, Map<String, String> properties) {
+
+        if (PropertiesMapper.isDuplicateParagraph(properties)) {
+
+            DuplicatedParagraph duplicatedParagraph = DuplicatedParagraph.builder().documentTree(context.documentTree).build();
+
+            Long[] unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
+            duplicatedParagraph.setUnsortedLeafTextBlock(toTextBlock(unsortedTextblockIds, context, duplicatedParagraph));
+            return duplicatedParagraph;
+
+        }

        return Paragraph.builder().documentTree(context.documentTree).build();
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/PropertiesMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/PropertiesMapper.java
@ -1,17 +1,19 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.mapper;

 import java.awt.geom.Rectangle2D;
-import java.util.Collections;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;

 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;

 public class PropertiesMapper {

@ -76,6 +78,32 @@ public class PropertiesMapper {
    }


+    /**
+     * Builds the properties map of a duplicated paragraph.
+     * <p>
+     * The map holds a single entry: the ids of the atomic text blocks of the paragraph's unsorted leaf text block,
+     * rendered with {@link Arrays#toString(Object[])}.
+     *
+     * @param duplicatedParagraph the paragraph whose unsorted text block ids are stored
+     * @return a new mutable map containing the unsorted text block id entry
+     */
+    public static Map<String, String> buildDuplicateParagraphProperties(DuplicatedParagraph duplicatedParagraph) {
+
+        Long[] unsortedIds = toAtomicTextBlockIds(duplicatedParagraph.getUnsortedLeafTextBlock());
+        Map<String, String> result = new HashMap<>();
+        result.put(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID, Arrays.toString(unsortedIds));
+        return result;
+    }
+
+
+    /**
+     * Checks whether a properties map belongs to a duplicated paragraph, i.e. whether it carries the
+     * {@code UNSORTED_TEXTBLOCK_ID} entry.
+     *
+     * @param properties the properties of a structure entry; {@code null} is treated as "no properties"
+     * @return {@code true} if the unsorted text block id entry is present
+     */
+    public static boolean isDuplicateParagraph(Map<String, String> properties) {
+
+        // Entries without a properties map are plain paragraphs rather than an error.
+        return properties != null && properties.containsKey(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
+    }
+
+
+    /**
+     * Reads the unsorted text block ids from the properties of a duplicated paragraph.
+     *
+     * @param properties the properties map holding the {@code UNSORTED_TEXTBLOCK_ID} entry
+     * @return the ids parsed from that entry
+     */
+    public static Long[] getUnsortedTextblockIds(Map<String, String> properties) {
+
+        String serializedIds = properties.get(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
+        return toLongArray(serializedIds);
+    }
+
+
+    /**
+     * Parses a list of ids in the format produced by {@link Arrays#toString(Object[])}, e.g. {@code "[1, 2, 3]"}.
+     * <p>
+     * The first and last character (the brackets) are dropped, the remainder is split at commas and every element is
+     * trimmed before parsing, so the blank that {@code Arrays.toString} puts after each comma is accepted. An empty
+     * list ({@code "[]"}) yields an empty array.
+     *
+     * @param ids the bracketed, comma separated id list
+     * @return the parsed ids in their original order
+     * @throws NumberFormatException if an element is not a valid long
+     */
+    public static Long[] toLongArray(String ids) {
+
+        String content = ids.substring(1, ids.length() - 1).trim();
+        if (content.isEmpty()) {
+            return new Long[0];
+        }
+        // Long.valueOf rejects surrounding whitespace, so each token has to be trimmed individually.
+        return Arrays.stream(content.split(",")).map(String::trim).map(Long::valueOf).toArray(Long[]::new);
+    }
+
+
    private static ImageType parseImageType(String imageType) {

        return switch (imageType) {
@ -101,4 +129,10 @@ public class PropertiesMapper {
                rectangle2D.getHeight());
    }

+
+    /**
+     * Collects the ids of all atomic text blocks contained in the given text block.
+     *
+     * @param textBlock the text block to read the atomic text blocks from
+     * @return the ids in the order the atomic text blocks are returned by the text block
+     */
+    private static Long[] toAtomicTextBlockIds(TextBlock textBlock) {
+
+        return textBlock.getAtomicTextBlocks()
+                .stream()
+                .map(atomicTextBlock -> atomicTextBlock.getId())
+                .toArray(size -> new Long[size]);
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFLinesTextStripper.java
@ -237,8 +237,13 @@ public class PDFLinesTextStripper extends PDFTextStripper {
        int startIndex = 0;
        RedTextPosition previous = null;

+        float direction = -1;
        for (int i = 0; i <= textPositions.size() - 1; i++) {

+            if (direction == -1) {
+                direction = textPositions.get(i).getDir();
+            }
+
            if (!textPositionSequences.isEmpty()) {
                previous = textPositionSequences.get(textPositionSequences.size() - 1)
                        .getTextPositions()
@ -250,6 +255,13 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                continue;
            }

+            if (textPositions.get(i).getDir() != direction && startIndex != i) {
+                List<TextPosition> sublist = textPositions.subList(startIndex, i);
+                textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
+                startIndex = i;
+                direction = textPositions.get(i).getDir();
+            }
+
            // Strange but sometimes this is happening, for example: Metolachlor2.pdf
            if (checkIfCurrentPositionIsToTheRightOfPreviousPosition(i, textPositions)) {
                List<TextPosition> sublist = textPositions.subList(startIndex, i);
@ -329,6 +341,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
                .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < maximumGapSize;
    }

+
    @Override
    public String getText(PDDocument doc) throws IOException {

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java
@ -20,6 +20,7 @@ import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
@ -53,6 +54,8 @@ public class LayoutGridService {

    static Color INNER_LINES_COLOR = new Color(255, 175, 175);
    static Color PARAGRAPH_COLOR = new Color(70, 130, 180);
+
+    static Color DUPLICATE_PARAGRAPH_COLOR = new Color(70, 180, 101);
    static Color TABLE_COLOR = new Color(102, 205, 170);
    static Color SECTION_COLOR = new Color(50, 50, 50);
    static Color HEADLINE_COLOR = new Color(162, 56, 56);
@ -100,6 +103,11 @@ public class LayoutGridService {
                        case IMAGE -> IMAGE_COLOR;
                        default -> null;
                    };
+
+                    if (semanticNode instanceof DuplicatedParagraph) {
+                        color = DUPLICATE_PARAGRAPH_COLOR;
+                    }
+
                    if (isNotSectionOrTableCellOrDocument(semanticNode)) {
                        addAsRectangle(semanticNode, layoutGrid, color);
                    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionOperations.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPositionOperations.java
@ -3,7 +3,6 @@ package com.knecon.fforesight.service.layoutparser.processor.utils;
 import java.util.List;
 import java.util.stream.Collectors;

-
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;

--- a/layoutparser-service/layoutparser-service-server/src/main/java/com/knecon/fforesight/service/layoutparser/server/queue/MessageHandler.java
+++ b/layoutparser-service/layoutparser-service-server/src/main/java/com/knecon/fforesight/service/layoutparser/server/queue/MessageHandler.java
@ -37,7 +37,7 @@ public class MessageHandler {

        LayoutParsingRequest layoutParsingRequest = objectMapper.readValue(message.getBody(), LayoutParsingRequest.class);

-        if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS) && layoutParsingRequest.researchDocumentStorageId() == null) {
+        if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.CLARIFYND) && layoutParsingRequest.researchDocumentStorageId() == null) {
            throw new IllegalArgumentException("ResearchDocumentDataStorageId is null!");
        }
        log.info("Layout parsing request received {}", layoutParsingRequest.identifier());
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java
@ -48,12 +48,13 @@ public class BdrJsonBuildTest extends AbstractTest {
    @SneakyThrows
    protected Document buildGraph(File file) {

-        return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS,
-                file,
-                new ImageServiceResponse(),
-                new TableServiceResponse(),
-                new VisualLayoutParsingResponse(),
-                file.toString()));
+        return DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.CLARIFYND,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.CLARIFYND,
+                        file,
+                        new ImageServiceResponse(),
+                        new TableServiceResponse(),
+                        new VisualLayoutParsingResponse(),
+                        file.toString()));
    }


--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java
@ -95,12 +95,13 @@ public class HeadlinesGoldStandardIntegrationTest {
        goldStandardLog.getRedactionLogEntry().removeIf(r -> !r.isRedacted() || r.getChanges().get(r.getChanges().size() - 1).getType().equals(ChangeType.REMOVED));
        goldStandardLog.getRedactionLogEntry().forEach(e -> goldStandardHeadlines.add(new Headline(e.getPositions().get(0).getPage(), e.getValue())));

-        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                pdfFileResource.getFile(),
-                new ImageServiceResponse(),
-                new TableServiceResponse(),
-                new VisualLayoutParsingResponse(),
-                filePath));
+        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                        pdfFileResource.getFile(),
+                        new ImageServiceResponse(),
+                        new TableServiceResponse(),
+                        new VisualLayoutParsingResponse(),
+                        filePath));

        var foundHeadlines = documentGraph.streamAllSubNodes()
                .map(SemanticNode::getHeadline)
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java
@ -26,7 +26,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
    public void testLayoutParserEndToEnd() {

        prepareStorage("files/bdr/Wie weiter bei Kristeneinrichtungen.pdf");
-        LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
        LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
        Arrays.stream(finishedEvent.message().split("\n")).forEach(log::info);
    }
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java
@ -55,12 +55,13 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
    @SneakyThrows
    private void writeJsons(Path filename) {

-        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                filename.toFile(),
-                new ImageServiceResponse(),
-                new TableServiceResponse(),
-                new VisualLayoutParsingResponse(),
-                filename.toFile().toString()));
+        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                        filename.toFile(),
+                        new ImageServiceResponse(),
+                        new TableServiceResponse(),
+                        new VisualLayoutParsingResponse(),
+                        filename.toFile().toString()));

        DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
        ObjectMapper mapper = ObjectMapperFactory.create();
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
@ -26,7 +26,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
    @SneakyThrows
    public void testViewerDocument() {

-        String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf";
+        String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";

        var documentFile = new ClassPathResource(fileName).getFile();
@ -54,13 +54,14 @@ public class ViewerDocumentTest extends BuildDocumentTest {
        var documentFile = new ClassPathResource(fileName).getFile();

        var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE,
-                                                                       documentFile,
-                                                                       new ImageServiceResponse(),
-                                                                       tableResponse,
-                                                                       new VisualLayoutParsingResponse(),Path.of(fileName).getFileName().toFile().toString());
+                documentFile,
+                new ImageServiceResponse(),
+                tableResponse,
+                new VisualLayoutParsingResponse(),
+                Path.of(fileName).getFileName().toFile().toString());
        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
        LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
-        Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
+        Document document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE, classificationDocument);

        layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
    }
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
@ -56,12 +56,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @SneakyThrows
    public ClassificationDocument buildClassificationDocument(File originDocument, TableServiceResponse tableServiceResponse) {

-        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                                                                                          originDocument,
-                                                                                          new ImageServiceResponse(),
-                                                                                          tableServiceResponse,
-                                                                                          new VisualLayoutParsingResponse(),
-                                                                                          "document");
+        ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                originDocument,
+                new ImageServiceResponse(),
+                tableServiceResponse,
+                new VisualLayoutParsingResponse(),
+                "document");

        redactManagerClassificationService.classifyDocument(classificationDocument);

@ -112,16 +112,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        var tables = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList();
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();

        // The quality of the table parsing is not good, because the file was rotated during scanning.
        // We only assert that the table border is not the page border.
@ -143,12 +135,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        imageServiceResponse.getData()
                .forEach(imageMetadata -> images.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
                        .add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
-                                                                        imageMetadata.getPosition().getY1(),
-                                                                        imageMetadata.getGeometry().getWidth(),
-                                                                        imageMetadata.getGeometry().getHeight()),
-                                                 ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
-                                                 imageMetadata.isAlpha(),
-                                                 imageMetadata.getPosition().getPageNumber())));
+                                imageMetadata.getPosition().getY1(),
+                                imageMetadata.getGeometry().getWidth(),
+                                imageMetadata.getGeometry().getHeight()),
+                                ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
+                                imageMetadata.isAlpha(),
+                                imageMetadata.getPosition().getPageNumber())));

        System.out.println("object");
    }
@ -160,22 +152,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock table = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(table.getColCount()).isEqualTo(6);
        assertThat(table.getRowCount()).isEqualTo(13);
-        assertThat(table.getRows()
-                           .stream()
-                           .mapToInt(List::size).sum()).isEqualTo(6 * 13);
+        assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
    }


@ -185,37 +166,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(1);
+        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(2);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
-                .get(0)
-                .stream()
-                .map(Collections::singletonList)
-                .collect(Collectors.toList());
-        assertThat(secondTable.getRows()
-                           .stream()
-                           .allMatch(row -> row.stream()
-                                   .map(Cell::getHeaderCells)
-                                   .toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
+        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -225,37 +184,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(firstTable.getColCount()).isEqualTo(9);
        assertThat(firstTable.getRowCount()).isEqualTo(5);
-        TablePageBlock secondTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(1);
+        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
        assertThat(secondTable.getColCount()).isEqualTo(9);
        assertThat(secondTable.getRowCount()).isEqualTo(6);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
-                .get(firstTable.getRowCount() - 1)
-                .stream()
-                .map(Cell::getHeaderCells)
-                .collect(Collectors.toList());
-        assertThat(secondTable.getRows()
-                           .stream()
-                           .allMatch(row -> row.stream()
-                                   .map(Cell::getHeaderCells)
-                                   .toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
+        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -265,37 +202,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");

        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .collect(Collectors.toList())).isNotEmpty();
-        TablePageBlock firstTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(0);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
+        TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(1);
+        TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(6);
-        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
-                .get(0)
-                .stream()
-                .map(Collections::singletonList)
-                .collect(Collectors.toList());
-        assertThat(secondTable.getRows()
-                           .stream()
-                           .allMatch(row -> row.stream()
-                                   .map(Cell::getHeaderCells)
-                                   .toList().equals(firstTableHeaderCells))).isTrue();
+        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
+        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }


@ -345,30 +260,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        validateTable(document, 0, 8, 8, 0, 0);

        List<List<String>> values = Arrays.asList(Arrays.asList("Annex point Reference within DAR/RAR",
-                                                                "Author, date",
-                                                                "Study title",
-                                                                "Analytical method Author, date, No.",
-                                                                "Technique, LOQ of the method, validated working range",
-                                                                "Method meets analytical validation criteria",
-                                                                "Remarks (in case validation criteria are not met)",
-                                                                "Acceptability of the method"),
-                                                  Arrays.asList(
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
-                                                          "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
-                                                  Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
-                                                                "Evans P.G. 2001 TMJ4569B, VV-323245",
-                                                                "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
-                                                                "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
-                                                                "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
-                                                                "Y",
-                                                                "N/A",
-                                                                "Y"));
+                        "Author, date",
+                        "Study title",
+                        "Analytical method Author, date, No.",
+                        "Technique, LOQ of the method, validated working range",
+                        "Method meets analytical validation criteria",
+                        "Remarks (in case validation criteria are not met)",
+                        "Acceptability of the method"),
+                Arrays.asList("Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
+                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
+                Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
+                        "Evans P.G. 2001 TMJ4569B, VV-323245",
+                        "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
+                        "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845",
+                        "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
+                        "Y",
+                        "N/A",
+                        "Y"));

        validateTable(document, 0, values);

@ -757,11 +671,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @SneakyThrows
    private void toHtml(ClassificationDocument document, String filename) {

-        var tables = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList();
+        var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
        StringBuilder sb = new StringBuilder();

        int currentPage = 1;
@ -782,19 +692,9 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {

-        TablePageBlock table = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(tableIndex);
+        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
        List<List<Cell>> rows = table.getRows();
-        int emptyCellsFoundFound = rows.stream()
-                .flatMap(List::stream)
-                .toList()
-                .stream()
-                .filter(f -> f.toString().isEmpty())
-                .toList().size();
+        int emptyCellsFoundFound = rows.stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().isEmpty()).toList().size();

        for (List<Cell> row : table.getRows()) {
            row.forEach(r -> System.out.println(r.toString()));
@ -809,20 +709,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {

-        TablePageBlock table = document.getSections()
-                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
-                        .stream())
-                .toList()
-                .get(tableIndex);
+        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
        List<List<Cell>> rows = table.getRows();

-        List<Cell> rowsFlattened = rows.stream()
-                .flatMap(List::stream)
-                .toList();
-        List<String> valuesFlattened = values.stream()
-                .flatMap(List::stream)
-                .toList();
+        List<Cell> rowsFlattened = rows.stream().flatMap(List::stream).toList();
+        List<String> valuesFlattened = values.stream().flatMap(List::stream).toList();

        for (int i = 0; i < valuesFlattened.size(); i++) {
            Cell cell = rowsFlattened.get(i);
@ -835,11 +726,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {

    private void validateTableSize(ClassificationDocument document, int tableSize) {

-        assertThat(document.getSections()
-                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
-                                   .stream())
-                           .toList().size()).isEqualTo(tableSize);
+        assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().size()).isEqualTo(tableSize);

    }

--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/BodyTextFrameServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/BodyTextFrameServiceTest.java
@ -21,7 +21,7 @@ class BodyTextFrameServiceTest extends BuildDocumentTest {

        String filename = "files/211.pdf";
        String outputFilename = "/tmp/" + Path.of(filename).getFileName() + "_MAINBODY.pdf";
-        ClassificationDocument document = parseLayout(filename, LayoutParsingType.TAAS);
+        ClassificationDocument document = parseLayout(filename, LayoutParsingType.CLARIFYND);
        PdfDraw.drawRectanglesPerPage(filename,
                document.getPages().stream().map(page -> List.of(RectangleTransformations.toRectangle2D(page.getBodyTextFrame()))).toList(),
                outputFilename);
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
@ -74,7 +74,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
            cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
        }
        var cleanRulings = cleanRulingsPerPage.stream().map(CleanRulings::getVertical).collect(Collectors.toList());
-        PdfDraw.drawLinesPerPage(fileName,  cleanRulings, lineFileName);
+        PdfDraw.drawLinesPerPage(fileName, cleanRulings, lineFileName);

    }

@ -99,18 +99,20 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
    @SneakyThrows
    private void writeJsons(Path filename) {

-        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                filename.toFile(),
-                new ImageServiceResponse(),
-                new TableServiceResponse(),
-                new VisualLayoutParsingResponse(),
-                filename.toFile().toString()));
-        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
-                filename.toFile(),
-                new ImageServiceResponse(),
-                new TableServiceResponse(),
-                new VisualLayoutParsingResponse(),
-                filename.toFile().toString()));
+        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                        filename.toFile(),
+                        new ImageServiceResponse(),
+                        new TableServiceResponse(),
+                        new VisualLayoutParsingResponse(),
+                        filename.toFile().toString()));
+        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                        filename.toFile(),
+                        new ImageServiceResponse(),
+                        new TableServiceResponse(),
+                        new VisualLayoutParsingResponse(),
+                        filename.toFile().toString()));
        DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
        DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
        if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure())) {
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java
@ -20,7 +20,6 @@ import org.springframework.context.annotation.Import;
 import org.springframework.context.annotation.Primary;
 import org.springframework.core.io.ClassPathResource;
 import org.springframework.test.context.junit.jupiter.SpringExtension;
-import org.xmlunit.builder.Input;

 import com.iqser.red.commons.jackson.ObjectMapperFactory;
 import com.iqser.red.storage.commons.service.StorageService;
@ -68,7 +67,7 @@ public abstract class AbstractTest {
    protected LayoutParsingRequest buildStandardLayoutParsingRequest() {

        return LayoutParsingRequest.builder()
-                .layoutParsingType(LayoutParsingType.REDACT_MANAGER)
+                .layoutParsingType(LayoutParsingType.REDACT_MANAGER_OLD)
                .originFileStorageId(ORIGIN_FILE_ID)
                .tablesFileStorageId(Optional.of(TABLE_FILE_ID))
                .imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
@ -99,7 +98,7 @@ public abstract class AbstractTest {
    @SneakyThrows
    protected LayoutParsingRequest prepareStorage(String file) {

-        return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json","visual_layout_parsing_response/empty.json");
+        return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json", "visual_layout_parsing_response/empty.json");
    }


@ -107,7 +106,7 @@ public abstract class AbstractTest {
    protected LayoutParsingRequest prepareStorage(InputStream fileInputStream) {

        storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileInputStream);
-        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
    }


@ -140,6 +139,7 @@ public abstract class AbstractTest {
        return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream());
    }

+
    @SneakyThrows
    protected LayoutParsingRequest prepareStorage(String file, String cvServiceResponseFile, String imageInfoFile, String visualLayoutParsingResponseFile) {

@ -148,9 +148,13 @@ public abstract class AbstractTest {
        ClassPathResource imageInfoFileResource = new ClassPathResource(imageInfoFile);
        ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource(visualLayoutParsingResponseFile);

-        return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream(), visualLayoutParsingResponseResource.getInputStream());
+        return prepareStorage(pdfFileResource.getInputStream(),
+                cvServiceResponseFileResource.getInputStream(),
+                imageInfoFileResource.getInputStream(),
+                visualLayoutParsingResponseResource.getInputStream());
    }

+
    @SneakyThrows
    protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream) {

@ -158,18 +162,22 @@ public abstract class AbstractTest {
        storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream);
        storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);

-        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
    }

+
    @SneakyThrows
-    protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream, InputStream visualLayoutParsingResponseFileStream) {
+    protected LayoutParsingRequest prepareStorage(InputStream fileStream,
+                                                  InputStream cvServiceResponseFileStream,
+                                                  InputStream imageInfoStream,
+                                                  InputStream visualLayoutParsingResponseFileStream) {

        storageService.storeObject(TenantContext.getTenantId(), IMAGE_FILE_ID, imageInfoStream);
        storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream);
        storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);
-        storageService.storeObject(TenantContext.getTenantId(),VISUAL_LAYOUT_FILE,visualLayoutParsingResponseFileStream );
+        storageService.storeObject(TenantContext.getTenantId(), VISUAL_LAYOUT_FILE, visualLayoutParsingResponseFileStream);

-        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
+        return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
    }


--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java
@ -26,14 +26,19 @@ public abstract class BuildDocumentTest extends AbstractTest {

        File fileResource = new ClassPathResource(filename).getFile();
        prepareStorage(filename);
-        return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new VisualLayoutParsingResponse(),filename);
+        return layoutParsingPipeline.parseLayout(layoutParsingType,
+                fileResource,
+                layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
+                new TableServiceResponse(),
+                new VisualLayoutParsingResponse(),
+                filename);
    }


    @SneakyThrows
    protected Document buildGraph(String filename) {

-        return buildGraph(filename, LayoutParsingType.REDACT_MANAGER);
+        return buildGraph(filename, LayoutParsingType.REDACT_MANAGER_OLD);
    }


@ -46,7 +51,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
            prepareStorage(filename);
        }

-        return DocumentGraphFactory.buildDocumentGraph(parseLayout(filename, layoutParsingType));
+        return DocumentGraphFactory.buildDocumentGraph(layoutParsingType, parseLayout(filename, layoutParsingType));
    }

 }
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/brokenTableOnOcr_ocred
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/brokenTableOnOcr_ocred
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/wrongOrder
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/wrongOrder
--- a/layoutparser-service/viewer-doc-processor/build.gradle
+++ b/layoutparser-service/viewer-doc-processor/build.gradle
@ -1,6 +1,6 @@
 plugins {
    id("com.knecon.fforesight.java-conventions")
-    id("io.freefair.lombok") version "8.2.2"
+    id("io.freefair.lombok") version "8.4"
 }

 description = "Library for adding/removing layers in the viewer document"