RED-7074: Design Subsection section tree structure algorithm

* Added abstract class SectionNode.
* Both Section and SuperSection now extend SectionNode, so that neither inherits from the other and no fields are duplicated.
2024-05-22 13:02:16 +02:00 · 2024-05-15 16:46:15 +02:00 · 2024-05-15 16:40:57 +02:00 · 2024-05-15 15:09:31 +02:00 · 2024-05-15 14:17:59 +02:00 · 2024-05-15 13:51:49 +02:00
42 changed files with 2020 additions and 228 deletions
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java
@ -6,6 +6,7 @@ import java.util.Locale;
 public enum NodeType implements Serializable {
    DOCUMENT,
    SECTION,
    SUPER_SECTION,
    HEADLINE,
    PARAGRAPH,
    TABLE,
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor;
 import static java.lang.String.format;
 import java.awt.geom.Point2D;
 import java.awt.geom.Rectangle2D;
 import java.io.File;
 import java.io.IOException;
@ -29,6 +30,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineExtractorService;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineValidationService;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.TOCEnrichmentService;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
@ -45,6 +51,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.RulingClean
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
 import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
 import com.knecon.fforesight.service.layoutparser.processor.services.TableExtractionService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.BlockificationPostprocessingService;
 import com.knecon.fforesight.service.layoutparser.processor.services.TextRulingsClassifier;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocuMineBlockificationService;
@ -90,12 +97,16 @@ public class LayoutParsingPipeline {
    TableExtractionService tableExtractionService;
    DocuMineBlockificationService docuMineBlockificationService;
    RedactManagerBlockificationService redactManagerBlockificationService;
    BlockificationPostprocessingService blockificationPostprocessingService;
    DocstrumBlockificationService docstrumBlockificationService;
    LayoutGridService layoutGridService;
    ObservationRegistry observationRegistry;
    VisualLayoutParsingAdapter visualLayoutParsingAdapter;
    ClarifyndClassificationService clarifyndClassificationService;
    GraphicExtractorService graphicExtractorService;
    OutlineExtractorService outlineExtractorService;
    OutlineValidationService outlineValidationService;
    TOCEnrichmentService tocEnrichmentService;
    LayoutparserSettings settings;
@ -105,21 +116,28 @@ public class LayoutParsingPipeline {
        log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
        File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
-        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
+        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
                .orElse(originFile);
        VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
-        if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
+        if (layoutParsingRequest.visualLayoutParsingFileId()
-            visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
+                .isPresent()) {
            visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId()
                                                                                                         .get());
        }
        ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
-        if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
+        if (layoutParsingRequest.imagesFileStorageId()
-            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
+                .isPresent()) {
            imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId()
                                                                                     .get());
        }
        TableServiceResponse tableServiceResponse = new TableServiceResponse();
-        if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
+        if (layoutParsingRequest.tablesFileStorageId()
-            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
+                .isPresent()) {
            tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId()
                                                                                     .get());
        }
        ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
@ -199,15 +217,15 @@ public class LayoutParsingPipeline {
    /**
     * Builds the human-readable summary of how many semantic nodes of each kind were parsed.
     * A node type that has no entry in the map is reported as 0.
     *
     * NOTE(review): only SECTION, HEADLINE, PARAGRAPH, TABLE, TABLE_CELL, HEADER and FOOTER are
     * reported; SUPER_SECTION nodes are not part of the message - confirm this is intended.
     *
     * @param numberOfPages      number of pages to report
     * @param semanticNodeCounts node counts keyed by node type; types without an entry count as 0
     * @return the formatted summary message
     */
    private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {
-        return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
+        return format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
-                             numberOfPages,
+                      numberOfPages,
-                             semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
+                      semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
-                             semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
+                      semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
-                             semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
+                      semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
-                             semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
+                      semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
-                             semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
+                      semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
-                             semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
+                      semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
-                             semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
+                      semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
    }
@ -222,6 +240,7 @@ public class LayoutParsingPipeline {
        PDDocument originDocument = openDocument(originFile);
        addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
        Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
        Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
        Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
@ -232,6 +251,12 @@ public class LayoutParsingPipeline {
        }
        List<ClassificationPage> classificationPages = new ArrayList<>();
        OutlineObject lastProcessedOutlineObject = null;
        // parsing the structure elements could be useful as well
        if(layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
            classificationDocument.setOutlineObjectTree(outlineExtractorService.getOutlineObjectTree(originDocument));
        }
        long pageCount = originDocument.getNumberOfPages();
@ -277,7 +302,13 @@ public class LayoutParsingPipeline {
            TextRulingsClassifier.classifyUnderlinedAndStrikethroughText(words, cleanRulings);
-            List<Box> graphics = graphicExtractorService.extractPathElementGraphics(originDocument, pdPage, pageNumber, cleanRulings, stripper.getTextPositionSequences(), false);
+            List<Box> graphics = graphicExtractorService.extractPathElementGraphics(originDocument,
                                                                              pdPage,
                                                                              pageNumber,
                                                                              cleanRulings,
                                                                              stripper.getTextPositionSequences(),
                                                                              false);
            pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>())
                    .addAll(graphics.stream()
@ -301,6 +332,20 @@ public class LayoutParsingPipeline {
            classificationPage.setPageWidth(cropbox.getWidth());
            classificationPage.setPageHeight(cropbox.getHeight());
            if(layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
                List<OutlineObject> outlineObjects = classificationDocument.getOutlineObjectTree().getOutlineObjectsPerPage().getOrDefault(pageNumber - 1, new ArrayList<>());
                OutlineObject notFoundOutlineObject = null;
                if (lastProcessedOutlineObject != null && !lastProcessedOutlineObject.isFound()) {
                    lastProcessedOutlineObject.setPoint(new Point2D.Float(0, cropbox.getHeight()));
                    notFoundOutlineObject = lastProcessedOutlineObject;
                }
                if (!outlineObjects.isEmpty()) {
                    classificationPage.setOutlineObjects(outlineObjects);
                    lastProcessedOutlineObject = blockificationPostprocessingService.sanitizeOutlineBlocks(classificationPage, notFoundOutlineObject);
                }
            }
            classificationDocument.getVisualizations().addMarkedContentVisualizations(stripper.getMarkedContents(), pageNumber);
            // MarkedContent needs to be converted at this point, otherwise it leads to GC Problems in Pdfbox.
            classificationPage.setMarkedContentBboxPerType(convertMarkedContents(stripper.getMarkedContents()));
@ -342,13 +387,22 @@ public class LayoutParsingPipeline {
            case CLARIFYND -> clarifyndClassificationService.classifyDocument(classificationDocument);
        }
        List<TextPageBlock> headlines = classificationDocument.getPages()
                .stream()
                .flatMap(classificationPage -> classificationPage.getTextBlocks()
                        .stream()
                        .filter(tb -> tb instanceof TextPageBlock && tb.getClassification() != null && tb.getClassification().isHeadline())
                        .map(tb -> (TextPageBlock) tb))
                .toList();
        TableOfContents tableOfContents = outlineValidationService.createToC(headlines);
        classificationDocument.setTableOfContents(tableOfContents);
        log.info("Building Sections for {}", identifier);
        switch (layoutParsingType) {
            case CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_PARAGRAPH_DEBUG -> sectionsBuilderService.buildParagraphDebugSections(classificationDocument);
            default -> {
-                sectionsBuilderService.buildSections(classificationDocument);
+                tocEnrichmentService.assignSectionBlocksAndImages(classificationDocument);
                sectionsBuilderService.addImagesToSections(classificationDocument);
            }
        }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java
@ -3,6 +3,8 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
 import java.util.ArrayList;
 import java.util.List;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
 import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutparsingVisualizations;
@ -28,4 +30,7 @@ public class ClassificationDocument {
    private long rulesVersion;
    private OutlineObjectTree outlineObjectTree;
    private TableOfContents tableOfContents;
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationPage.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationPage.java
@ -8,13 +8,13 @@ import java.util.Map;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
 import lombok.Data;
 import lombok.NonNull;
 import lombok.RequiredArgsConstructor;
 import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
@Data
@RequiredArgsConstructor
@ -23,6 +23,10 @@ public class ClassificationPage {
    @NonNull
    private List<AbstractPageBlock> textBlocks;
    private List<OutlineObject> outlineObjects = new ArrayList<>();
    private List<AbstractPageBlock> headlines  = new ArrayList<>();
    private List<ClassifiedImage> images = new ArrayList<>();
    private Rectangle bodyTextFrame;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationSection.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationSection.java
@ -12,6 +12,7 @@ import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@Deprecated
 public class ClassificationSection {
    private List<AbstractPageBlock> pageBlocks = new ArrayList<>();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/PageBlockType.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/PageBlockType.java
@ -31,6 +31,19 @@ public enum PageBlockType {
    }
    /**
     * Translates a block type into its numeric headline level.
     *
     * @param pageBlockType the block type to translate; a {@code null} value makes the switch throw a
     *                      {@link NullPointerException}
     * @return 1 to 5 for {@code H1} to {@code H5}, and 6 for every other block type
     */
    public static int getHeadlineNumber(PageBlockType pageBlockType) {

        final int level;
        switch (pageBlockType) {
            case H1 -> level = 1;
            case H2 -> level = 2;
            case H3 -> level = 3;
            case H4 -> level = 4;
            case H5 -> level = 5;
            // every remaining type shares level 6
            default -> level = 6;
        }
        return level;
    }
    public boolean isHeadline() {
        return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/SectionIdentifier.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/SectionIdentifier.java
@ -8,6 +8,7 @@ import java.util.regex.Pattern;
 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
 import lombok.Getter;
 import lombok.experimental.FieldDefaults;
@AllArgsConstructor
@ -16,13 +17,15 @@ public class SectionIdentifier {
    static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
-    private enum Format {
+    public enum Format {
        EMPTY,
        NUMERICAL,
        DOCUMENT
    }
    @Getter
    Format format;
    @Getter
    String identifierString;
    List<Integer> identifiers;
    boolean asChild;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/DocumentTree.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/DocumentTree.java
@ -140,8 +140,8 @@ public class DocumentTree {
        if (treeId.isEmpty()) {
            return root;
        }
-        Entry entry = root.children.get(treeId.get(0));
+        Entry entry = root;
-        for (int id : treeId.subList(1, treeId.size())) {
+        for (int id : treeId) {
            entry = entry.children.get(id);
        }
        return entry;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
@ -18,78 +18,20 @@ import lombok.Builder;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.experimental.FieldDefaults;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.slf4j.Slf4j;
@Slf4j
@Data
-@Builder
+@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Section implements GenericSemanticNode {
+@EqualsAndHashCode(callSuper = true)
-
+public class Section extends SectionNode {
    @Builder.Default
    Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
    List<Integer> treeId;
    TextBlock textBlock;
    @EqualsAndHashCode.Exclude
    DocumentTree documentTree;
    @Builder.Default
    @EqualsAndHashCode.Exclude
    Set<RedactionEntity> entities = new HashSet<>();
    @EqualsAndHashCode.Exclude
    Map<Page, Rectangle2D> bBoxCache;
    @Override
    public NodeType getType() {
        return NodeType.SECTION;
    }
    public boolean hasTables() {
        return streamAllSubNodesOfType(NodeType.TABLE).findAny()
                .isPresent();
    }
    @Override
    public TextBlock getTextBlock() {
        if (textBlock == null) {
            textBlock = GenericSemanticNode.super.getTextBlock();
        }
        return textBlock;
    }
    @Override
    public String toString() {
-        return treeId.toString() + ": " + NodeType.SECTION + ": " + this.getTextBlock().buildSummary();
+        return super.toString();
    }
    public Headline getHeadline() {
        return streamChildrenOfType(NodeType.HEADLINE)//
                .map(node -> (Headline) node)//
                .findFirst()//
                .orElseGet(() -> getParent().getHeadline());
    }
    @Override
    public Map<Page, Rectangle2D> getBBox() {
        if (bBoxCache == null) {
            bBoxCache = GenericSemanticNode.super.getBBox();
        }
        return bBoxCache;
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SectionNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SectionNode.java
@ -0,0 +1,103 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
 import java.awt.geom.Rectangle2D;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
 import lombok.Builder;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.experimental.FieldDefaults;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.slf4j.Slf4j;
/**
 * Common base class for section-like nodes of the document graph.
 * It holds the state shared by its subclasses (layout engines, tree id, cached text block,
 * document tree, entities and cached bounding boxes), so that the concrete section types do not
 * have to inherit from one another or duplicate these fields.
 */
@Slf4j
@Data
@SuperBuilder
@AllArgsConstructor
@NoArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
 public abstract class SectionNode implements GenericSemanticNode {

    /** Layout engines associated with this node; defaults to a mutable set containing only ALGORITHM. */
    @Builder.Default
    Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));

    /** Id of this node within the document tree. */
    List<Integer> treeId;

    /** Lazily initialised cache for {@link #getTextBlock()}. */
    TextBlock textBlock;

    @EqualsAndHashCode.Exclude
    DocumentTree documentTree;

    @Builder.Default
    @EqualsAndHashCode.Exclude
    Set<RedactionEntity> entities = new HashSet<>();

    /** Lazily initialised cache for {@link #getBBox()}. */
    @EqualsAndHashCode.Exclude
    Map<Page, Rectangle2D> bBoxCache;


    /**
     * Returns the node type of this node.
     *
     * @return {@link NodeType#SECTION} unless a subclass overrides this method
     */
    @Override
    public NodeType getType() {

        return NodeType.SECTION;
    }


    /**
     * @return {@code true} if at least one sub node of type {@link NodeType#TABLE} exists
     */
    public boolean hasTables() {

        return streamAllSubNodesOfType(NodeType.TABLE).findAny()
                .isPresent();
    }


    /**
     * NOTE(review): only sub nodes of type {@link NodeType#SECTION} are considered here;
     * {@link NodeType#SUPER_SECTION} sub nodes are not checked - confirm this is intended.
     *
     * @return {@code true} if no sub node of type {@link NodeType#SECTION} exists
     */
    public boolean isLeafSection() {

        return streamAllSubNodesOfType(NodeType.SECTION).findAny()
                .isEmpty();
    }


    /**
     * Returns the text block of this node. The value is computed once through the default
     * implementation of {@link GenericSemanticNode} and cached in {@code textBlock} afterwards.
     *
     * @return the cached or freshly computed text block
     */
    @Override
    public TextBlock getTextBlock() {

        if (textBlock == null) {
            textBlock = GenericSemanticNode.super.getTextBlock();
        }
        return textBlock;
    }


    /**
     * Builds a short description of the form {@code <treeId>: <type>: <text summary>}.
     * The type is taken from {@link #getType()} rather than being fixed to SECTION, so that
     * subclasses overriding {@link #getType()} are labelled with their own type. The tree id is
     * appended by string concatenation, so a missing tree id is printed as {@code null} instead of
     * causing a {@link NullPointerException}.
     *
     * @return the description of this node
     */
    @Override
    public String toString() {

        return treeId + ": " + getType() + ": " + this.getTextBlock().buildSummary();
    }


    /**
     * Returns the first child of type {@link NodeType#HEADLINE}; if this node has none, the
     * headline of the parent node is returned instead.
     *
     * @return the headline of this node or, as a fallback, of its parent
     */
    public Headline getHeadline() {

        return streamChildrenOfType(NodeType.HEADLINE)//
                .map(node -> (Headline) node)//
                .findFirst()//
                .orElseGet(() -> getParent().getHeadline());
    }


    /**
     * Returns the bounding boxes of this node per page. The value is computed once through the
     * default implementation of {@link GenericSemanticNode} and cached in {@code bBoxCache} afterwards.
     *
     * @return the cached or freshly computed bounding boxes keyed by page
     */
    @Override
    public Map<Page, Rectangle2D> getBBox() {

        if (bBoxCache == null) {
            bBoxCache = GenericSemanticNode.super.getBBox();
        }
        return bBoxCache;
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java
@ -0,0 +1,40 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
 import java.util.List;
 import java.util.Set;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
 import lombok.Builder;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.ToString;
 import lombok.experimental.FieldDefaults;
 import lombok.experimental.SuperBuilder;
/**
 * Section node that reports {@link NodeType#SUPER_SECTION} as its type.
 * It declares no fields of its own; all state is inherited from {@link SectionNode}.
 */
@Data
@SuperBuilder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(callSuper = true)
 public class SuperSection extends SectionNode {
    /**
     * @return always {@link NodeType#SUPER_SECTION}
     */
    @Override
    public NodeType getType() {
        return NodeType.SUPER_SECTION;
    }
    /**
     * Delegates to {@link SectionNode#toString()}.
     * NOTE(review): presumably declared explicitly so that Lombok's {@code @Data} does not
     * generate its own toString for this class - confirm.
     */
    @Override
    public String toString() {
        return super.toString();
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java
@ -50,14 +50,16 @@ public class ConcatenatedTextBlock implements TextBlock {
    /**
     * Appends the atomic text blocks of the given text block to this block and extends the
     * boundary of this block to the end of the given block.
     * If this block is still empty, the boundary of the given block is adopted as is; otherwise the
     * given block has to start exactly where this block ends.
     *
     * @param textBlock the text block to append
     * @return this instance
     * @throws UnsupportedOperationException if this block is not empty and the given block does not
     *                                       start at the end of this block's boundary
     */
    public ConcatenatedTextBlock concat(TextBlock textBlock) {
        int start = textBlock.getBoundary().start();
        int end = textBlock.getBoundary().end();
        if (this.atomicTextBlocks.isEmpty()) {
-            boundary.setStart(textBlock.getBoundary().start());
+            boundary.setStart(start);
-            boundary.setEnd(textBlock.getBoundary().end());
+            boundary.setEnd(end);
-        } else if (boundary.end() != textBlock.getBoundary().start()) {
+        } else if (boundary.end() != start) {
            throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
        }
        this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks());
-        boundary.setEnd(textBlock.getBoundary().end());
+        boundary.setEnd(end);
        // presumably searchText is a cached value derived from the atomic blocks, reset so it gets rebuilt - confirm
        this.searchText = null;
        return this;
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
@ -0,0 +1,209 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.awt.geom.Point2D;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
 import java.util.Optional;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.pdmodel.PDDestinationNameTreeNode;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.interactive.action.PDAction;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitHeightDestination;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitRectangleDestination;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitWidthDestination;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageXYZDestination;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
 import org.springframework.stereotype.Service;
 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;
@Service
@Slf4j
 public class OutlineExtractorService {
    private static final String PDDESTINATION_TYPE_FIT = "Fit";
    private static final String PDDESTINATION_TYPE_FIT_B = "FitB";
    private static final String PDDESTINATION_TYPE_FIT_H = "FitH";
    private static final String PDDESTINATION_TYPE_FIT_V = "FitV";
    private static final String PDDESTINATION_TYPE_FIT_R = "FitR";
    private static final String PDDESTINATION_TYPE_FIT_BH = "FitBH";
    private static final String PDDESTINATION_TYPE_FIT_BV = "FitBV";
    private static final String PDDESTINATION_TYPE_XYZ = "XYZ";
    @SneakyThrows
    public OutlineObjectTree getOutlineObjectTree(PDDocument document) {
        PDDocumentOutline documentOutline = document.getDocumentCatalog().getDocumentOutline();
        List<OutlineObjectTreeNode> rootNodes = new ArrayList<>();
        if (documentOutline != null) {
            for (PDOutlineItem child : documentOutline.children()) {
                Optional<OutlineObjectTreeNode> outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, 1);
                outlineObjectWithChildren.ifPresent(rootNodes::add);
            }
        }
        return new OutlineObjectTree(rootNodes);
    }
    @SneakyThrows
    private Optional<OutlineObjectTreeNode> createOutlineObjectWithChildren(PDOutlineItem item, PDDocument document, int depth) {
        Optional<OutlineObjectTreeNode> outlineObject = createOutlineObject(item, document, depth);
        if (outlineObject.isPresent()) {
            for (var child : item.children()) {
                Optional<OutlineObjectTreeNode> outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, depth + 1);
                outlineObjectWithChildren.ifPresent(outlineObjectTreeNode -> outlineObject.get().addChild(outlineObjectTreeNode));
            }
        }
        return outlineObject;
    }
    // if the structure elements are processed beforehand, another case can be handled here as well:
    // outline objects can reference structure elements (see pdf documentation)
    @SneakyThrows
    private Optional<OutlineObjectTreeNode> createOutlineObject(PDOutlineItem item, PDDocument document, int depth) {
        String title = item.getTitle();
        PDPage page = item.findDestinationPage(document);
        if (page == null) {
            return Optional.empty();
        }
        int pageNumber = document.getPages().indexOf(page);
        Optional<Point2D> outlinePosition = Optional.empty();
        try {
            PDDocumentNameDictionary names = document.getDocumentCatalog().getNames();
            PDDestinationNameTreeNode destinations = null;
            if (names != null) {
                destinations = names.getDests();
            }
            PDDestination destination = item.getDestination();
            if (destination != null) {
                outlinePosition = getLocationFromCOSBase(destinations, destination.getCOSObject());
            }
            if (outlinePosition.isEmpty()) {
                PDAction action = item.getAction();
                if (action != null) {
                    outlinePosition = extractOutlineLocationGoTo(destinations, action.getCOSObject());
                }
            }
        } catch (Exception e) {
            log.info(String.format("Error occurred during position resolution for outline item on page %s with title %s: " + e, pageNumber, title));
        }
        return Optional.of(new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, outlinePosition.orElse(new Point2D.Float(0, 0)), depth)));
    }
    @SneakyThrows
    private static Optional<Point2D> extractOutlineLocationGoTo(PDDestinationNameTreeNode destinations, COSDictionary cosDictionary) {
        if (isGoToAction(cosDictionary)) {
            COSBase cosBase = cosDictionary.getItem(COSName.D);
            return getLocationFromCOSBase(destinations, cosBase);
        }
        return Optional.empty();
    }
    private static Optional<Point2D> getLocationFromCOSBase(PDDestinationNameTreeNode destinations, COSBase cosBase) throws IOException {
        if (cosBase != null) {
            if (cosBase instanceof COSArray cosArray) {
                return getLocationFromCosArray(cosArray);
            }
            if (cosBase instanceof COSString cosString) {
                String destinationName = cosString.getString();
                COSArray cosArray = destinations.getValue(destinationName).getCOSObject();
                return getLocationFromCosArray(cosArray);
            }
        }
        return Optional.empty();
    }
    private static Optional<Point2D> getLocationFromCosArray(COSArray cosArray) {
        boolean located = false;
        float x = 0;
        float y = 0;
        try {
            PDDestination destination = PDDestination.create(cosArray);
            COSName type = (COSName) cosArray.getObject(1);
            String typeString = type.getName();
            switch (typeString) {
                case PDDESTINATION_TYPE_FIT_V:
                case PDDESTINATION_TYPE_FIT_BV:
                    PDPageFitHeightDestination fitHeightDestination = (PDPageFitHeightDestination) destination;
                    x = fitHeightDestination.getLeft();
                    located = true;
                    break;
                case PDDESTINATION_TYPE_FIT_R:
                    PDPageFitRectangleDestination fitRectangleDestination = (PDPageFitRectangleDestination) destination;
                    x = fitRectangleDestination.getLeft();
                    y = fitRectangleDestination.getTop();
                    located = true;
                    break;
                case PDDESTINATION_TYPE_FIT_H:
                case PDDESTINATION_TYPE_FIT_BH:
                    PDPageFitWidthDestination fitWidthDestination = (PDPageFitWidthDestination) destination;
                    y = fitWidthDestination.getTop();
                    located = true;
                    break;
                case PDDESTINATION_TYPE_XYZ:
                    PDPageXYZDestination xyzDestination = (PDPageXYZDestination) destination;
                    x = xyzDestination.getLeft();
                    y = xyzDestination.getTop();
                    located = true;
                    break;
                case PDDESTINATION_TYPE_FIT:
                case PDDESTINATION_TYPE_FIT_B:
                default:
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return located ? Optional.of(new Point2D.Float(x, y)) : Optional.empty();
    }
    private static boolean isGoToAction(COSDictionary cosDictionary) {
        return cosDictionary.getNameAsString("S").toLowerCase(Locale.ROOT).equals("goto");
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java
@ -0,0 +1,35 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.awt.geom.Point2D;
 import lombok.AllArgsConstructor;
 import lombok.Data;
 import lombok.RequiredArgsConstructor;
/**
 * One entry of a document outline: its title, the page it points to, the target position on that
 * page and its depth inside the outline tree.
 */
@Data
@RequiredArgsConstructor
@AllArgsConstructor
 public class OutlineObject {

    // Keep the declaration order: the explicit constructor below delegates to the Lombok-generated
    // constructor over the final fields (title, pageNumber, treeDepth).
    private final String title;
    private final int pageNumber;
    private Point2D point;
    private final int treeDepth;
    // NOTE(review): presumably set once the entry was matched against the page content - confirm with callers
    private boolean found;


    /**
     * Creates an outline entry with a known target position.
     *
     * @param title      the title of the outline entry
     * @param pageNumber the page the entry points to
     * @param point2D    the target position on that page
     * @param depth      the depth of the entry inside the outline tree
     */
    public OutlineObject(String title, int pageNumber, Point2D point2D, int depth) {

        this(title, pageNumber, depth);
        point = point2D;
    }


    /**
     * Short representation that only contains the title.
     */
    @Override
    public String toString() {

        return String.format("OutlineObject{title='%s'}", title);
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java
@ -0,0 +1,42 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import lombok.Data;
 import lombok.RequiredArgsConstructor;
/**
 * The outline of a document as a tree, plus a lookup of all outline entries grouped by page number.
 */
@Data
@RequiredArgsConstructor
 public class OutlineObjectTree {

    private List<OutlineObjectTreeNode> rootNodes = new ArrayList<>();
    private Map<Integer, List<OutlineObject>> outlineObjectsPerPage = new HashMap<>();


    /**
     * Creates the tree from its root nodes and fills the per-page lookup from all nodes of the tree.
     *
     * @param rootNodes the top-level outline nodes
     */
    public OutlineObjectTree(List<OutlineObjectTreeNode> rootNodes) {

        this.rootNodes = rootNodes;
        flattenNodesAndGroupByPage(rootNodes);
    }


    /**
     * Walks the given nodes depth-first (a node before its children) and registers every outline
     * entry under its page number.
     */
    private void flattenNodesAndGroupByPage(List<OutlineObjectTreeNode> outlineObjectTreeNodes) {

        for (OutlineObjectTreeNode treeNode : outlineObjectTreeNodes) {
            OutlineObject entry = treeNode.getOutlineObject();
            outlineObjectsPerPage.computeIfAbsent(entry.getPageNumber(), page -> new ArrayList<>()).add(entry);

            List<OutlineObjectTreeNode> descendants = treeNode.getChildren();
            if (!descendants.isEmpty()) {
                flattenNodesAndGroupByPage(descendants);
            }
        }
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTreeNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTreeNode.java
@ -0,0 +1,34 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.util.ArrayList;
 import java.util.List;
 import lombok.Data;
/**
 * A node of the outline tree: one outline entry together with its child nodes.
 */
@Data
 public class OutlineObjectTreeNode {

    private OutlineObject outlineObject;
    private List<OutlineObjectTreeNode> children = new ArrayList<>();


    /**
     * Creates a node without children.
     *
     * @param outlineObject the outline entry held by this node
     */
    public OutlineObjectTreeNode(OutlineObject outlineObject) {

        this.outlineObject = outlineObject;
    }


    /**
     * Appends the given node to the end of this node's children.
     */
    public void addChild(OutlineObjectTreeNode outlineObject) {

        this.children.add(outlineObject);
    }


    /**
     * Short representation that contains the outline entry but not the children.
     */
    @Override
    public String toString() {

        return new StringBuilder("OutlineObjectTreeNode{outlineObject=").append(outlineObject).append('}').toString();
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java
@ -0,0 +1,59 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeSet;
 import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import lombok.extern.slf4j.Slf4j;
/**
 * Builds a {@link TableOfContents} tree from an ordered list of headline blocks.
 */
@Service
@Slf4j
 public class OutlineValidationService {

    /**
     * Arranges the given headlines into a tree based on their headline numbers.
     * <p>
     * A headline becomes a main section when no headline with a smaller number has been seen so far.
     * Otherwise it is attached to the most recent item of the parent depth determined by
     * {@link #resolveParentDepth(int, int, TableOfContentItem)}.
     *
     * @param headlines the headline blocks in the order they should appear in the table of contents
     * @return the table of contents holding the main sections
     */
    public TableOfContents createToC(List<TextPageBlock> headlines) {

        List<TableOfContentItem> topLevelItems = new ArrayList<>();
        // most recent item for every headline depth seen so far; its key set mirrors seenDepths
        Map<Integer, TableOfContentItem> latestItemByDepth = new HashMap<>();
        TreeSet<Integer> seenDepths = new TreeSet<>();
        TableOfContentItem previous = null;

        for (TextPageBlock headline : headlines) {
            int depth = getHeadlineNumber(headline.getClassification());
            TableOfContentItem item = new TableOfContentItem(headline);

            // greatest depth seen so far that is strictly smaller than the current one
            Integer shallowerDepth = seenDepths.floor(depth - 1);
            if (shallowerDepth != null) {
                assert previous != null;
                int parentDepth = resolveParentDepth(shallowerDepth, depth, previous);
                latestItemByDepth.get(parentDepth).addChild(item);
            } else {
                topLevelItems.add(item);
            }

            previous = item;
            latestItemByDepth.put(depth, item);
            seenDepths.add(depth);
        }

        return new TableOfContents(topLevelItems);
    }


    /**
     * Chooses the depth whose most recent item becomes the parent of the current headline.
     *
     * @param candidateDepth the greatest already seen depth below the current depth
     * @param currentDepth   the depth of the headline being placed
     * @param previous       the item created for the preceding headline
     * @return the depth of {@code previous} when it is smaller than the candidate; the depth of the
     *         parent of {@code previous} when {@code previous} has the current depth and a parent;
     *         the candidate depth otherwise
     */
    private static int resolveParentDepth(int candidateDepth, int currentDepth, TableOfContentItem previous) {

        int previousDepth = getHeadlineNumber(previous.getHeadline().getClassification());
        if (previousDepth < candidateDepth) {
            return previousDepth;
        }
        if (previousDepth == currentDepth && previous.getParent() != null) {
            return getHeadlineNumber(previous.getParent().getHeadline().getClassification());
        }
        return candidateDepth;
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java
@ -0,0 +1,261 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import lombok.extern.slf4j.Slf4j;
/**
 * Distributes the page content of a {@link ClassificationDocument} (text blocks, tables, images,
 * headers and footers) onto the items of the document's {@link TableOfContents}.
 */
@Slf4j
@Service
 public class TOCEnrichmentService {

    /**
     * Walks through all pages and assigns every classified block and every image to a table of
     * contents item.
     * <ul>
     * <li>Blocks without classification are skipped.</li>
     * <li>Header and footer blocks are collected per page and stored on the document.</li>
     * <li>A text block whose text equals the headline text of the next expected item (in iteration
     * order of the table of contents) starts that item's section; all following blocks belong to
     * that section until the next headline is matched.</li>
     * <li>Blocks in front of the first matched headline are collected in an item without headline,
     * which is inserted as first main section.</li>
     * <li>An image is assigned to the first item of the current page whose accumulated headline
     * bounding box contains the image position, otherwise to the first of the most recently found
     * items, otherwise to the images in front of the first headline.</li>
     * </ul>
     *
     * @param document the document whose table of contents gets enriched; its headers and footers are replaced
     */
    public void assignSectionBlocksAndImages(ClassificationDocument document) {

        TableOfContents toc = document.getTableOfContents();
        Iterator<TableOfContentItem> iterator = toc.iterator();
        // the next item whose headline is expected to show up in the page content
        TableOfContentItem currentTOCItem = null;
        if (iterator.hasNext()) {
            currentTOCItem = iterator.next();
        }
        List<AbstractPageBlock> startBlocks = new ArrayList<>();
        List<ClassifiedImage> startImages = new ArrayList<>();
        // the item whose section is currently being filled
        TableOfContentItem currentSection = null;
        boolean foundFirstHeadline = false;

        List<ClassificationHeader> headers = new ArrayList<>();
        List<ClassificationFooter> footers = new ArrayList<>();
        TablePageBlock previousTable = null;
        List<TableOfContentItem> lastFoundTOCItems = new ArrayList<>();

        for (ClassificationPage page : document.getPages()) {
            List<TableOfContentItem> currentPageTOCItems = new ArrayList<>();
            List<TextPageBlock> header = new ArrayList<>();
            List<TextPageBlock> footer = new ArrayList<>();
            for (AbstractPageBlock current : page.getTextBlocks()) {

                if (current.getClassification() == null) {
                    continue;
                }

                current.setPage(page.getPageNumber());

                if (current.getClassification().equals(PageBlockType.HEADER)) {
                    header.add((TextPageBlock) current);
                    continue;
                }

                if (current.getClassification().equals(PageBlockType.FOOTER)) {
                    footer.add((TextPageBlock) current);
                    continue;
                }

                if (current instanceof TablePageBlock table) {
                    if (previousTable != null) {
                        mergeTableMetadata(table, previousTable);
                    }
                    previousTable = table;
                }

                if (current instanceof TextPageBlock && currentTOCItem != null && currentTOCItem.getHeadline().getText().equals(current.getText())) {
                    foundFirstHeadline = true;
                    currentSection = currentTOCItem;
                    currentTOCItem.getSectionBlocks().add(current);
                    currentPageTOCItems.add(currentTOCItem);

                    if (iterator.hasNext()) {
                        currentTOCItem = iterator.next();
                    }
                } else if (!foundFirstHeadline) {
                    // The branches are chained on purpose: the headline block was already added to its
                    // section above and must not be added a second time.
                    startBlocks.add(current);
                } else {
                    currentSection.getSectionBlocks().add(current);
                }
            }

            // pages without headline keep using the items found on an earlier page
            if (!currentPageTOCItems.isEmpty()) {
                lastFoundTOCItems = currentPageTOCItems;
            }

            for (ClassifiedImage image : page.getImages()) {

                // bounding box accumulated over the headlines of this page visited so far
                Double xMin = null;
                Double yMin = null;
                Double xMax = null;
                Double yMax = null;

                for (TableOfContentItem tocItem : lastFoundTOCItems) {
                    var headline = tocItem.getHeadline();

                    if (headline.getPage() != page.getPageNumber()) {
                        continue;
                    }

                    // min and max may be swapped, so both orders are handled
                    if (headline.getMinX() < headline.getMaxX()) {
                        if (xMin == null || headline.getMinX() < xMin) {
                            xMin = headline.getMinX();
                        }
                        if (xMax == null || headline.getMaxX() > xMax) {
                            xMax = headline.getMaxX();
                        }
                    } else {
                        if (xMin == null || headline.getMaxX() < xMin) {
                            xMin = headline.getMaxX();
                        }
                        if (xMax == null || headline.getMinX() > xMax) {
                            xMax = headline.getMinX();
                        }
                    }

                    if (headline.getMinY() < headline.getMaxY()) {
                        if (yMin == null || headline.getMinY() < yMin) {
                            yMin = headline.getMinY();
                        }
                        if (yMax == null || headline.getMaxY() > yMax) {
                            yMax = headline.getMaxY();
                        }
                    } else {
                        if (yMin == null || headline.getMaxY() < yMin) {
                            yMin = headline.getMaxY();
                        }
                        if (yMax == null || headline.getMinY() > yMax) {
                            yMax = headline.getMinY();
                        }
                    }

                    log.debug("Image position x: {}, y: {}", image.getPosition().getX(), image.getPosition().getY());
                    log.debug("Headline position xMin: {}, xMax: {}, yMin: {}, yMax: {}", xMin, xMax, yMin, yMax);

                    if (image.getPosition().getX() >= xMin && image.getPosition().getX() <= xMax && image.getPosition().getY() >= yMin && image.getPosition().getY() <= yMax) {
                        tocItem.getImages().add(image);
                        image.setAppendedToSection(true);
                        break;
                    }
                }
                if (!image.isAppendedToSection()) {
                    log.debug("Image uses first paragraph");
                    if (!lastFoundTOCItems.isEmpty()) {
                        lastFoundTOCItems.get(0).getImages().add(image);
                    } else {
                        startImages.add(image);
                    }
                    image.setAppendedToSection(true);
                }
            }

            if (!header.isEmpty()) {
                headers.add(new ClassificationHeader(header));
            }
            if (!footer.isEmpty()) {
                footers.add(new ClassificationFooter(footer));
            }
        }

        // NOTE(review): images collected in startImages are only kept when there are start blocks as
        // well - confirm that images in front of the first headline cannot occur without any block.
        if (!startBlocks.isEmpty()) {
            TableOfContentItem unassigned = new TableOfContentItem(null);
            unassigned.setSectionBlocks(startBlocks);
            unassigned.setImages(startImages);
            document.getTableOfContents().getMainSections().add(0, unassigned);
        }
        document.setHeaders(headers);
        document.setFooters(footers);
    }


    /**
     * Copies header cell information from the previous table to the rows of the current table when
     * the current table has no header information, the previous table has some and both tables have
     * a non-header row of the same size.
     *
     * @param currentTable  the table that may receive header information
     * @param previousTable the table that precedes {@code currentTable}
     */
    private void mergeTableMetadata(TablePageBlock currentTable, TablePageBlock previousTable) {

        // Distribute header information for subsequent tables
        if (previousTable != null && hasInvalidHeaderInformation(currentTable) && hasValidHeaderInformation(previousTable)) {
            List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
            List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(currentTable);
            // Allow merging of tables if header row is separated from first logical non-header row
            if (previousTableNonHeaderRow.isEmpty()
                && previousTable.getRowCount() == 1
                && previousTable.getRows()
                           .get(0).size() == tableNonHeaderRow.size()) {
                // the single row of the previous table acts as header row: every copy points to its original as header cell
                previousTableNonHeaderRow = previousTable.getRows()
                        .get(0)
                        .stream()
                        .map(cell -> {
                            Cell fakeCell = Cell.copy(cell);
                            fakeCell.setHeaderCells(Collections.singletonList(cell));
                            return fakeCell;
                        })
                        .toList();
            }
            if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
                for (int i = currentTable.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
                    List<Cell> row = currentTable.getRows()
                            .get(i);
                    if (row.size() == tableNonHeaderRow.size() && row.stream()
                            .allMatch(cell -> cell.getHeaderCells().isEmpty())) {
                        for (int j = 0; j < row.size(); j++) {
                            row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());
                        }
                    }
                }
            }
        }
    }


    /**
     * @return {@code true} when at least one cell of the table has header cells assigned
     */
    private boolean hasValidHeaderInformation(TablePageBlock table) {

        return !hasInvalidHeaderInformation(table);
    }


    /**
     * @return {@code true} when no cell of the table has header cells assigned
     */
    private boolean hasInvalidHeaderInformation(TablePageBlock table) {

        return table.getRows()
                .stream()
                .flatMap(row -> row.stream()
                        .filter(cell -> !cell.getHeaderCells().isEmpty()))
                .findAny().isEmpty();
    }


    /**
     * Searches the table bottom-up for a row with more than one cell that contains no header cell.
     *
     * @return the first such row found, or an empty list when there is none
     */
    private List<Cell> getRowWithNonHeaderCells(TablePageBlock table) {

        for (int i = table.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
            List<Cell> row = table.getRows()
                    .get(i);
            if (row.size() == 1) {
                continue;
            }
            boolean allNonHeader = true;
            for (Cell cell : row) {
                if (cell.isHeaderCell()) {
                    allNonHeader = false;
                    break;
                }
            }
            if (allNonHeader) {
                return row;
            }
        }

        return Collections.emptyList();
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java
@ -0,0 +1,110 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.Collectors;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SectionNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
/**
 * A node of the table of contents tree: a headline with the blocks and images of its section, its
 * child items and a link to its parent.
 * <p>
 * Equality and hash code are based on the headline only. The headline may be {@code null} for an
 * item that collects content without a headline.
 */
@Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
 public class TableOfContentItem {

    @EqualsAndHashCode.Include
    private TextPageBlock headline;
    private List<TableOfContentItem> children = new ArrayList<>();
    private TableOfContentItem parent;

    private List<AbstractPageBlock> sectionBlocks = new ArrayList<>();
    private List<ClassifiedImage> images = new ArrayList<>();

    private SectionNode section;


    /**
     * Creates an item without parent, children, blocks or images.
     *
     * @param headline the headline of the item, may be {@code null}
     */
    public TableOfContentItem(TextPageBlock headline) {

        this.headline = headline;
    }


    /**
     * Appends the given item to the children of this item and makes this item its parent.
     */
    public void addChild(TableOfContentItem tableOfContentItem) {

        children.add(tableOfContentItem);
        tableOfContentItem.setParent(this);
    }


    /**
     * @return the child of this item's parent that directly precedes this item, or {@code null}
     *         when this item has no parent or is the first child
     */
    public TableOfContentItem getSiblingBefore() {

        if (parent != null) {
            // position lookup relies on equals(), i.e. on the headline
            int index = parent.getChildren().indexOf(this);
            if (index > 0) {
                return parent.getChildren()
                        .get(index - 1);
            }
        }
        return null;
    }


    /**
     * @return the child of this item's parent that directly follows this item, or {@code null}
     *         when this item has no parent or is the last child
     */
    public TableOfContentItem getSiblingAfter() {

        if (parent != null) {
            // position lookup relies on equals(), i.e. on the headline
            int index = parent.getChildren().indexOf(this);
            if (index >= 0 && index < parent.getChildren().size() - 1) {
                return parent.getChildren()
                        .get(index + 1);
            }
        }
        return null;
    }


    /**
     * Checks whether the given block is the headline of this item or of any of its descendants.
     * Items without headline never match themselves, but their descendants are still searched.
     */
    public boolean contains(TextPageBlock block) {

        // the headline is null for items that collect content without a headline
        if (headline != null && headline.equals(block)) {
            return true;
        }
        for (TableOfContentItem child : children) {
            if (child.contains(block)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Checks whether the given item equals this item or any of its descendants.
     */
    public boolean contains(TableOfContentItem tocItem) {

        if (this.equals(tocItem)) {
            return true;
        }
        for (TableOfContentItem child : children) {
            if (child.contains(tocItem)) {
                return true;
            }
        }
        return false;
    }


    /**
     * @return a new list with the section blocks of this item that are not empty
     */
    public List<AbstractPageBlock> getNonEmptySectionBlocks() {

        return sectionBlocks.stream().filter(pageBlock -> !pageBlock.isEmpty()).collect(Collectors.toList());
    }


    /**
     * Short representation that only contains the headline.
     */
    @Override
    public String toString() {

        return "TableOfContentItem{" + "headline=" + headline + '}';
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java
@ -0,0 +1,136 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Stack;
 import org.springframework.lang.NonNull;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import lombok.Data;
 import lombok.RequiredArgsConstructor;
/**
 * The table of contents of a document: a forest of {@link TableOfContentItem}s. Iterating visits
 * every item depth-first, a parent before its children.
 */
@Data
@RequiredArgsConstructor
 public class TableOfContents implements Iterable<TableOfContentItem> {

    private List<TableOfContentItem> mainSections = new ArrayList<>();


    /**
     * @param mainSections the top-level items of the table of contents
     */
    public TableOfContents(List<TableOfContentItem> mainSections) {

        this.mainSections = mainSections;
    }


    /**
     * @return the headlines of all items, a parent's headline before those of its children; the
     *         list holds a {@code null} entry for every item without headline
     */
    public List<TextPageBlock> getAllTextPageBlocks() {

        List<TextPageBlock> allTextPageBlocks = new ArrayList<>();
        for (TableOfContentItem item : mainSections) {
            collectTextPageBlocks(item, allTextPageBlocks);
        }
        return allTextPageBlocks;
    }


    /**
     * Adds the headline of the item and, recursively, of all its descendants to the given list.
     */
    private void collectTextPageBlocks(TableOfContentItem item, List<TextPageBlock> textPageBlocks) {

        textPageBlocks.add(item.getHeadline());
        for (TableOfContentItem child : item.getChildren()) {
            collectTextPageBlocks(child, textPageBlocks);
        }
    }


    /**
     * @return all items of the tree, a parent before its children
     */
    public List<TableOfContentItem> getAllTableOfContentItems() {

        List<TableOfContentItem> allItems = new ArrayList<>();
        for (TableOfContentItem item : mainSections) {
            collectTableOfContentItems(item, allItems);
        }
        return allItems;
    }


    /**
     * Adds the item and, recursively, all its descendants to the given list.
     */
    private void collectTableOfContentItems(TableOfContentItem item, List<TableOfContentItem> allItems) {

        allItems.add(item);
        for (TableOfContentItem child : item.getChildren()) {
            collectTableOfContentItems(child, allItems);
        }
    }


    /**
     * Checks whether the given block is the headline of any item of the tree.
     */
    private boolean containsBlock(TextPageBlock block) {

        for (TableOfContentItem existingItem : this.getMainSections()) {
            // contains() already compares the item's own headline, so no separate check is done here
            if (existingItem.contains(block)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Checks whether the given item equals any item of the tree.
     */
    private boolean containsItem(TableOfContentItem tocItem) {

        for (TableOfContentItem existingItem : this.getMainSections()) {
            // contains() already compares the item itself, so no separate check is done here
            if (existingItem.contains(tocItem)) {
                return true;
            }
        }
        return false;
    }


    /**
     * @return an iterator that visits every item depth-first, a parent before its children
     */
    @Override
    public @NonNull Iterator<TableOfContentItem> iterator() {

        return new TableOfContentItemIterator(mainSections);
    }


    /**
     * Depth-first iterator that keeps one child iterator per tree level currently being visited.
     */
    private static class TableOfContentItemIterator implements Iterator<TableOfContentItem> {

        private final Stack<Iterator<TableOfContentItem>> stack = new Stack<>();


        TableOfContentItemIterator(List<TableOfContentItem> mainSections) {

            stack.push(mainSections.iterator());
        }


        @Override
        public boolean hasNext() {

            ensureStackTopIsCurrent();
            return !stack.isEmpty() && stack.peek().hasNext();
        }


        /**
         * @throws java.util.NoSuchElementException when all items have been visited
         */
        @Override
        public TableOfContentItem next() {

            // honour the Iterator contract instead of failing with an EmptyStackException
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            TableOfContentItem currentItem = stack.peek().next();
            // descend into the children before continuing with the siblings
            if (currentItem.getChildren() != null && !currentItem.getChildren().isEmpty()) {
                stack.push(currentItem.getChildren()
                                   .iterator());
            }
            return currentItem;
        }


        /**
         * Drops exhausted iterators so that the top of the stack is the iterator providing the next item.
         */
        private void ensureStackTopIsCurrent() {

            while (!stack.isEmpty() && !stack.peek().hasNext()) {
                stack.pop();
            }
        }

    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
@ -58,6 +58,20 @@ public class TextPageBlock extends AbstractPageBlock {
    }
    /**
     * Returns the page height reported by the first text position sequence of this block.
     */
    @JsonIgnore
    public float getPageHeight() {

        var firstSequence = sequences.get(0);
        return firstSequence.getPageHeight();
    }
    /**
     * Returns the page width reported by the first text position sequence of this block.
     */
    @JsonIgnore
    public float getPageWidth() {

        var firstSequence = sequences.get(0);
        return firstSequence.getPageWidth();
    }
    private void calculateBBox() {
        if (sequences == null) {
@ -69,6 +83,12 @@ public class TextPageBlock extends AbstractPageBlock {
    }
    /**
     * Public entry point that triggers the bounding box calculation of this block again.
     */
    public void recalculateBBox() {

        this.calculateBBox();
    }
    public static TextPageBlock merge(List<TextPageBlock> textBlocksToMerge) {
        if (textBlocksToMerge.isEmpty()) {
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
@ -27,8 +27,10 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@Deprecated
 public class SectionsBuilderService {
    public void buildSections(ClassificationDocument document) {
        List<AbstractPageBlock> chunkWords = new ArrayList<>();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java
@ -0,0 +1,525 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
 import static com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService.buildTextBlock;
 import java.awt.geom.Rectangle2D;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.Locale;
 import java.util.function.Function;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.model.SectionIdentifier;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
 import com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory;
 import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
 import lombok.Data;
@Service
 public class BlockificationPostprocessingService {
    /**
     * Tolerance subtracted from the outline target position when {@code processTextBlocks} looks for the first
     * block to examine. Presumably in the same unit as the block coordinates - TODO confirm.
     */
    private static final float BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD = 5.0f;
    /**
     * Maps a text block to the bounding box of all its text positions, each mapped to initial user space.
     * NOTE(review): not referenced anywhere in this class.
     */
    private static final Function<TextPageBlock, Rectangle2D> blockToBoundingBox = (textPageBlock) -> textPageBlock.getSequences()
            .stream()
            .map(sequence -> sequence.getTextPositions()
                    .stream()
                    .map(position -> SearchTextWithTextPositionFactory.mapRedTextPositionToInitialUserSpace(position, sequence))
                    .collect(RectangleTransformations.collectBBox()))
            .collect(RectangleTransformations.collectBBox());
    /**
     * Matches the outline objects of a page against the page's text blocks and flags every outline object as
     * found or not found.
     * <p>
     * If an outline object from an earlier call is handed in, it is examined first, together with the first
     * outline object of this page. The carried-over object is only applied when the candidates of both do not
     * overlap. Applying a match can re-classify, split or merge text blocks of the page, which is why the text
     * blocks are read from the page again before every matching pass.
     *
     * @param classificationPage    page whose text blocks and outline objects are processed
     * @param notFoundOutlineObject outline object carried over from an earlier call, may be {@code null}
     * @return {@code null} if the page has no text page blocks or no outline objects, otherwise the last
     *         outline object of the page
     */
    public OutlineObject sanitizeOutlineBlocks(ClassificationPage classificationPage, OutlineObject notFoundOutlineObject) {
        List<OutlineObject> pageOutlines = classificationPage.getOutlineObjects();
        if (getTextPageBlocks(classificationPage).isEmpty() || pageOutlines.isEmpty()) {
            return null;
        }
        float pageHeight = classificationPage.getPageHeight();
        ListIterator<OutlineObject> pendingOutlines = pageOutlines.listIterator();
        if (notFoundOutlineObject != null) {
            OutlineProcessionContext carriedOverContext = new OutlineProcessionContext(notFoundOutlineObject);
            processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, carriedOverContext);
            OutlineObject firstOnPage = null;
            OutlineProcessionContext firstOnPageContext = null;
            if (pendingOutlines.hasNext()) {
                firstOnPage = pendingOutlines.next();
                firstOnPageContext = new OutlineProcessionContext(firstOnPage);
                processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, firstOnPageContext);
            }
            boolean candidatesOverlap = contextsOverlap(carriedOverContext, firstOnPageContext);
            if (!candidatesOverlap) {
                notFoundOutlineObject.setFound(selectMatch(classificationPage, carriedOverContext));
            }
            if (firstOnPage != null) {
                // The blocks may have been changed by the match above, so the candidates are collected again.
                OutlineProcessionContext refreshedContext = new OutlineProcessionContext(firstOnPage);
                processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, refreshedContext);
                firstOnPage.setFound(selectMatch(classificationPage, refreshedContext));
            }
        }
        while (pendingOutlines.hasNext()) {
            OutlineObject outline = pendingOutlines.next();
            OutlineProcessionContext outlineContext = new OutlineProcessionContext(outline);
            processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, outlineContext);
            outline.setFound(selectMatch(classificationPage, outlineContext));
        }
        return pageOutlines.isEmpty() ? notFoundOutlineObject : pageOutlines.get(pageOutlines.size() - 1);
    }
    /**
     * Returns the text blocks of the page that are {@link TextPageBlock}s, in page order, as an unmodifiable
     * snapshot.
     */
    private static List<TextPageBlock> getTextPageBlocks(ClassificationPage classificationPage) {
        return classificationPage.getTextBlocks()
                .stream()
                .filter(TextPageBlock.class::isInstance)
                .map(TextPageBlock.class::cast)
                .toList();
    }
    /**
     * Decides whether the carried-over outline object and the first outline object of the page compete for the
     * same blocks.
     * <p>
     * This is the case when the first title starts with the carried-over title and at least one candidate block
     * of the carried-over outline has a {@code pdfMaxY} that is not below the largest {@code pdfMaxY} of the
     * candidate blocks of the first outline.
     *
     * @param notFoundOutlineObjectProcessionContext candidates of the carried-over outline object
     * @param firstOutlineObjectProcessionContext    candidates of the first outline object of the page, may be
     *                                               {@code null}
     * @return {@code true} if the candidates overlap, {@code false} otherwise or if there is no first context
     */
    private boolean contextsOverlap(OutlineProcessionContext notFoundOutlineObjectProcessionContext, OutlineProcessionContext firstOutlineObjectProcessionContext) {
        if (firstOutlineObjectProcessionContext == null) {
            return false;
        }
        String carriedOverTitle = notFoundOutlineObjectProcessionContext.getOutlineObject().getTitle();
        String firstTitle = firstOutlineObjectProcessionContext.getOutlineObject().getTitle();
        if (!firstTitle.startsWith(carriedOverTitle)) {
            return false;
        }
        List<TextPageBlock> carriedOverBlocks = getAllMatchingBlocks(notFoundOutlineObjectProcessionContext);
        List<TextPageBlock> firstBlocks = getAllMatchingBlocks(firstOutlineObjectProcessionContext);
        double highestOfFirst = Double.NEGATIVE_INFINITY;
        for (TextPageBlock block : firstBlocks) {
            highestOfFirst = Math.max(highestOfFirst, block.getPdfMaxY());
        }
        for (TextPageBlock block : carriedOverBlocks) {
            if (block.getPdfMaxY() >= highestOfFirst) {
                return true;
            }
        }
        return false;
    }
    /**
     * Collects every block the context refers to: the direct match, the split candidate (each only if present)
     * and all merge candidates, in that order.
     */
    private List<TextPageBlock> getAllMatchingBlocks(OutlineProcessionContext context) {
        List<TextPageBlock> matching = new ArrayList<>();
        TextPageBlock direct = context.getDirectMatch();
        if (direct != null) {
            matching.add(direct);
        }
        TextPageBlock toSplit = context.getSplitCandidate();
        if (toSplit != null) {
            matching.add(toSplit);
        }
        matching.addAll(context.getMergeCandidates());
        return matching;
    }
    /**
     * Collects match candidates for the outline object of the context from the given text blocks.
     * <p>
     * The blocks are skipped up to the first one whose {@code maxY} reaches the outline target position
     * (page height minus outline y, minus {@link #BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD}). Examination starts at
     * that block, or at the last block if none qualifies, and ends as soon as a block yields a direct match.
     *
     * @param textBlocks text blocks of the page in page order
     * @param pageHeight height of the page
     * @param context    context holding the outline object and receiving the candidates
     */
    private void processTextBlocks(List<TextPageBlock> textBlocks, float pageHeight, OutlineProcessionContext context) {
        OutlineObject outlineObject = context.getOutlineObject();
        ListIterator<TextPageBlock> cursor = textBlocks.listIterator();
        boolean reachedOutline = false;
        while (!reachedOutline && cursor.hasNext()) {
            TextPageBlock skipped = cursor.next();
            reachedOutline = pageHeight - outlineObject.getPoint().getY() - BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD <= skipped.getMaxY();
        }
        // Step back so that the block the search stopped at is examined as well.
        if (cursor.hasPrevious()) {
            cursor.previous();
        }
        while (cursor.hasNext()) {
            if (processOutlineForTextBlock(cursor.next(), context)) {
                break;
            }
        }
    }
    /**
     * Picks the candidate of the context that lies closest to the outline target and turns it into a headline.
     * <p>
     * Candidates are the direct match, the split candidate and the combination of merge candidates with the
     * smallest average distance whose texts concatenate to the outline title. On equal distances the direct
     * match wins over the split candidate, which wins over the merge combination. Depending on the winner the
     * block is classified as is, split first, or merged first.
     *
     * @param classificationPage page owning the blocks; its block list is modified on split and merge
     * @param context            candidates collected for one outline object
     * @return {@code false} if the context holds no usable candidate, {@code true} otherwise
     */
    private boolean selectMatch(ClassificationPage classificationPage, OutlineProcessionContext context) {
        OutlineObject outline = context.getOutlineObject();
        TextPageBlock direct = context.getDirectMatch();
        List<TextPageBlock> mergeable = context.getMergeCandidates();
        TextPageBlock toSplit = context.getSplitCandidate();
        PageBlockType headlineType = PageBlockType.getHeadlineType(outline.getTreeDepth());
        double directDistance = Double.MAX_VALUE;
        if (direct != null) {
            directDistance = calculateDistance(outline, direct);
        }
        double splitDistance = Double.MAX_VALUE;
        if (toSplit != null) {
            splitDistance = calculateDistance(outline, toSplit);
        }
        double mergeDistance = Double.MAX_VALUE;
        List<TextPageBlock> bestCombination = new ArrayList<>();
        if (!mergeable.isEmpty()) {
            // Possible extension for edge cases: also consider the blocks adjacent to the first and the last
            // merge candidate (de-duplicated) before building the combinations.
            for (List<TextPageBlock> combination : findCombinations(outline.getTitle(), mergeable)) {
                double averageDistance = combination.stream()
                        .mapToDouble(block -> calculateDistance(outline, block))
                        .average()
                        .orElse(Double.MAX_VALUE);
                if (mergeDistance > averageDistance) {
                    mergeDistance = averageDistance;
                    bestCombination = combination;
                }
            }
        }
        double minDistance = Math.min(directDistance, Math.min(splitDistance, mergeDistance));
        if (minDistance == Double.MAX_VALUE) {
            return false;
        }
        if (minDistance == directDistance) {
            direct.setClassification(headlineType);
            return true;
        }
        if (minDistance == splitDistance) {
            SplitBlockResult split = splitBlock(classificationPage, toSplit, context.getSectionIdentifier(), outline.getTitle());
            if (split.modifiedBlockToSplit()) {
                toSplit.setClassification(headlineType);
            }
            for (TextPageBlock other : split.otherBlocks()) {
                other.setClassification(null);
            }
            return true;
        }
        TextPageBlock merged = mergeBlocks(classificationPage, bestCombination);
        merged.setClassification(headlineType);
        return true;
    }
    /**
     * Reduces a block to the part that forms the headline and moves the text before and after it into new
     * blocks, which are inserted into the page directly before and after the block.
     * <p>
     * The headline is first searched with the section identifier prepended (if the identifier is not empty and
     * the title does not already start with it) and, if that yields nothing, with the plain title.
     *
     * @param classificationPage page owning the block; new blocks are inserted into its block list
     * @param blockToSplit       block containing the headline text
     * @param sectionIdentifier  section identifier detected for the block
     * @param title              title of the outline object
     * @return whether the block was reduced to the headline, together with the newly created blocks
     */
    private SplitBlockResult splitBlock(ClassificationPage classificationPage, TextPageBlock blockToSplit, SectionIdentifier sectionIdentifier, String title) {
        List<TextPageBlock> createdBlocks = new ArrayList<>();
        int insertAt = classificationPage.getTextBlocks().indexOf(blockToSplit);
        boolean prependIdentifier = !sectionIdentifier.getFormat().equals(SectionIdentifier.Format.EMPTY) && !title.startsWith(sectionIdentifier.getIdentifierString());
        String headline = prependIdentifier ? sectionIdentifier + title : title;
        WordSequenceResult located = findWordSequence(blockToSplit.getSequences(), headline);
        if (located.inSequence.isEmpty() && !headline.equals(title)) {
            located = findWordSequence(blockToSplit.getSequences(), title);
        }
        boolean headlineIsolated = !located.inSequence.isEmpty();
        if (headlineIsolated) {
            blockToSplit.setSequences(located.inSequence);
            blockToSplit.recalculateBBox();
        }
        if (!located.preSequence.isEmpty()) {
            TextPageBlock leading = buildTextBlock(located.preSequence, 0);
            classificationPage.getTextBlocks().add(insertAt, leading);
            createdBlocks.add(leading);
            // The block to split moved one position back.
            insertAt++;
        }
        if (!located.postSequence.isEmpty()) {
            TextPageBlock trailing = buildTextBlock(located.postSequence, 0);
            classificationPage.getTextBlocks().add(insertAt + 1, trailing);
            createdBlocks.add(trailing);
        }
        return new SplitBlockResult(headlineIsolated, createdBlocks);
    }
    /**
     * Locates a text in a list of sequences and partitions the sequences into the part before the text, the
     * part forming the text and the part after it.
     * <p>
     * Both the text and the sequences are compared in sanitized form (whitespace removed, lower case). The
     * sequences are accumulated one by one. As soon as the accumulated text ends with or starts with the
     * target, the surplus at the front or at the end is moved to the pre- or post-sequence, splitting a
     * sequence where the boundary lies inside of it.
     * <p>
     * NOTE(review): the trimming uses the lengths of the raw sequences against the sanitized buffer, which
     * presumably relies on sequences not containing whitespace - confirm.
     *
     * @param textPositionSequences sequences of the block to search in
     * @param text                  text to locate
     * @return the partition, or an empty result if the text is empty after sanitizing or was not located
     */
    private static WordSequenceResult findWordSequence(List<TextPositionSequence> textPositionSequences, String text) {
        String target = sanitizeString(text);
        if (target.isEmpty()) {
            // An empty target would let the trimming loop below empty inSequence and then read its first element.
            return new WordSequenceResult();
        }
        List<TextPositionSequence> inSequence = new ArrayList<>();
        List<TextPositionSequence> preSequence = new ArrayList<>();
        List<TextPositionSequence> postSequence = new ArrayList<>();
        StringBuilder currentSequence = new StringBuilder();
        // Iterating by position avoids an equals-based lookup of the current sequence when building the post-sequence.
        for (int sequenceIdx = 0; sequenceIdx < textPositionSequences.size(); sequenceIdx++) {
            TextPositionSequence sequence = textPositionSequences.get(sequenceIdx);
            currentSequence.append(sanitizeString(sequence.toString()));
            inSequence.add(sequence);
            if (currentSequence.length() >= target.length()) {
                if (currentSequence.toString().endsWith(target)) {
                    // Surplus text is in front of the target: move whole leading sequences, then split the boundary sequence.
                    int index = 0;
                    String toRemove = currentSequence.substring(0, currentSequence.length() - target.length());
                    TextPositionSequence next = inSequence.get(index);
                    while (currentSequence.length() - next.length() >= target.length()) {
                        TextPositionSequence removed = inSequence.remove(index);
                        currentSequence.delete(0, removed.toString().length());
                        preSequence.add(removed);
                        next = inSequence.get(index);
                        toRemove = toRemove.substring(removed.length());
                    }
                    if (!toRemove.isEmpty()) {
                        SplitSequenceResult splitSequenceResult = splitSequence(inSequence.remove(index), toRemove);
                        currentSequence.delete(0, splitSequenceResult.out.length());
                        preSequence.add(splitSequenceResult.out);
                        inSequence.add(index, splitSequenceResult.in);
                    }
                } else if (currentSequence.toString().startsWith(target)) {
                    // Surplus text is behind the target: it can only belong to the sequence added last.
                    int index = inSequence.size() - 1;
                    String toRemove = currentSequence.substring(target.length());
                    SplitSequenceResult splitSequenceResult = splitSequence(inSequence.remove(index), toRemove);
                    currentSequence.delete(currentSequence.length() - splitSequenceResult.out.length(), currentSequence.length());
                    inSequence.add(index, splitSequenceResult.in);
                    postSequence.add(splitSequenceResult.out);
                }
                if (currentSequence.toString().equals(target)) {
                    postSequence.addAll(textPositionSequences.subList(sequenceIdx + 1, textPositionSequences.size()));
                    return new WordSequenceResult(inSequence, preSequence, postSequence);
                }
            }
        }
        return new WordSequenceResult();
    }
    /**
     * Cuts a piece of text out of a sequence.
     * <p>
     * The text is searched case-sensitively first and, if that fails, ignoring case. The fallback is needed
     * because {@code findWordSequence} derives the text to remove from lower-cased text while the sequence
     * keeps its original case.
     * <p>
     * The remaining part is the text in front of the removed piece or, if the piece starts at the beginning,
     * the text behind it. Text behind a piece that does not start at the beginning is not kept.
     *
     * @param sequence sequence to split
     * @param toRemove text to cut out of the sequence
     * @return the removed piece as {@code out} and the remaining part as {@code in}; {@code in} is
     *         {@code null} if nothing remains
     * @throws IndexOutOfBoundsException if the sequence does not contain the text
     */
    private static SplitSequenceResult splitSequence(TextPositionSequence sequence, String toRemove) {
        String sequenceText = sequence.toString();
        int index = sequenceText.indexOf(toRemove);
        if (index < 0) {
            // regionMatches keeps the character positions of the original text, unlike lower-casing the whole string.
            for (int start = 0; start + toRemove.length() <= sequenceText.length(); start++) {
                if (sequenceText.regionMatches(true, start, toRemove, 0, toRemove.length())) {
                    index = start;
                    break;
                }
            }
        }
        if (index < 0) {
            throw new IndexOutOfBoundsException("Text to remove is not contained in the sequence");
        }
        int endIndex = index + toRemove.length();
        TextPositionSequence out = createSubSequence(sequence, index, endIndex);
        TextPositionSequence in = null;
        if (index > 0) {
            in = createSubSequence(sequence, 0, index);
        } else if (endIndex < sequence.getTextPositions().size()) {
            in = createSubSequence(sequence, endIndex, sequence.getTextPositions().size());
        }
        return new SplitSequenceResult(in, out);
    }
    /**
     * Creates a new sequence from a copy of the text positions {@code start} (inclusive) to {@code end}
     * (exclusive) of the given sequence, taking over its page and its paragraph start flag.
     */
    private static TextPositionSequence createSubSequence(TextPositionSequence sequence, int start, int end) {
        TextPositionSequence subSequence = new TextPositionSequence(new ArrayList<>(sequence.getTextPositions().subList(start, end)), sequence.getPage());
        boolean startsParagraph = sequence.isParagraphStart();
        subSequence.setParagraphStart(startsParagraph);
        return subSequence;
    }
    /**
     * Merges the given blocks into the first one and removes the merged blocks from the page.
     * <p>
     * Only blocks with the same direction as the first block are merged, and only if the first block has
     * sequences. A single block is returned untouched.
     *
     * @param classificationPage page owning the blocks
     * @param blocksToMerge      blocks to merge, must not be empty
     * @return the first block, now holding the sequences of all merged blocks
     */
    private TextPageBlock mergeBlocks(ClassificationPage classificationPage, List<TextPageBlock> blocksToMerge) {
        TextPageBlock target = blocksToMerge.get(0);
        if (blocksToMerge.size() <= 1) {
            return target;
        }
        List<TextPageBlock> absorbed = new ArrayList<>();
        for (int i = 1; i < blocksToMerge.size(); i++) {
            TextPageBlock candidate = blocksToMerge.get(i);
            if (target == null || target.getSequences().isEmpty()) {
                continue;
            }
            if (candidate.getDir() != target.getDir()) {
                continue;
            }
            target.getSequences().addAll(candidate.getSequences());
            absorbed.add(candidate);
        }
        assert target != null;
        target.setToDuplicate(false);
        target.recalculateBBox();
        classificationPage.getTextBlocks().removeAll(absorbed);
        return target;
    }
    /**
     * Finds all ordered selections of blocks whose sanitized texts concatenate exactly to the sanitized title.
     *
     * @param title  title to assemble
     * @param blocks candidate blocks in page order
     * @return all matching selections, empty if there is none
     */
    private static List<List<TextPageBlock>> findCombinations(String title, List<TextPageBlock> blocks) {
        List<List<TextPageBlock>> found = new ArrayList<>();
        List<TextPageBlock> workingSelection = new ArrayList<>();
        findCombinations(title, blocks, workingSelection, found);
        return found;
    }
    /**
     * Backtracking step of the combination search.
     * <p>
     * Every block whose sanitized text is a prefix of the remaining title is selected in turn, and the search
     * continues with the rest of the title and the blocks following the selected one. A selection is recorded
     * once the remaining title is empty.
     *
     * @param title        remaining part of the title
     * @param blocks       blocks still available
     * @param current      blocks selected so far; restored to its initial content on return
     * @param combinations receives a copy of every complete selection
     */
    private static void findCombinations(String title, List<TextPageBlock> blocks, List<TextPageBlock> current, List<List<TextPageBlock>> combinations) {
        String target = sanitizeString(title);
        if (target.isEmpty()) {
            combinations.add(new ArrayList<>(current));
            return;
        }
        List<TextPageBlock> unused = new ArrayList<>();
        for (TextPageBlock block : blocks) {
            if (!current.contains(block)) {
                unused.add(block);
            }
        }
        for (TextPageBlock block : unused) {
            String prefix = sanitizeString(block.getText());
            if (!target.startsWith(prefix)) {
                continue;
            }
            current.add(block);
            int nextStart = blocks.indexOf(block) + 1;
            findCombinations(target.substring(prefix.length()), blocks.subList(nextStart, blocks.size()), current, combinations);
            current.remove(current.size() - 1);
        }
    }
    /**
     * Returns the Euclidean distance between the outline target and the {@code (minX, minY)} corner of the
     * block. The y coordinate of the outline point is subtracted from the page height before comparing.
     */
    private double calculateDistance(OutlineObject outlineObject, TextPageBlock pageBlock) {
        double horizontalOffset = outlineObject.getPoint().getX() - pageBlock.getMinX();
        double verticalOffset = pageBlock.getPageHeight() - outlineObject.getPoint().getY() - pageBlock.getMinY();
        double squaredDistance = horizontalOffset * horizontalOffset + verticalOffset * verticalOffset;
        return Math.sqrt(squaredDistance);
    }
    /**
     * Records in the context how a text block relates to the outline title. Both texts are compared in
     * sanitized form.
     * <p>
     * Three cases are handled:
     * <ol>
     * <li>equality: the block becomes the direct match</li>
     * <li>the outline title contains the block text: the block becomes a merge candidate</li>
     * <li>the block text contains the outline title: the block becomes the split candidate, or the direct
     * match if it consists of a section identifier followed by the title</li>
     * </ol>
     * A title that is split up between two different blocks (an intersection) is not handled. This should not
     * happen with how docstrum creates the blocks; if it turns out to be necessary, a split with a follow-up
     * merge has to be done.
     *
     * @param pageBlock block to examine
     * @param context   context holding the outline object and receiving the candidates
     * @return {@code true} if the block was stored as direct match, {@code false} otherwise
     */
    private boolean processOutlineForTextBlock(TextPageBlock pageBlock, OutlineProcessionContext context) {
        String blockText = sanitizeString(pageBlock.getText());
        String outlineTitle = sanitizeString(context.getOutlineObject().getTitle());
        boolean titleInBlock = blockText.contains(outlineTitle);
        boolean blockInTitle = outlineTitle.contains(blockText);
        if (!(titleInBlock || blockInTitle)) {
            return false;
        }
        if (context.directMatch == null && blockText.equals(outlineTitle)) {
            context.directMatch = pageBlock;
            return true;
        }
        if (blockInTitle) {
            context.mergeCandidates.add(pageBlock);
        }
        if (!titleInBlock) {
            return false;
        }
        SectionIdentifier sectionIdentifier = SectionIdentifier.fromSearchText(blockText);
        boolean identifierMissingInTitle = sectionIdentifier.getFormat() != SectionIdentifier.Format.EMPTY && !outlineTitle.startsWith(sectionIdentifier.getIdentifierString());
        if (identifierMissingInTitle) {
            boolean identifierThenTitle = blockText.startsWith(sectionIdentifier.getIdentifierString()) && blockText.endsWith(outlineTitle);
            if (identifierThenTitle && context.directMatch == null) {
                context.directMatch = pageBlock;
                return true;
            }
            // The identifier is only remembered for the block that becomes the split candidate below.
            if (context.splitCandidate == null) {
                context.sectionIdentifier = sectionIdentifier;
            }
        }
        if (context.splitCandidate == null) {
            context.splitCandidate = pageBlock;
        }
        return false;
    }
    /**
     * Normalizes a text for comparison by removing all whitespace and converting it to lower case,
     * independent of the default locale.
     */
    private static String sanitizeString(String text) {
        String withoutWhitespace = StringUtils.deleteWhitespace(text);
        return withoutWhitespace.toLowerCase(Locale.ROOT);
    }
    /**
     * Candidates collected for one outline object while the text blocks of a page are examined.
     * A new context starts without direct match and split candidate, with no merge candidates and with an
     * empty section identifier.
     */
    @Data
    private static class OutlineProcessionContext {
        private TextPageBlock directMatch;
        private OutlineObject outlineObject;
        private List<TextPageBlock> mergeCandidates = new ArrayList<>();
        private TextPageBlock splitCandidate;
        private SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
        OutlineProcessionContext(OutlineObject outlineObject) {
            this.outlineObject = outlineObject;
        }
    }
    /**
     * Partition of the sequences of a block around a located text: the sequences forming the text and the
     * sequences before and after it.
     */
    public static class WordSequenceResult {
        public List<TextPositionSequence> inSequence;
        public List<TextPositionSequence> preSequence;
        public List<TextPositionSequence> postSequence;
        /** Creates an empty result with three new, modifiable lists. */
        public WordSequenceResult() {
            this(new ArrayList<>(), new ArrayList<>(), new ArrayList<>());
        }
        public WordSequenceResult(List<TextPositionSequence> inSequence, List<TextPositionSequence> preSequence, List<TextPositionSequence> postSequence) {
            this.inSequence = inSequence;
            this.preSequence = preSequence;
            this.postSequence = postSequence;
        }
    }
    /**
     * Outcome of splitting a block.
     *
     * @param modifiedBlockToSplit whether the sequences of the split block were replaced
     * @param otherBlocks          blocks newly created for the text before and after the kept part
     */
    public record SplitBlockResult(boolean modifiedBlockToSplit, List<TextPageBlock> otherBlocks) {}
    /**
     * Outcome of cutting a piece out of a sequence.
     *
     * @param in  remaining part of the sequence, {@code null} if nothing remains
     * @param out piece that was cut out
     */
    public record SplitSequenceResult(TextPositionSequence in, TextPositionSequence out) {}
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
@ -126,6 +126,16 @@ public class DocstrumBlockificationService {
                    continue;
                }
                if (current.isHeadline() || previous.isHeadline()) {
                    if (intersectsYWithPreviousHavingMaxOneLine(previous, current, page)) {
                        previous = combineBlocksAndResetIterator(previous, current, itty, false);
                    } else {
                        previous = current;
                    }
                    continue;
                }
                if (areTheOnlyTwoBlocksOnHeightsWithBothMoreThanTwoLines(previous, current, page)) {
                    previous = combineBlocksAndResetIterator(previous, current, itty, true);
                    continue;
@ -172,6 +182,12 @@ public class DocstrumBlockificationService {
    }
    /**
     * Checks whether the previous block intersects the current block vertically, consists of exactly one line
     * and the current block has at least one line. The page is not consulted.
     */
    private boolean intersectsYWithPreviousHavingMaxOneLine(TextPageBlock previous, TextPageBlock current, ClassificationPage page) {
        if (!previous.intersectsY(current)) {
            return false;
        }
        if (previous.getNumberOfLines() != 1) {
            return false;
        }
        return current.getNumberOfLines() >= 1;
    }
    private boolean areTheOnlyTwoBlocksOnHeightsWithBothMoreThanTwoLines(TextPageBlock previous, TextPageBlock current, ClassificationPage page) {
        return previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 //
@ -185,6 +201,9 @@ public class DocstrumBlockificationService {
        previous.getSequences().addAll(current.getSequences());
        previous = buildTextBlock(previous.getSequences(), 0);
        previous.setToDuplicate(toDuplicate);
        if (current.getClassification() != null && previous.getClassification() == null) {
            previous.setClassification(current.getClassification());
        }
        itty.remove();
        itty.previous();
        itty.set(previous);
@ -244,21 +263,30 @@ public class DocstrumBlockificationService {
                continue;
            }
            if (block.getClassification() != null && block.getClassification().isHeadline()) {
                continue;
            }
            TextPageBlock current = (TextPageBlock) block;
            for (int i = 0; i < blocks.size(); i++) {
-                if (blocks.get(i) == null) {
+                AbstractPageBlock abstractPageBlock = blocks.get(i);
                if (abstractPageBlock == null) {
                    continue;
                }
-                if (blocks.get(i) == current) {
+                if (abstractPageBlock == current) {
                    continue;
                }
-                if (blocks.get(i) instanceof TablePageBlock) {
+                if (abstractPageBlock instanceof TablePageBlock) {
                    continue;
                }
-                TextPageBlock inner = (TextPageBlock) blocks.get(i);
+                if (abstractPageBlock.getClassification() != null && abstractPageBlock.getClassification().isHeadline()) {
                    continue;
                }
                TextPageBlock inner = (TextPageBlock) abstractPageBlock;
                if (usedRulings.lineBetween(current, blocks.get(i))) {
                    continue;
@ -285,7 +313,7 @@ public class DocstrumBlockificationService {
    }
-    private TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
+    public static TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
        return new TextPageBlock(wordBlockList);
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClarifyndClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClarifyndClassificationService.java
@ -21,12 +21,16 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
 public class ClarifyndClassificationService {
    private final HeadlineClassificationService headlineClassificationService;
    public void classifyDocument(ClassificationDocument document) {
        List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
        log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
        headlineClassificationService.resetContext();
        for (ClassificationPage page : document.getPages()) {
            classifyPage(page, document, headlineFontSizes);
        }
@ -47,6 +51,10 @@ public class ClarifyndClassificationService {
        var bodyTextFrame = page.getBodyTextFrame();
        if (textBlock.getClassification() != null && textBlock.getClassification().isHeadline()) {
            headlineClassificationService.setLastHeadlineFromOutline(textBlock);
            return;
        }
        if (document.getFontSizeCounter().getMostPopular() == null) {
            textBlock.setClassification(PageBlockType.PARAGRAPH);
            return;
@ -79,7 +87,8 @@ public class ClarifyndClassificationService {
            for (int i = 1; i <= headlineFontSizes.size(); i++) {
                if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
-                    textBlock.setClassification(PageBlockType.getHeadlineType(i));
+                    PageBlockType headlineType = PageBlockType.getHeadlineType(i);
                    headlineClassificationService.classifyHeadline(textBlock, headlineType);
                    document.setHeadlines(true);
                }
            }
@ -89,7 +98,8 @@ public class ClarifyndClassificationService {
                .getTextPositions()
                .get(0)
                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
-            textBlock.setClassification(PageBlockType.getHeadlineType(headlineFontSizes.size() + 1));
+            PageBlockType headlineType = PageBlockType.getHeadlineType(headlineFontSizes.size() + 1);
            headlineClassificationService.classifyHeadline(textBlock, headlineType);
            document.setHeadlines(true);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java
@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
 public class DocuMineClassificationService {
    private final HeadlineClassificationService headlineClassificationService;
    private static final Pattern pattern = Pattern.compile("^([1-9]\\d?\\.){1,3}\\d{1,2}\\.?\\s[0-9A-Za-z \\[\\]]{2,50}", Pattern.CASE_INSENSITIVE);
    private static final Pattern pattern2 = Pattern.compile("\\p{L}{3,}", Pattern.CASE_INSENSITIVE);
    private static final Pattern pattern3 = Pattern.compile("^(\\d{1,1}\\.){1,3}\\d{1,2}\\.?\\s[a-z]{1,2}\\/[a-z]{1,2}.*");
@ -35,6 +36,8 @@ public class DocuMineClassificationService {
        log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
        headlineClassificationService.resetContext();
        for (ClassificationPage page : document.getPages()) {
            classifyPage(page, document, headlineFontSizes);
        }
@ -60,6 +63,10 @@ public class DocuMineClassificationService {
        Matcher matcher2 = pattern2.matcher(textBlock.toString());
        Matcher matcher3 = pattern3.matcher(textBlock.toString());
        if (textBlock.getClassification() != null && textBlock.getClassification().isHeadline()) {
            headlineClassificationService.setLastHeadlineFromOutline(textBlock);
            return;
        }
        if (document.getFontSizeCounter().getMostPopular() == null) {
            textBlock.setClassification(PageBlockType.OTHER);
            return;
@ -95,6 +102,7 @@ public class DocuMineClassificationService {
                   && (textBlock.getMostPopularWordHeight() > document.getTextHeightCounter().getMostPopular()
                       || textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular())
                   && PositionUtils.getApproxLineCount(textBlock) < 5.9
                   && (textBlock.getMostPopularWordStyle().contains("bold") && Character.isDigit(textBlock.toString().charAt(0)) && matcher2.reset().find() && !textBlock.toString()
                .contains(":")
                       || textBlock.toString().equals(textBlock.toString().toUpperCase(Locale.ROOT)) && matcher2.reset().find() && !textBlock.toString().contains(":")
@ -103,11 +111,13 @@ public class DocuMineClassificationService {
                       || textBlock.toString().startsWith("TABLE"))
                   && !textBlock.toString().endsWith(":")
                   && matcher2.reset().find()) {
-            textBlock.setClassification(PageBlockType.getHeadlineType(1));
+            PageBlockType headlineType = PageBlockType.getHeadlineType(1);
            headlineClassificationService.classifyHeadline(textBlock, headlineType);
            document.setHeadlines(true);
        } else if (matcher.reset().find() && PositionUtils.getApproxLineCount(textBlock) < 2.9 && matcher2.reset().find() && !matcher3.reset().matches()) {
-            textBlock.setClassification(PageBlockType.getHeadlineType(2));
+            PageBlockType headlineType = PageBlockType.getHeadlineType(2);
            headlineClassificationService.classifyHeadline(textBlock, headlineType);
            document.setHeadlines(true);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/HeadlineClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/HeadlineClassificationService.java
@ -0,0 +1,62 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.classification;
 import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;
 import org.springframework.stereotype.Service;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import lombok.Getter;
 import lombok.Setter;
/**
 * Tracks the most recently seen headline while text blocks are being classified and
 * derives the final headline level of each newly detected headline from that context.
 *
 * <p>Callers are expected to invoke {@link #resetContext()} before classifying a document,
 * report blocks that already carry a headline classification through
 * {@link #setLastHeadlineFromOutline(TextPageBlock)}, and hand every freshly detected
 * headline to {@link #classifyHeadline(TextPageBlock, PageBlockType)}.
 *
 * <p>NOTE(review): the context lives in plain mutable fields of a Spring bean. If several
 * documents can be classified concurrently they would share this state - confirm that
 * callers run sequentially.
 */
@Service
@Getter
@Setter
 public class HeadlineClassificationService {
    /** Headline handled most recently, either classified here or reported as pre-classified. */
    TextPageBlock lastHeadline;
    /** Headline type that was originally proposed for {@link #lastHeadline}, before any adjustment. */
    PageBlockType originalClassifiedBlockType;
    /** Most recent headline that arrived already classified (by its name, presumably from the outline). */
    TextPageBlock lastHeadlineFromOutline;
    /**
     * Clears the whole headline context so that no state leaks into the next run.
     */
    public void resetContext() {
        this.lastHeadline = null;
        this.originalClassifiedBlockType = null;
        this.lastHeadlineFromOutline = null;
    }
    /**
     * Records a headline that was already classified before it reached the classifier.
     * The same block also becomes the last seen headline.
     *
     * @param lastHeadlineFromOutline the pre-classified headline block
     */
    public void setLastHeadlineFromOutline(TextPageBlock lastHeadlineFromOutline) {
        this.lastHeadline = lastHeadlineFromOutline;
        this.lastHeadlineFromOutline = lastHeadlineFromOutline;
    }
    /**
     * Assigns the final headline type to {@code textBlock} and makes it the last seen headline.
     *
     * <p>The proposed type is kept unless there is a previous headline that either was
     * pre-classified or had its own proposed type adjusted; see
     * {@code resolveHeadlineType} for the exact rules.
     *
     * @param textBlock           the block detected as a headline; its classification is overwritten
     * @param initialHeadlineType the headline type proposed by the calling classifier
     */
    public void classifyHeadline(TextPageBlock textBlock, PageBlockType initialHeadlineType) {
        // Resolve against the previous context first; the context is only updated afterwards.
        PageBlockType resolvedType = resolveHeadlineType(initialHeadlineType);
        this.originalClassifiedBlockType = initialHeadlineType;
        textBlock.setClassification(resolvedType);
        this.lastHeadline = textBlock;
    }
    /**
     * Computes the headline type for a new headline from the current context.
     *
     * <ul>
     *   <li>No previous headline: the proposed type is returned unchanged.</li>
     *   <li>Previous headline is the last pre-classified one: one level below that headline.</li>
     *   <li>Previous headline's final type differs from its proposed type: the same level
     *       shift is applied to the new proposed type.</li>
     *   <li>Otherwise: the proposed type is returned unchanged.</li>
     * </ul>
     *
     * <p>NOTE(review): when the previous headline was moved to a higher level than proposed,
     * the shifted number can drop below 1; how {@code PageBlockType.getHeadlineType} treats
     * such values is not visible here - confirm.
     */
    private PageBlockType resolveHeadlineType(PageBlockType initialHeadlineType) {
        if (lastHeadline == null) {
            return initialHeadlineType;
        }
        if (lastHeadline.equals(lastHeadlineFromOutline)) {
            int parentLevel = getHeadlineNumber(lastHeadline.getClassification());
            return PageBlockType.getHeadlineType(parentLevel + 1);
        }
        if (originalClassifiedBlockType == null) {
            return initialHeadlineType;
        }
        PageBlockType previousFinalType = lastHeadline.getClassification();
        if (previousFinalType == originalClassifiedBlockType) {
            return initialHeadlineType;
        }
        int levelShift = getHeadlineNumber(originalClassifiedBlockType) - getHeadlineNumber(previousFinalType);
        return PageBlockType.getHeadlineType(getHeadlineNumber(initialHeadlineType) - levelShift);
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java
@ -22,12 +22,17 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
 public class RedactManagerClassificationService {
    private final HeadlineClassificationService headlineClassificationService;
    public void classifyDocument(ClassificationDocument document) {
        List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
        log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
        headlineClassificationService.resetContext();
        for (ClassificationPage page : document.getPages()) {
            classifyPage(page, document, headlineFontSizes);
        }
@ -48,6 +53,10 @@ public class RedactManagerClassificationService {
        var bodyTextFrame = page.getBodyTextFrame();
        if (textBlock.getClassification() != null && textBlock.getClassification().isHeadline()) {
            headlineClassificationService.setLastHeadlineFromOutline(textBlock);
            return;
        }
        if (document.getFontSizeCounter().getMostPopular() == null) {
            textBlock.setClassification(PageBlockType.OTHER);
            return;
@ -60,58 +69,64 @@ public class RedactManagerClassificationService {
            textBlock.setClassification(PageBlockType.PARAGRAPH);
            return;
        }
-
+        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER)
-        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER) || PositionUtils.isOverBodyTextFrame(bodyTextFrame,
+            || PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null
-                textBlock,
+                                                                                                   || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
                page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
                .getMostPopular())) {
            textBlock.setClassification(PageBlockType.HEADER);
-        } else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER) || PositionUtils.isUnderBodyTextFrame(bodyTextFrame,
+        } else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER)
-                textBlock,
+                   || PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null
-                page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
+                                                                                                           || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
                .getMostPopular())) {
            textBlock.setClassification(PageBlockType.FOOTER);
-        } else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
+        } else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter().getMostPopular()) > 2.5
-                document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
+                                                 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks().size() == 1)) {
                .size() == 1)) {
            if (!Pattern.matches("[0-9]+", textBlock.toString())) {
                textBlock.setClassification(PageBlockType.TITLE);
            }
-        } else if (textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
+        } else if (textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular()
-                .getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle().equals("bold") || !document.getFontStyleCounter()
+                   && PositionUtils.getApproxLineCount(textBlock) < 4.9
-                .getCountPerValue()
+                   && (textBlock.getMostPopularWordStyle().equals("bold")
-                .containsKey("bold") && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1) && textBlock.getSequences()
+                       || !document.getFontStyleCounter().getCountPerValue().containsKey("bold")
-                .get(0)
+                          && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1)
-                .getTextPositions()
+                   && textBlock.getSequences()
-                .get(0)
+                              .get(0).getTextPositions()
-                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
+                              .get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
            for (int i = 1; i <= headlineFontSizes.size(); i++) {
                if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
-                    textBlock.setClassification(PageBlockType.getHeadlineType(i));
+                    PageBlockType headlineType = PageBlockType.getHeadlineType(i);
                    headlineClassificationService.classifyHeadline(textBlock, headlineType);
                    document.setHeadlines(true);
                }
            }
-        } else if (!textBlock.getText().startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordStyle()
+        } else if (!textBlock.getText().startsWith("Figure ")
-                .equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences()
+                   && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
-                .get(0)
+                   && textBlock.getMostPopularWordStyle().equals("bold")
-                .getTextPositions()
+                   && !document.getFontStyleCounter().getMostPopular().equals("bold")
-                .get(0)
+                   && PositionUtils.getApproxLineCount(textBlock) < 2.9
-                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
+                   && textBlock.getSequences()
-            textBlock.setClassification(PageBlockType.getHeadlineType(headlineFontSizes.size() + 1));
+                              .get(0).getTextPositions()
                              .get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
            PageBlockType headlineType = PageBlockType.getHeadlineType(headlineFontSizes.size() + 1);
            headlineClassificationService.classifyHeadline(textBlock, headlineType);
            document.setHeadlines(true);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
+        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
-                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
+                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
                   && textBlock.getMostPopularWordStyle().equals("bold")
                   && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
            textBlock.setClassification(PageBlockType.PARAGRAPH_BOLD);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFont()
+        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
-                .equals(document.getFontCounter().getMostPopular()) && textBlock.getMostPopularWordStyle()
+                   && textBlock.getMostPopularWordFont().equals(document.getFontCounter().getMostPopular())
-                .equals(document.getFontStyleCounter().getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
+                   && textBlock.getMostPopularWordStyle().equals(document.getFontStyleCounter().getMostPopular())
                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
            textBlock.setClassification(PageBlockType.PARAGRAPH);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
+        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
-                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter()
+                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
-                .getMostPopular()
+                   && textBlock.getMostPopularWordStyle().equals("italic")
-                .equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
+                   && !document.getFontStyleCounter().getMostPopular().equals("italic")
                   && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
            textBlock.setClassification(PageBlockType.PARAGRAPH_ITALIC);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)) {
            textBlock.setClassification(PageBlockType.PARAGRAPH_UNKNOWN);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
@ -11,6 +11,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
@ -31,8 +32,10 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Im
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SectionNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContentItem;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
@ -57,11 +60,6 @@ public class DocumentGraphFactory {
        document.getPages()
                .forEach(context::buildAndAddPageWithCounter);
        document.getSections()
                .stream()
                .flatMap(section -> section.getImages()
                        .stream())
                .forEach(image -> context.getImages().add(image));
        addSections(layoutParsingType, document, context, documentGraph);
        addHeaderAndFooterToEachPage(document, context);
@ -75,8 +73,17 @@ public class DocumentGraphFactory {
    private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
-        classificationDocument.getSections()
+        for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
-                .forEach(section -> SectionNodeFactory.addSection(layoutParsingType, null, section.getNonEmptyPageBlocks(), section.getImages(), context, document));
+            var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();
            Optional<SectionNode> section = SectionNodeFactory.addSection(layoutParsingType,
                                                                      parent,
                                                                      tocItem.getChildren().isEmpty(),
                                                                      tocItem.getNonEmptySectionBlocks(),
                                                                      tocItem.getImages(),
                                                                      context,
                                                                      document);
            tocItem.setSection(section.orElse(null));
        }
    }
@ -181,10 +188,7 @@ public class DocumentGraphFactory {
        Page page = context.getPage(textBlocks.get(0).getPage());
        Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
-        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks),
+        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks), footer, context, page);
                                                                                  footer,
                                                                                  context,
                                                                                  page);
        List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
        footer.setTreeId(tocId);
        footer.setLeafTextBlock(textBlock);
@ -236,7 +240,7 @@ public class DocumentGraphFactory {
        DocumentTree documentTree;
        Map<Page, Integer> pages;
-        List<Section> sections;
+        List<SectionNode> sections;
        List<ClassifiedImage> images;
        TextBlockFactory textBlockFactory;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
@ -9,6 +9,7 @@ import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
@ -17,6 +18,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SectionNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
@ -27,12 +30,13 @@ import lombok.experimental.UtilityClass;
@UtilityClass
 public class SectionNodeFactory {
-    public void addSection(LayoutParsingType layoutParsingType,
+    public Optional<SectionNode> addSection(LayoutParsingType layoutParsingType,
-                           GenericSemanticNode parentNode,
+                                        GenericSemanticNode parentNode,
-                           List<AbstractPageBlock> pageBlocks,
+                                        boolean isLeaf,
-                           List<ClassifiedImage> images,
+                                        List<AbstractPageBlock> pageBlocks,
-                           DocumentGraphFactory.Context context,
+                                        List<ClassifiedImage> images,
-                           Document document) {
+                                        DocumentGraphFactory.Context context,
                                        Document document) {
        // This is for the case where we have images on a page without any text/footer/header.
        // The pageBlocks list is empty, but we still need to add those images to the document.
@ -40,16 +44,22 @@ public class SectionNodeFactory {
            images.stream()
                    .distinct()
                    .forEach(image -> DocumentGraphFactory.addImage(document, image, context));
-            return;
+            return Optional.empty();
        }
        if (pageBlocks.isEmpty()) {
-            return;
+            return Optional.empty();
        }
        Map<Integer, List<AbstractPageBlock>> blocksPerPage = pageBlocks.stream()
                .collect(groupingBy(AbstractPageBlock::getPage));
-        Section section = Section.builder().documentTree(context.getDocumentTree()).build();
+
        SectionNode section;
        if (isLeaf) {
            section = Section.builder().documentTree(context.getDocumentTree()).build();
        } else {
            section = SuperSection.builder().documentTree(context.getDocumentTree()).build();
        }
        context.getSections().add(section);
        blocksPerPage.keySet()
@ -59,12 +69,24 @@ public class SectionNodeFactory {
        addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section, document);
        if (containsTablesAndTextBlocks(pageBlocks)) {
            if (pageBlocks.get(0).isHeadline()) {
                pageBlocks.remove(0);
            }
            splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
                                                                                                  section,
                                                                                                  true,
                                                                                                  subSectionPageBlocks,
                                                                                                  emptyList(),
                                                                                                  context,
                                                                                                  document));
        } else if (!isLeaf) {
            if (pageBlocks.get(0).isHeadline()) {
                pageBlocks.remove(0);
            }
            addSection(layoutParsingType, section, true, pageBlocks, emptyList(), context, document);
        } else {
            addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section, document);
        }
@ -72,10 +94,12 @@ public class SectionNodeFactory {
        images.stream()
                .distinct()
                .forEach(image -> DocumentGraphFactory.addImage(section, image, context));
        return Optional.of(section);
    }
-    private List<Integer> getTreeId(GenericSemanticNode parentNode, DocumentGraphFactory.Context context, Section section) {
+    private List<Integer> getTreeId(GenericSemanticNode parentNode, DocumentGraphFactory.Context context, SectionNode section) {
        if (parentNode == null) {
            return context.getDocumentTree().createNewMainEntryAndReturnId(section);
@ -88,7 +112,7 @@ public class SectionNodeFactory {
    private void addFirstHeadlineDirectlyToSection(LayoutParsingType layoutParsingType,
                                                   List<AbstractPageBlock> pageBlocks,
                                                   DocumentGraphFactory.Context context,
-                                                   Section section,
+                                                   SectionNode section,
                                                   Document document) {
        if (pageBlocks.get(0).isHeadline()) {
@ -101,7 +125,7 @@ public class SectionNodeFactory {
    private void addTablesAndParagraphsAndHeadlinesToSection(LayoutParsingType layoutParsingType,
                                                             List<AbstractPageBlock> pageBlocks,
                                                             DocumentGraphFactory.Context context,
-                                                             Section section,
+                                                             SectionNode section,
                                                             Document document) {
        Set<AbstractPageBlock> alreadyMerged = new HashSet<>();
@ -226,7 +250,7 @@ public class SectionNodeFactory {
    }
-    private void addSectionNodeToPageNode(DocumentGraphFactory.Context context, Section section, Integer pageNumber) {
+    private void addSectionNodeToPageNode(DocumentGraphFactory.Context context, SectionNode section, Integer pageNumber) {
        Page page = context.getPage(pageNumber);
        page.getMainBody().add(section);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/TableNodeFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/TableNodeFactory.java
@ -12,6 +12,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBl
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
@ -154,10 +155,11 @@ public class TableNodeFactory {
        } else if (firstTextBlockIsHeadline(cell)) {
            SectionNodeFactory.addSection(layoutParsingType,
                                          tableCell,
                                          true,
                                          cell.getTextBlocks()
                                                  .stream()
                                                  .map(tb -> (AbstractPageBlock) tb)
-                                                  .toList(),
+                                                  .collect(Collectors.toList()),
                                          emptyList(),
                                          context,
                                          document);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java
@ -61,7 +61,7 @@ public class DocumentGraphMapper {
            List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
            SemanticNode node = switch (entryData.getType()) {
-                case SECTION -> buildSection(context);
+                case SECTION, SUPER_SECTION -> buildSection(context);
                case PARAGRAPH -> buildParagraph(context, entryData.getProperties());
                case HEADLINE -> buildHeadline(context);
                case HEADER -> buildHeader(context);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java
@ -193,10 +193,11 @@ public class LayoutGridService {
        List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION)
                .toList();
        Page firstPage = semanticNode.getFirstPage();
        String treeIdString = buildTreeIdString(semanticNode);
        if (!subSections.isEmpty()) {
-            addPlacedText(firstPage, bBoxMap.get(firstPage), buildTreeIdString(semanticNode), layoutGrid);
+            addPlacedText(firstPage, bBoxMap.get(firstPage), treeIdString, layoutGrid);
        } else {
-            bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, buildTreeIdString(semanticNode), layoutGrid)));
+            bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, treeIdString, layoutGrid)));
        }
        if (bBoxMap.values().size() == 1) {
            Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH, LINE_WIDTH);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PdfVisualisationUtility.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PdfVisualisationUtility.java
@ -112,8 +112,8 @@ public class PdfVisualisationUtility {
            case DOCUMENT -> Color.LIGHT_GRAY;
            case HEADER, FOOTER -> Color.GREEN;
            case PARAGRAPH -> Color.BLUE;
            case SUPER_SECTION, SECTION -> Color.BLACK;
            case HEADLINE -> Color.RED;
            case SECTION -> Color.BLACK;
            case TABLE -> Color.ORANGE;
            case TABLE_CELL -> Color.GRAY;
            case IMAGE -> Color.MAGENTA;
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
@ -32,6 +32,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
    public void testViewerDocument() {
        String fileName = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
        var documentFile = new ClassPathResource(fileName).getFile();
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java
@ -37,8 +37,6 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.ima
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
 import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
 import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
 import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
@ -62,6 +60,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
                                                 tableServiceResponse,
                                                 new VisualLayoutParsingResponse(),
                                                 Map.of("file", "document"));
    }
@ -134,6 +133,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @Test
    @SneakyThrows
    public void testTableAndCellRotations() {
        String fileName = "files/Minimal Examples/simpleTablesRotated.pdf";
        ClassPathResource pdfFileResource = new ClassPathResource(fileName);
@ -141,7 +141,6 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    }
    @Disabled
    @Test
    public void testScanRotationBorderIsIgnored() throws IOException {
@ -151,15 +150,19 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile(), tableServiceResponse);
-        assertThat(document.getSections()
+        assertThat(document.getTableOfContents().getAllTableOfContentItems()
                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
+                           .flatMap(tocItem -> tocItem.getSectionBlocks()
-                                   .stream())
+                                   .stream()
-                           .collect(Collectors.toList())).isNotEmpty();
+                                   .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
-        var tables = document.getSections()
+                           .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
+                           .toList()).isNotEmpty();
+        var tables = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList();
        // Quality of the table parsing is not good, because the file is rotated at scanning.
@ -199,15 +202,19 @@ public class PdfSegmentationServiceTest extends AbstractTest {
        ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
+        assertThat(document.getTableOfContents().getAllTableOfContentItems()
                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
+                           .flatMap(tocItem -> tocItem.getSectionBlocks()
-                                   .stream())
+                                   .stream()
-                           .collect(Collectors.toList())).isNotEmpty();
+                                   .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
-        TablePageBlock table = document.getSections()
+                           .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
+                           .toList()).isNotEmpty();
+        TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(0);
        assertThat(table.getColCount()).isEqualTo(6);
@ -225,23 +232,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {
                "files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf");
        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
+        assertThat(document.getTableOfContents().getAllTableOfContentItems()
                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
+                           .flatMap(tocItem -> tocItem.getSectionBlocks()
-                                   .stream())
+                                   .stream()
-                           .collect(Collectors.toList())).isNotEmpty();
+                                   .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
-        TablePageBlock firstTable = document.getSections()
+                           .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
+                           .toList()).isNotEmpty();
+        TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections()
+        TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
@ -266,23 +279,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {
                "files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
+        assertThat(document.getTableOfContents().getAllTableOfContentItems()
                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
+                           .flatMap(tocItem -> tocItem.getSectionBlocks()
-                                   .stream())
+                                   .stream()
-                           .collect(Collectors.toList())).isNotEmpty();
+                                   .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
-        TablePageBlock firstTable = document.getSections()
+                           .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
+                           .toList()).isNotEmpty();
+        TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(0);
        assertThat(firstTable.getColCount()).isEqualTo(9);
        assertThat(firstTable.getRowCount()).isEqualTo(5);
-        TablePageBlock secondTable = document.getSections()
+        TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(1);
        assertThat(secondTable.getColCount()).isEqualTo(9);
@ -307,23 +326,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {
                "files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf");
        ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
-        assertThat(document.getSections()
+        assertThat(document.getTableOfContents().getAllTableOfContentItems()
                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
+                           .flatMap(tocItem -> tocItem.getSectionBlocks()
-                                   .stream())
+                                   .stream()
-                           .collect(Collectors.toList())).isNotEmpty();
+                                   .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
-        TablePageBlock firstTable = document.getSections()
+                           .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
+                           .toList()).isNotEmpty();
+        TablePageBlock firstTable = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);
-        TablePageBlock secondTable = document.getSections()
+        TablePageBlock secondTable = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
@ -818,10 +843,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    @SneakyThrows
    private void toHtml(ClassificationDocument document, String filename) {
-        var tables = document.getSections()
+        var tables = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList();
        StringBuilder sb = new StringBuilder();
@ -843,12 +870,15 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {
-        TablePageBlock table = document.getSections()
+        TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(tableIndex);
        List<List<Cell>> rows = table.getRows();
        int emptyCellsFoundFound = rows.stream()
                .flatMap(List::stream)
@ -870,10 +900,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {
-        TablePageBlock table = document.getSections()
+        TablePageBlock table = document.getTableOfContents().getAllTableOfContentItems()
                .stream()
-                .flatMap(paragraph -> paragraph.getTables()
+                .flatMap(tocItem -> tocItem.getSectionBlocks()
-                        .stream())
+                        .stream()
+                        .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
+                .map(abstractPageBlock -> (TablePageBlock) abstractPageBlock)
                .toList()
                .get(tableIndex);
        List<List<Cell>> rows = table.getRows();
@ -896,10 +928,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {
    private void validateTableSize(ClassificationDocument document, int tableSize) {
-        assertThat(document.getSections()
+        assertThat(document.getTableOfContents().getAllTableOfContentItems()
                           .stream()
-                           .flatMap(paragraph -> paragraph.getTables()
+                           .flatMap(tocItem -> tocItem.getSectionBlocks()
-                                   .stream())
+                                   .stream()
+                                   .filter(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock))
                           .toList().size()).isEqualTo(tableSize);
    }
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java
@ -93,6 +93,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
                .toList();
        for (String pdfFileName : pdfFileNames) {
            writeJsons(Path.of(pdfFileName));
        }
    }
@ -102,15 +103,15 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
    @SneakyThrows
    private void writeJsons(Path filename) {
-        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
+        Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER,
-                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
                        filename.toFile(),
                        new ImageServiceResponse(),
                        new TableServiceResponse(),
                        new VisualLayoutParsingResponse(),
                        Map.of("file",filename.toFile().toString())));
-        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
+        Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER,
-                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
                        filename.toFile(),
                        new ImageServiceResponse(),
                        new TableServiceResponse(),
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java
@ -229,7 +229,7 @@ public class PdfDraw {
            case HEADER, FOOTER -> Color.GREEN;
            case PARAGRAPH -> Color.BLUE;
            case HEADLINE -> Color.RED;
-            case SECTION -> Color.BLACK;
+            case SECTION, SUPER_SECTION -> Color.BLACK;
            case TABLE -> Color.ORANGE;
            case TABLE_CELL -> Color.GRAY;
            case IMAGE -> Color.MAGENTA;
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/AbsolutelyEnormousTable.pdf
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/AbsolutelyEnormousTable.pdf
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/DontMergeNonConsecutiveTables.pdf
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/DontMergeNonConsecutiveTables.pdf
Author	SHA1	Message	Date
maverickstuder	0c8b2e6d44	RED-7074: Design Subsection section tree structure algorithm * added abstract class SectionNode * both Section and SuperSection extend the SectionNode class, so that there is no inheritance between Section and SuperSection as well as no field duplication	2024-05-22 13:02:16 +02:00
maverickstuder	b08ed2037e	RED-7074: Design Subsection section tree structure algorithm * fix pmd and checkstyle	2024-05-15 16:46:15 +02:00
maverickstuder	b50bfed69d	RED-7074: Design Subsection section tree structure algorithm * fix all failing tests	2024-05-15 16:40:57 +02:00
maverickstuder	49f13d1f03	RED-7074: Design Subsection section tree structure algorithm * post rebase fixup	2024-05-15 15:09:31 +02:00
maverickstuder	61c90fc30d	Merge branch 'main' into RED-7074 # Conflicts: # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TableExtractionService.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java # layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java	2024-05-15 14:17:59 +02:00
maverickstuder	6a0661cf09	RED-7074: Design Subsection section tree structure algorithm * bugfix	2024-05-15 13:51:49 +02:00
maverickstuder	2d33615b94	RED-7074: Design Subsection section tree structure algorithm * added redactmanager logic for headline classification to documine and clarifynd * refactored headline classification * added supersection for non-leaf sections (containing other sections instead of only paragraphs, images, ...) * bugfix for certain edge cases in some files running into error state	2024-05-15 10:29:39 +02:00
maverickstuder	1856fed640	RED-7074: Design Subsection section tree structure algorithm * improved merging of headlines as well as splitting logic so that more headlines are detected correctly	2024-05-14 17:41:44 +02:00
maverickstuder	2fcaeb3d8c	RED-7074: Design Subsection section tree structure algorithm * added supersection and changed logic so that each normal section only contains leaf nodes * added SectionIdentifier logic for headline splitting and merging * fixed many edge cases which resulted in error state files	2024-05-14 10:51:05 +02:00
maverickstuder	4e07ba4ff1	RED-7074: Design Subsection section tree structure algorithm * import optimized	2024-05-08 14:16:29 +02:00
maverickstuder	cfb6f0acfa	RED-7074: Design Subsection section tree structure algorithm * lots of refactoring to splitting logic for text blocks which resulted in some empty blocks to be created which can then not be localized (i.e. by containsBlock)	2024-05-08 14:15:27 +02:00
maverickstuder	a9338262c5	RED-7074: Design Subsection section tree structure algorithm * fix for boundary error	2024-05-07 15:51:54 +02:00
maverickstuder	d2dc369df3	RED-7074: Design Subsection section tree structure algorithm * temp	2024-05-07 14:25:54 +02:00
maverickstuder	f7aeb9a406	RED-7074: Design Subsection section tree structure algorithm * refactoring	2024-05-02 10:36:36 +02:00
maverickstuder	9bf2f5c56c	Merge remote-tracking branch 'origin/RED-7074' into RED-7074 # Conflicts: # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java # layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/RedactManagerClassificationService.java # layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java # layoutparser-service/layoutparser-service-server/src/test/resources/files/new/UTT-Books-53.pdf	2024-04-30 14:44:26 +02:00
maverickstuder	c071a133e6	RED-7074: Design Subsection section tree structure algorithm * added toc enrichment logic and changed section computation to build upon created toc	2024-04-30 14:41:17 +02:00
maverickstuder	9f9ea68706	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-29 15:00:49 +02:00
maverickstuder	85e3cf0ecc	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-29 15:00:49 +02:00
maverickstuder	17756f5977	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-29 15:00:48 +02:00
maverickstuder	59d9d6c3e6	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-29 15:00:34 +02:00
maverickstuder	c888746761	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-29 15:00:34 +02:00
maverickstuder	7279d0a870	RED-7074: Design Subsection section tree structure algorithm * first draft	2024-04-29 15:00:34 +02:00
maverickstuder	c84a199f9d	RED-7074: Design Subsection section tree structure algorithm * first draft	2024-04-29 15:00:32 +02:00
maverickstuder	09148960cf	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-19 11:31:34 +02:00
maverickstuder	77ee8dd5bd	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-18 17:52:33 +02:00
maverickstuder	e9d1bdc94f	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-17 14:31:48 +02:00
maverickstuder	894355c7cd	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-16 12:35:26 +02:00
maverickstuder	ca35feeb63	RED-7074: Design Subsection section tree structure algorithm * first draft: further implementations	2024-04-15 16:43:40 +02:00
maverickstuder	a32a43fc62	RED-7074: Design Subsection section tree structure algorithm * first draft	2024-04-10 12:28:42 +02:00
maverickstuder	7f675b41cf	RED-7074: Design Subsection section tree structure algorithm * first draft	2024-04-09 16:53:57 +02:00