Merge branch 'RED-10752-main' into 'main'

RED-10752: Enabled prometheus

See merge request fforesight/layout-parser!267
RED-10752: Enabled prometheus
2025-01-29 13:34:01 +01:00 · 2025-01-29 11:09:29 +01:00 · 2025-01-14 13:04:10 +01:00 · 2025-01-14 12:59:01 +01:00 · 2025-01-10 12:33:18 +01:00 · 2025-01-10 12:12:14 +01:00
177 changed files with 4437 additions and 6092 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -21,5 +21,6 @@ deploy:
      dotenv: version.env
  rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH 
+    - if: $CI_COMMIT_BRANCH =~ /^feature/ && $CI_COMMIT_TAG == null  # an unset variable is null, not ""
    - if: $CI_COMMIT_BRANCH =~ /^release/
    - if: $CI_COMMIT_TAG
--- a/buildSrc/src/main/kotlin/com.knecon.fforesight.java-conventions.gradle.kts
+++ b/buildSrc/src/main/kotlin/com.knecon.fforesight.java-conventions.gradle.kts
@ -8,6 +8,8 @@ plugins {

 group = "com.knecon.fforesight"

+val documentVersion by rootProject.extra { "4.433.0" }
+
 java.sourceCompatibility = JavaVersion.VERSION_17
 java.targetCompatibility = JavaVersion.VERSION_17

@ -51,6 +53,10 @@ allprojects {
        }
    }

+    pmd {
+        setConsoleOutput(true)
+    }
+
    publishing {
        publications {
            create<MavenPublication>(name) {
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentData.java
@ -1,28 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-import java.io.Serializable;
-
-import io.swagger.v3.oas.annotations.media.Schema;
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@Builder
-@AllArgsConstructor
-@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
-@Schema(description = "Object containing the complete document layout parsing information. It is split into 4 categories, structure, text, positions and pages: " + "The document tree structure of SemanticNodes such as Section, Paragraph, Headline, etc. " + "The text, which is stored as separate blocks of data. " + "The text positions, which are also stored as separate blocks. The Blocks are equal to the text blocks in length and order. " + "The page information.")
-public class DocumentData implements Serializable {
-
-    @Schema(description = "Contains information about the document's pages.")
-    DocumentPage[] documentPages;
-    @Schema(description = "Contains information about the document's text.")
-    DocumentTextData[] documentTextData;
-    @Schema(description = "Contains information about the document's text positions.")
-    DocumentPositionData[] documentPositions;
-    @Schema(description = "Contains information about the document's semantic structure.")
-    DocumentStructure documentStructure;
-
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPage.java
@ -1,30 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-import java.io.Serializable;
-
-import io.swagger.v3.oas.annotations.media.Schema;
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@Builder
-@NoArgsConstructor
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@Schema(description = "Object containing information about the document's pages.")
-public class DocumentPage implements Serializable {
-
-    @Schema(description = "The page number, starting with 1.")
-    int number;
-    @Schema(description = "The page height in PDF user units.", example = "792")
-    int height;
-    @Schema(description = "The page width in PDF user units.", example = "694")
-    int width;
-    @Schema(description = "The page rotation as specified by the PDF.", example = "90", allowableValues = {"0", "90", "180", "270"})
-    int rotation;
-
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentPositionData.java
@ -1,28 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-import java.io.Serializable;
-
-import io.swagger.v3.oas.annotations.media.Schema;
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@Builder
-@NoArgsConstructor
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@Schema(description = "Object containing text positional information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.")
-public class DocumentPositionData implements Serializable {
-
-    @Schema(description = "Identifier of the text block.")
-    Long id;
-    @Schema(description = "For each string coordinate in the search text of the text block, the array contains an entry relating the string coordinate to the position coordinate. This is required due to the text and position coordinates not being equal.")
-    int[] stringIdxToPositionIdx;
-    @Schema(description = "The bounding box for each glyph as a rectangle. This matrix is of size (n,4), where n is the number of glyphs in the text block. The second dimension specifies the rectangle with the value x, y, width, height, with x, y specifying the lower left corner. In order to access this information, the stringIdxToPositionIdx array must be used to transform the coordinates.")
-    float[][] positions;
-
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentStructure.java
@ -1,172 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-import java.awt.geom.Rectangle2D;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Stream;
-
-import io.swagger.v3.oas.annotations.media.Schema;
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-
-/**
- * Serializable tree of {@link EntryData} nodes describing the parsed layout of a document, together with the
- * property-key constants and the parsing helpers for the string-valued entries of {@link EntryData#properties}.
- */
-@Data
-@Builder
-@NoArgsConstructor
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@Schema(description = "Object containing information about the parsed tree structure of the SemanticNodes, such as Section, Paragraph, Headline etc inside of the document.")
-public class DocumentStructure implements Serializable {
-
-    @Schema(description = "The root EntryData represents the Document.")
-    EntryData root;
-
-    @Schema(description = "Object containing the extra field names, a table has in its properties field.")
-    public static class TableProperties implements Serializable {
-
-        public static final String NUMBER_OF_ROWS = "numberOfRows";
-        public static final String NUMBER_OF_COLS = "numberOfCols";
-
-    }
-
-    @Schema(description = "Object containing the extra field names, an Image has in its properties field.")
-    public static class ImageProperties implements Serializable {
-
-        public static final String TRANSPARENT = "transparent";
-        public static final String IMAGE_TYPE = "imageType";
-        public static final String POSITION = "position";
-        public static final String ID = "id";
-
-        public static final String REPRESENTATION_HASH = "representationHash";
-
-    }
-
-    @Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
-    public static class TableCellProperties implements Serializable {
-
-        public static final String B_BOX = "bBox";
-        public static final String ROW = "row";
-        public static final String COL = "col";
-        public static final String HEADER = "header";
-
-    }
-
-    @Schema(description = "Object containing the extra field names, a duplicate paragraph has in its properties field.")
-    public static class DuplicateParagraphProperties implements Serializable {
-
-        public static final String UNSORTED_TEXTBLOCK_ID = "utbid";
-
-    }
-
-    /** Separator between the four components of a serialized rectangle, see {@link #parseRectangle2D(String)}. */
-    public static final String RECTANGLE_DELIMITER = ";";
-
-
-    /**
-     * Parses a rectangle serialized as float values joined by {@link #RECTANGLE_DELIMITER}.
-     * The first four values are passed, in order, as x, y, width and height; further values are ignored.
-     *
-     * @param bBox the serialized rectangle
-     * @return the parsed rectangle
-     * @throws NumberFormatException     if a component is not a parsable float
-     * @throws IndexOutOfBoundsException if fewer than four components are present
-     */
-    public static Rectangle2D parseRectangle2D(String bBox) {
-
-        List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
-                .map(Float::parseFloat)
-                .toList();
-        return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
-    }
-
-
-    /**
-     * Parses a list of doubles separated by commas and/or whitespace.
-     *
-     * @param representationHash the serialized vector
-     * @return one double per separated token, in input order
-     * @throws NumberFormatException if a token is not a parsable double
-     */
-    public static double[] parseRepresentationVector(String representationHash) {
-
-        String[] stringArray = representationHash.split("[,\\s]+");
-        double[] doubleArray = new double[stringArray.length];
-        for (int i = 0; i < stringArray.length; i++) {
-            doubleArray[i] = Double.parseDouble(stringArray[i]);
-        }
-
-        return doubleArray;
-    }
-
-
-    /**
-     * Resolves the entry addressed by a path of child indices, starting at the root.
-     *
-     * @param tocId child index to follow at each level; an empty list addresses the root itself
-     * @return the addressed entry
-     */
-    public EntryData get(List<Integer> tocId) {
-
-        EntryData entry = root;
-        for (int id : tocId) {
-            entry = entry.children.get(id);
-        }
-        return entry;
-    }
-
-
-    /**
-     * Streams the root and all of its descendants in pre-order (parent before children), each entry exactly once.
-     *
-     * @return a stream over every entry of the tree, empty if no root is set
-     */
-    public Stream<EntryData> streamAllEntries() {
-
-        if (root == null) {
-            return Stream.empty();
-        }
-        // flatten(root) already walks the whole tree; flattening root's children on top of that
-        // emitted every non-root entry a second time.
-        return flatten(root);
-    }
-
-
-    /**
-     * Renders one line per entry, in the order of {@link #streamAllEntries()}.
-     */
-    @Override
-    public String toString() {
-
-        return String.join("\n",
-                           streamAllEntries().map(EntryData::toString)
-                                   .toList());
-    }
-
-
-    /**
-     * Pre-order traversal of the subtree rooted at the given entry; an entry without a children list is a leaf.
-     */
-    private static Stream<EntryData> flatten(EntryData entry) {
-
-        if (entry.children == null) {
-            return Stream.of(entry);
-        }
-        return Stream.concat(Stream.of(entry),
-                             entry.children.stream()
-                                     .flatMap(DocumentStructure::flatten));
-    }
-
-
-    @Data
-    @Builder
-    @NoArgsConstructor
-    @AllArgsConstructor
-    @FieldDefaults(level = AccessLevel.PRIVATE)
-    @Schema(description = "Object containing information of a SemanticNode and also structuring the layout with children.")
-    public static class EntryData implements Serializable {
-
-        @Schema(description = "Type of the semantic node.", allowableValues = {"DOCUMENT", "SECTION", "PARAGRAPH", "HEADLINE", "TABLE", "TABLE_CELL", "HEADER", "FOOTER", "IMAGE"})
-        NodeType type;
-        @Schema(description = "Specifies the position in the parsed tree structure.", example = "[1, 0, 2]")
-        int[] treeId;
-        @Schema(description = "Specifies the text block IDs associated with this semantic node. The value should be joined with the DocumentTextData/DocumentPositionData. Is empty, if no text block is directly associated with this semantic node. Only Paragraph, Headline, Header or Footer is directly associated with a text block.", example = "[1]")
-        Long[] atomicBlockIds;
-        @Schema(description = "Specifies the pages this semantic node appears on. The value should be joined with the PageData.", example = "[1, 2, 3]")
-        Long[] pageNumbers;
-        @Schema(description = "Some semantic nodes have additional information, this information is stored in this Map. The extra fields are specified by the Properties subclasses.", example = "For a Table: {\"numberOfRows\": 3, \"numberOfCols\": 4}")
-        Map<String, String> properties;
-        @Schema(description = "All child Entries of this Entry.", example = "[1, 2, 3]")
-        List<EntryData> children;
-        @Schema(description = "Describes the origin of the semantic node", example = "[ALGORITHM]")
-        Set<LayoutEngine> engines;
-
-
-        /**
-         * Renders the entry as {@code [treeId]: type atbs = n}, where {@code treeId} is comma-separated and
-         * {@code n} is the number of atomic block ids (0 if none are set).
-         */
-        @Override
-        public String toString() {
-
-            StringBuilder sb = new StringBuilder();
-            sb.append("[");
-            if (treeId != null) {
-                // Put the separator between elements instead of trimming a trailing one: trimming
-                // removed the opening bracket whenever treeId was empty.
-                for (int i = 0; i < treeId.length; i++) {
-                    if (i > 0) {
-                        sb.append(",");
-                    }
-                    sb.append(treeId[i]);
-                }
-            }
-            sb.append("]: ");
-
-            sb.append(type);
-            sb.append(" atbs = ");
-            sb.append(atomicBlockIds == null ? 0 : atomicBlockIds.length);
-
-            return sb.toString();
-        }
-
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/DocumentTextData.java
@ -1,36 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-import java.io.Serializable;
-
-import io.swagger.v3.oas.annotations.media.Schema;
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-
-/**
- * Text content and offsets of a single text block, as described by the schema annotations below.
- */
-@Data
-@Builder
-@NoArgsConstructor
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@Schema(description = "Object containing text information of a specific text block. A document is split into multiple text blocks, which are supposed to be read in order. Every text block can only occur on a single page.")
-public class DocumentTextData implements Serializable {
-
-    @Schema(description = "Identifier of the text block.")
-    Long id;
-    @Schema(description = "The page the text block occurs on.")
-    Long page;
-    @Schema(description = "The text of the text block.")
-    String searchText;
-    @Schema(description = "Each text block is assigned a number on a page, starting from 0.")
-    int numberOnPage;
-    @Schema(description = "The text blocks are ordered, this number represents the start of the text block as a string offset.")
-    int start;
-    @Schema(description = "The text blocks are ordered, this number represents the end of the text block as a string offset.")
-    int end;
-    @Schema(description = "The line breaks in the text of this semantic node in string offsets. They are exclusive end. At the end of each semantic node there is an implicit linebreak.", example = "[5, 10]")
-    int[] lineBreaks;
-
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/LayoutEngine.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/LayoutEngine.java
@ -1,7 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-public enum LayoutEngine {
-    ALGORITHM,
-    AI,
-    OUTLINE
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java
@ -1,23 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
-
-import java.io.Serializable;
-import java.util.Locale;
-
-/**
- * Kinds of semantic nodes that can appear in the document tree.
- */
-public enum NodeType implements Serializable {
-    DOCUMENT,
-    SECTION,
-    SUPER_SECTION,
-    HEADLINE,
-    PARAGRAPH,
-    TABLE,
-    TABLE_CELL,
-    IMAGE,
-    HEADER,
-    FOOTER;
-
-
-    /**
-     * Returns the constant name with its first character kept and the rest lower-cased,
-     * e.g. {@code TABLE_CELL} becomes {@code "Table_cell"}. Lower-casing uses {@link Locale#ROOT}
-     * so the result does not depend on the default locale.
-     */
-    @Override
-    public String toString() {
-
-        return this.name().charAt(0) + this.name().substring(1).toLowerCase(Locale.ROOT);
-    }
-}
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedText.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/SimplifiedText.java
@ -21,5 +21,14 @@ public class SimplifiedText {
    @Schema(description = "A List of simplified Sections, which contains almost exclusively text.")
    @Builder.Default
    private List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
+    @Schema(description = "A list of the main section numbers ")
+    @Builder.Default
+    private List<String> mainSectionNumbers = new ArrayList<>();
+    @Schema(description = "A list of the header section numbers ")
+    @Builder.Default
+    private List<String> headerSectionNumbers = new ArrayList<>();
+    @Schema(description = "A list of the footer section numbers ")
+    @Builder.Default
+    private List<String> footerSectionNumbers = new ArrayList<>();

 }
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java
@ -8,13 +8,20 @@ import lombok.Builder;
@Builder
@Schema(description = "Object containing information about the layout parsing.")
 public record LayoutParsingFinishedEvent(
-        @Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
-        Map<String, String> identifier,//
-        @Schema(description = "The duration of a single layout parsing in ms.")
-        long duration,//
-        @Schema(description = "The number of pages of the parsed document.")
-        int numberOfPages,//
-        @Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
-        String message) {
+        @Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") //
+        Map<String, String> identifier,
+
+        @Schema(description = "The duration of a single layout parsing in ms.") //
+        long duration,
+
+        @Schema(description = "The number of pages of the parsed document.") //
+        int numberOfPages,
+
+        @Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") //
+        String message,
+
+        @Schema(description = "The app version of the layout parser.") //
+        String layoutParserVersion
+) {

 }
--- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingQueueNames.java
+++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingQueueNames.java
@ -2,6 +2,9 @@ package com.knecon.fforesight.service.layoutparser.internal.api.queue;

 public class LayoutParsingQueueNames {

-    public static final String LAYOUT_PARSING_REQUEST_QUEUE = "layout_parsing_request_queue";
-    public static final String LAYOUT_PARSING_FINISHED_EVENT_QUEUE = "layout_parsing_response_queue";
+    public static final String LAYOUT_PARSING_REQUEST_QUEUE_PREFIX = "layout_parsing_request";
+    public static final String LAYOUT_PARSING_REQUEST_EXCHANGE = "layout_parsing_request_exchange";
+    public static final String LAYOUT_PARSING_RESPONSE_QUEUE_PREFIX = "layout_parsing_response";
+    public static final String LAYOUT_PARSING_RESPONSE_EXCHANGE = "layout_parsing_response_exchange";
+    public static final String LAYOUT_PARSING_DLQ = "layout_parsing_error";
 }
--- a/layoutparser-service/layoutparser-service-processor/build.gradle.kts
+++ b/layoutparser-service/layoutparser-service-processor/build.gradle.kts
@ -8,16 +8,20 @@ description = "layoutparser-service-processor"
 val jacksonVersion = "2.15.2"
 val pdfBoxVersion = "3.0.0"

+
 dependencies {
    implementation(project(":layoutparser-service-internal-api"))
    implementation(project(":viewer-doc-processor"))

-    implementation("com.iqser.red.service:persistence-service-shared-api-v1:2.144.0") {
+    implementation("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}")
+    implementation("com.iqser.red.service:persistence-service-shared-api-v1:2.564.0-RED9010.0") {
        exclude("org.springframework.boot", "spring-boot-starter-security")
        exclude("org.springframework.boot", "spring-boot-starter-validation")
    }
-    implementation("com.knecon.fforesight:tenant-commons:0.21.0")
-    implementation("com.iqser.red.commons:storage-commons:2.45.0")
+    implementation("com.knecon.fforesight:tenant-commons:0.30.0") {
+        exclude("com.iqser.red.commons", "storage-commons")
+    }
+    implementation("com.iqser.red.commons:storage-commons:2.50.0")

    implementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}")
    implementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
@ -25,9 +29,12 @@ dependencies {
    implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
    implementation("org.springframework.boot:spring-boot-starter-web:3.1.3")
    implementation("org.jgrapht:jgrapht-core:1.5.2")
+    implementation("org.apache.pdfbox:jbig2-imageio:3.0.4")
+    implementation("com.github.jai-imageio:jai-imageio-core:1.4.0")
+    implementation("com.github.jai-imageio:jai-imageio-jpeg2000:1.4.0")
    implementation("org.tinspin:tinspin-indexes:2.1.3")
    implementation("org.commonmark:commonmark:0.22.0")
    implementation("org.commonmark:commonmark-ext-gfm-tables:0.22.0")
    implementation("com.pdftron:PDFNet:10.11.0")
-
+    implementation("org.apache.commons:commons-text:1.12.0")
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParserSettings.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParserSettings.java
@ -13,9 +13,8 @@ import lombok.experimental.FieldDefaults;
@Configuration
@ConfigurationProperties("layoutparser")
@FieldDefaults(level = AccessLevel.PRIVATE)
-public class LayoutparserSettings {
+public class LayoutParserSettings {

    boolean debug;
    LayoutParsingType layoutParsingTypeOverride;
-    String pdftronLicense;
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -2,12 +2,13 @@ package com.knecon.fforesight.service.layoutparser.processor;

 import static java.lang.String.format;

-import java.awt.geom.Point2D;
+import java.awt.geom.AffineTransform;
 import java.awt.geom.Rectangle2D;
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@ -19,28 +20,35 @@ import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;

-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
+import com.iqser.red.service.redaction.v1.server.mapper.DocumentDataMapper;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
-import com.knecon.fforesight.service.layoutparser.processor.markdown.MarkdownMapper;
+import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.model.DocumentWithVisualization;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
+import com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationService;
+import com.knecon.fforesight.service.layoutparser.processor.services.mapper.MarkdownMapper;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
+
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineExtractorService;
-import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
-import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineValidationService;
-import com.knecon.fforesight.service.layoutparser.processor.model.outline.TOCEnrichmentService;
-import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeBuilderService;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEnhancementService;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter;
@ -48,7 +56,6 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.ima
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
-import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
 import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
 import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
 import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
@ -58,13 +65,9 @@ import com.knecon.fforesight.service.layoutparser.processor.services.blockificat
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocstrumBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocuMineBlockificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.blockification.RedactManagerBlockificationService;
-import com.knecon.fforesight.service.layoutparser.processor.services.classification.ClarifyndClassificationService;
-import com.knecon.fforesight.service.layoutparser.processor.services.classification.DocuMineClassificationService;
-import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
 import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
 import com.knecon.fforesight.service.layoutparser.processor.services.graphics.Box;
 import com.knecon.fforesight.service.layoutparser.processor.services.graphics.GraphicExtractorService;
-import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
 import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
 import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper;
 import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
@ -85,32 +88,32 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
-@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+@FieldDefaults(level = AccessLevel.PRIVATE)
 public class LayoutParsingPipeline {

-    ImageServiceResponseAdapter imageServiceResponseAdapter;
-    CvTableParsingAdapter cvTableParsingAdapter;
-    LayoutParsingStorageService layoutParsingStorageService;
-    SectionsBuilderService sectionsBuilderService;
-    RedactManagerClassificationService redactManagerClassificationService;
-    DocuMineClassificationService docuMineClassificationService;
-    SimplifiedSectionTextService simplifiedSectionTextService;
-    BodyTextFrameService bodyTextFrameService;
-    RulingCleaningService rulingCleaningService;
-    TableExtractionService tableExtractionService;
-    DocuMineBlockificationService docuMineBlockificationService;
-    RedactManagerBlockificationService redactManagerBlockificationService;
-    BlockificationPostprocessingService blockificationPostprocessingService;
-    DocstrumBlockificationService docstrumBlockificationService;
-    LayoutGridService layoutGridService;
-    ObservationRegistry observationRegistry;
-    VisualLayoutParsingAdapter visualLayoutParsingAdapter;
-    ClarifyndClassificationService clarifyndClassificationService;
-    GraphicExtractorService graphicExtractorService;
-    OutlineExtractorService outlineExtractorService;
-    OutlineValidationService outlineValidationService;
-    TOCEnrichmentService tocEnrichmentService;
-    LayoutparserSettings settings;
+    final ImageServiceResponseAdapter imageServiceResponseAdapter;
+    final CvTableParsingAdapter cvTableParsingAdapter;
+    final LayoutParsingStorageService layoutParsingStorageService;
+    final SectionsBuilderService sectionsBuilderService;
+    final SimplifiedSectionTextService simplifiedSectionTextService;
+    final RulingCleaningService rulingCleaningService;
+    final TableExtractionService tableExtractionService;
+    final DocuMineBlockificationService docuMineBlockificationService;
+    final RedactManagerBlockificationService redactManagerBlockificationService;
+    final BlockificationPostprocessingService blockificationPostprocessingService;
+    final DocstrumBlockificationService docstrumBlockificationService;
+    final LayoutGridService layoutGridService;
+    final ObservationRegistry observationRegistry;
+    final VisualLayoutParsingAdapter visualLayoutParsingAdapter;
+    final GraphicExtractorService graphicExtractorService;
+    final OutlineExtractorService outlineExtractorService;
+    final SectionTreeBuilderService sectionTreeBuilderService;
+    final SectionTreeEnhancementService sectionTreeEnhancementService;
+    final LayoutParserSettings settings;
+    final ClassificationService classificationService;
+
+    @Value("${LAYOUT_PARSER_VERSION:}")
+    private String layoutParserVersion;


    public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
@ -119,17 +122,23 @@ public class LayoutParsingPipeline {
        log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());

        File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
-        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
+        File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
+                .orElse(originFile);

        VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId()
-                .map(layoutParsingStorageService::getVisualLayoutParsingFile).orElse(new VisualLayoutParsingResponse());
+                .map(layoutParsingStorageService::getVisualLayoutParsingFile)
+                .orElse(new VisualLayoutParsingResponse());
        ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId()
-                .map(layoutParsingStorageService::getImagesFile).orElse(new ImageServiceResponse());
+                .map(layoutParsingStorageService::getImagesFile)
+                .orElse(new ImageServiceResponse());
        TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId()
-                .map(layoutParsingStorageService::getTablesFile).orElse(new TableServiceResponse());
+                .map(layoutParsingStorageService::getTablesFile)
+                .orElse(new TableServiceResponse());

-        ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
-                                                                            ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(),
+        LayoutParsingType layoutParsingType = settings.getLayoutParsingTypeOverride() == null //
+                ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride();
+
+        ClassificationDocument classificationDocument = parseLayout(layoutParsingType,
                                                                    originFile,
                                                                    imageServiceResponse,
                                                                    tableServiceResponse,
@ -138,36 +147,37 @@ public class LayoutParsingPipeline {

        log.info("Building document graph for {}", layoutParsingRequest.identifier());

-        Document documentGraph = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
-                                                                   ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
+        DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(layoutParsingType, classificationDocument);

        log.info("Creating viewer document for {}", layoutParsingRequest.identifier());

-        layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false, layoutParsingRequest.visualLayoutParsingFileId().isPresent());
+        layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, layoutParsingType, layoutParserVersion, false);

        log.info("Storing resulting files for {}", layoutParsingRequest.identifier());

-        layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
-        if (layoutParsingRequest.documentMarkdownFileStorageId().isPresent()) {
-            layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId().get(), new MarkdownMapper().toMarkdownContent(documentGraph));
+        layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentWithVisualization.document()));
+        if (layoutParsingRequest.documentMarkdownFileStorageId()
+                .isPresent()) {
+            layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId()
+                                                                  .get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
        }
-        layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
+        layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentWithVisualization.document()));
        layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);

        if (layoutParsingRequest.researchDocumentStorageId() != null) {
            log.info("Building research document data for {}", layoutParsingRequest.identifier());
-            var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
+            var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentWithVisualization.document());
            layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);
        }

        if (!viewerDocumentFile.equals(originFile)) {
-            viewerDocumentFile.delete();
+            assert !viewerDocumentFile.exists() || viewerDocumentFile.delete();
        }
-        originFile.delete();
+        assert !originFile.exists() || originFile.delete();

        return LayoutParsingFinishedEvent.builder()
                .identifier(layoutParsingRequest.identifier())
-                .numberOfPages(documentGraph.getNumberOfPages())
+                .numberOfPages(documentWithVisualization.document().getNumberOfPages())
                .duration(System.currentTimeMillis() - start)
                .message(format("""
                                        Layout parsing has finished in %.02f s.
@ -182,21 +192,22 @@ public class LayoutParsingPipeline {
                                        Viewer Doc: %s""",
                                ((float) (System.currentTimeMillis() - start)) / 1000,
                                layoutParsingRequest.identifier(),
-                                buildSemanticNodeCountMessage(documentGraph.getNumberOfPages(), documentGraph.buildSemanticNodeCounts()),
+                                buildSemanticNodeCountMessage(documentWithVisualization.document().getNumberOfPages(), documentWithVisualization.buildSemanticNodeCounts()),
                                layoutParsingRequest.structureFileStorageId(),
                                layoutParsingRequest.textBlockFileStorageId(),
                                layoutParsingRequest.positionBlockFileStorageId(),
                                layoutParsingRequest.pageFileStorageId(),
                                layoutParsingRequest.simplifiedTextStorageId(),
                                layoutParsingRequest.viewerDocumentStorageId()))
+                .layoutParserVersion(layoutParserVersion)
                .build();

    }


-    private Document observeBuildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument) {
+    private DocumentWithVisualization observeBuildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument) {

-        AtomicReference<Document> documentReference = new AtomicReference<>();
+        AtomicReference<DocumentWithVisualization> documentReference = new AtomicReference<>();

        Observation.createNotStarted("LayoutParsingPipeline", observationRegistry)
                .contextualName("build-document-graph")
@ -243,12 +254,8 @@ public class LayoutParsingPipeline {
        }

        List<ClassificationPage> classificationPages = new ArrayList<>();
-        OutlineObject lastProcessedOutlineObject = null;

-        // parsing the structure elements could be useful as well
-        if (layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
-            classificationDocument.setOutlineObjectTree(outlineExtractorService.getOutlineObjectTree(originDocument));
-        }
+        classificationDocument.setOutlineObjectTree(outlineExtractorService.getOutlineObjectTree(originDocument));

        long pageCount = originDocument.getNumberOfPages();

@ -273,22 +280,22 @@ public class LayoutParsingPipeline {
            stripper.setEndPage(pageNumber);
            stripper.setPdpage(pdPage);
            stripper.getText(originDocument);
-            List<TextPositionSequence> words = stripper.getTextPositionSequences();
+            List<Word> words = stripper.getWords();
+
+//            rotateDirAdjExactly(words, pdPage); // works really well for many highly rotated documents (e.g. VV-331340.pdf), but it decreases the headline performance by 1.3%, so I am leaving it out for now
+
            if (layoutParsingType.equals(LayoutParsingType.DOCUMINE_OLD)) {
                var lines = TextPositionOperations.groupByLine(new HashSet<>(words));
                classificationDocument.getLayoutDebugLayer().addLineVisualizationsFromNestedTextPosition(lines, pageNumber);
-                words = TextPositionOperations.sortLines(lines);
+                words = TextPositionOperations.sortWords(lines);
            }
            classificationDocument.getLayoutDebugLayer().addTextVisualizations(words, pageNumber);

            PDRectangle pdr = pdPage.getMediaBox();

-            int rotation = pdPage.getRotation();
-            boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
-
-            PDRectangle cropbox = pdPage.getCropBox();
-            classificationDocument.getLayoutDebugLayer().addRulingVisualization(stripper.getRulings(), pageNumber);
-            CleanRulings cleanRulings = rulingCleaningService.deduplicateAndStraightenRulings(pdfTableCells.get(pageNumber), stripper.getRulings());
+            List<Ruling> rulings = stripper.getRulings();
+            classificationDocument.getLayoutDebugLayer().addRulingVisualization(rulings, pageNumber);
+            CleanRulings cleanRulings = rulingCleaningService.deduplicateAndStraightenRulings(pdfTableCells.get(pageNumber), rulings);

            PageInformation pageInformation = PageInformation.fromPDPage(pageNumber, pdPage);
            List<Cell> emptyTableCells = TableExtractionService.findCells(cleanRulings.getHorizontals(), cleanRulings.getVerticals(), pageInformation);
@ -296,7 +303,7 @@ public class LayoutParsingPipeline {

            TextRulingsClassifier.classifyUnderlinedAndStrikethroughText(words, cleanRulings);

-            List<Box> graphics = graphicExtractorService.extractPathElementGraphics(originDocument, pdPage, pageNumber, cleanRulings, stripper.getTextPositionSequences(), false);
+            List<Box> graphics = graphicExtractorService.extractPathElementGraphics(originDocument, pdPage, pageNumber, cleanRulings, stripper.getWords(), false);

            pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>())
                    .addAll(graphics.stream()
@ -308,8 +315,7 @@ public class LayoutParsingPipeline {
                                    .toList());

            ClassificationPage classificationPage = switch (layoutParsingType) {
-                case REDACT_MANAGER_OLD ->
-                        redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings, classificationDocument.getLayoutDebugLayer());
+                case REDACT_MANAGER_OLD -> redactManagerBlockificationService.blockify(stripper.getWords(), cleanRulings, classificationDocument.getLayoutDebugLayer());
                case DOCUMINE_OLD -> docuMineBlockificationService.blockify(words, cleanRulings);
                case DOCUMINE, REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH ->
                        docstrumBlockificationService.blockify(words, cleanRulings, true, classificationDocument.getLayoutDebugLayer(), layoutParsingType);
@ -317,26 +323,9 @@ public class LayoutParsingPipeline {
                        docstrumBlockificationService.blockify(words, cleanRulings, false, classificationDocument.getLayoutDebugLayer(), layoutParsingType);
            };

-            classificationPage.setCleanRulings(cleanRulings);
-            classificationPage.setRotation(rotation);
-            classificationPage.setLandscape(isLandscape);
-            classificationPage.setPageNumber(pageNumber);
-            classificationPage.setPageWidth(cropbox.getWidth());
-            classificationPage.setPageHeight(cropbox.getHeight());
+            updateClassificationPage(pdPage, pdr, classificationPage, cleanRulings, pageNumber, pageInformation);

-            if (layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD && layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
-                List<OutlineObject> outlineObjects = classificationDocument.getOutlineObjectTree().getOutlineObjectsPerPage().getOrDefault(pageNumber - 1, new ArrayList<>());
-
-                OutlineObject notFoundOutlineObject = null;
-                if (lastProcessedOutlineObject != null && !lastProcessedOutlineObject.isFound()) {
-                    lastProcessedOutlineObject.setPoint(new Point2D.Float(0, cropbox.getHeight()));
-                    notFoundOutlineObject = lastProcessedOutlineObject;
-                }
-                if (!outlineObjects.isEmpty()) {
-                    classificationPage.setOutlineObjects(outlineObjects);
-                    lastProcessedOutlineObject = blockificationPostprocessingService.sanitizeOutlineBlocks(classificationPage, notFoundOutlineObject);
-                }
-            }
+            blockificationPostprocessingService.findHeadlinesFromOutline(classificationDocument, pageNumber, classificationPage, pageInformation);

            classificationDocument.getLayoutDebugLayer().addMarkedContentVisualizations(stripper.getMarkedContents(), pageNumber);
            // MarkedContent needs to be converted at this point, otherwise it leads to GC Problems in Pdfbox.
@ -366,40 +355,67 @@ public class LayoutParsingPipeline {

        originDocument.close();

-        log.info("Calculating BodyTextFrame for {}", identifier);
-        bodyTextFrameService.setBodyTextFrames(classificationDocument, layoutParsingType);
-        for (ClassificationPage page : classificationDocument.getPages()) {
-            classificationDocument.getLayoutDebugLayer().addCleanRulingVisualization(page.getCleanRulings(), page.getPageNumber());
-        }
-        log.info("Classify TextBlocks for {}", identifier);
-        switch (layoutParsingType) {
-            case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_OLD, CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH ->
-                    redactManagerClassificationService.classifyDocument(classificationDocument);
-            case DOCUMINE_OLD, DOCUMINE -> docuMineClassificationService.classifyDocument(classificationDocument);
-            case CLARIFYND -> clarifyndClassificationService.classifyDocument(classificationDocument);
-        }
+        classificationService.classify(classificationDocument, layoutParsingType, identifier);

-        List<TextPageBlock> headlines = classificationDocument.getPages()
-                .stream()
-                .flatMap(classificationPage -> classificationPage.getTextBlocks()
-                        .stream()
-                        .filter(tb -> tb instanceof TextPageBlock && tb.getClassification() != null && tb.getClassification().isHeadline())
-                        .map(tb -> (TextPageBlock) tb))
-                .toList();
-        TableOfContents tableOfContents = outlineValidationService.createToC(headlines);
-        classificationDocument.setTableOfContents(tableOfContents);
+        SectionTree sectionTree = sectionTreeBuilderService.createSectionTree(classificationDocument);
+        classificationDocument.setSectionTree(sectionTree);

        log.info("Building Sections for {}", identifier);

        switch (layoutParsingType) {
            case CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_PARAGRAPH_DEBUG -> sectionsBuilderService.buildParagraphDebugSections(classificationDocument);
-            default -> tocEnrichmentService.assignSectionBlocksAndImages(classificationDocument);
+            default -> sectionTreeEnhancementService.assignSectionBlocksAndImages(classificationDocument);
        }

        return classificationDocument;
    }


+    private static void updateClassificationPage(PDPage pdPage,
+                                                 PDRectangle pdr,
+                                                 ClassificationPage classificationPage,
+                                                 CleanRulings cleanRulings,
+                                                 int pageNumber,
+                                                 PageInformation pageInformation) {
        // Copies the page-level metadata onto classificationPage: clean rulings, rotation, landscape flag,
        // page number and page size. Nothing is returned; classificationPage is modified in place.
+
+        int rotation = pdPage.getRotation();
        // Landscape when pdr is wider than tall at a rotation of 0/180, or taller than wide at a rotation of 90/270.
        // Any other rotation value, or a square pdr, yields false.
+        boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
+        classificationPage.setCleanRulings(cleanRulings);
+        classificationPage.setRotation(rotation);
+        classificationPage.setLandscape(isLandscape);
+        classificationPage.setPageNumber(pageNumber);
        // Width and height are taken from pageInformation (narrowed to float), not from pdr.
+        classificationPage.setPageWidth((float) pageInformation.width());
+        classificationPage.setPageHeight((float) pageInformation.height());
+    }
+
+
+    private static void rotateDirAdjExactly(List<Word> words, PDPage pdPage) {
        // For every text direction, transforms all words of that direction with one shared rotation whose angle is
        // the mean getExactDir() of the characters having that direction. The words are modified in place.
        // NOTE(review): the only call visible near the page loop is commented out; confirm this is still needed.
+
+        for (TextDirection dir : TextDirection.values()) {
            // Mean exact direction over every character (across all words) whose text position has direction dir.
+            double averageRotation = words.stream()
+                    .map(Word::getCharacters)
+                    .flatMap(Collection::stream)
+                    .map(Character::getTextPosition)
+                    .filter(pos -> pos.getDir().equals(dir))
+                    .mapToDouble(RedTextPosition::getExactDir).average()
+                    .orElse(0);
+
            // 0 is also the fallback when no character has this direction, so those directions are skipped too.
+            if (averageRotation == 0) {
+                continue;
+            }
+
            // Rotation about the centre of the media box. getRotateInstance interprets the angle in radians.
            // NOTE(review): assumes getExactDir() returns radians; TODO confirm.
+            AffineTransform rotateInstance = AffineTransform.getRotateInstance(averageRotation, pdPage.getMediaBox().getWidth() / 2, pdPage.getMediaBox().getHeight() / 2);
+
            // Words are selected by the word's own direction, while the average above is filtered per character.
+            for (Word word : words) {
+                if (!dir.equals(word.getDir())) {
+                    continue;
+                }
+                word.transform(rotateInstance);
+            }
+        }
+    }
+
+
    private void addNumberOfPagesToTrace(int numberOfPages, long size) {

        if (observationRegistry.getCurrentObservation() != null) {
@ -441,10 +457,10 @@ public class LayoutParsingPipeline {
        // Collect all statistics for the classificationPage, except from blocks inside tables, as tables will always be added to BodyTextFrame.
        for (AbstractPageBlock textBlock : classificationPage.getTextBlocks()) {
            if (textBlock instanceof TextPageBlock) {
-                if (((TextPageBlock) textBlock).getSequences() == null) {
+                if (((TextPageBlock) textBlock).getWords() == null) {
                    continue;
                }
-                for (TextPositionSequence word : ((TextPageBlock) textBlock).getSequences()) {
+                for (Word word : ((TextPageBlock) textBlock).getWords()) {
                    classificationPage.getTextHeightCounter().add(word.getTextHeight());
                    classificationPage.getFontCounter().add(word.getFont());
                    classificationPage.getFontSizeCounter().add(word.getFontSize());
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java
@ -11,12 +11,14 @@ import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
 import java.util.Optional;
+import java.util.concurrent.CompletableFuture;

+import org.springframework.core.task.TaskExecutor;
 import org.springframework.stereotype.Service;

 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.redaction.v1.server.data.DocumentData;
 import com.iqser.red.storage.commons.service.StorageService;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
@ -39,6 +41,8 @@ public class LayoutParsingStorageService {
    private final StorageService storageService;
    private final ObjectMapper objectMapper;

+    private final TaskExecutor taskExecutor;
+

    @Observed(name = "LayoutParsingStorageService", contextualName = "get-origin-file")
    public File getOriginFile(String storageId) throws IOException {
@ -100,13 +104,35 @@ public class LayoutParsingStorageService {
    }


+    /**
+     * Stores the four parts of the given document data (structure, text data, position data and pages) under the
+     * storage ids supplied by the request. Each part is written by its own task on {@code taskExecutor}; this
+     * method blocks until all four tasks have completed.
+     *
+     * @param layoutParsingRequest request providing the storage ids of the four target files
+     * @param documentData         document data whose parts are stored
+     * @throws java.util.concurrent.CompletionException if any of the store tasks fails
+     */
+    @SneakyThrows
    @Observed(name = "LayoutParsingStorageService", contextualName = "store-document-data")
    public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) {

-        storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getDocumentStructure());
-        storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getDocumentTextData());
-        storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getDocumentPositions());
-        storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getDocumentPages());
+        // Resolve the tenant once on the calling thread, as the replaced synchronous code did. The store tasks
+        // run on taskExecutor threads, where the tenant context is not guaranteed to be populated.
+        var tenantId = TenantContext.getTenantId();
+
+        Runnable storeDocumentStructureRunnable = () -> storageService.storeProtoObject(tenantId,
+                                                                                       layoutParsingRequest.structureFileStorageId(),
+                                                                                       documentData.getDocumentStructure());
+
+        CompletableFuture<Void> storeDocumentStructureFuture = CompletableFuture.runAsync(storeDocumentStructureRunnable, taskExecutor);
+
+        Runnable storeDocumentTextDataRunnable = () -> storageService.storeProtoObject(tenantId,
+                                                                                      layoutParsingRequest.textBlockFileStorageId(),
+                                                                                      documentData.getDocumentTextData());
+
+        CompletableFuture<Void> storeDocumentTextDataFuture = CompletableFuture.runAsync(storeDocumentTextDataRunnable, taskExecutor);
+
+        Runnable storeDocumentPositionsRunnable = () -> storageService.storeProtoObject(tenantId,
+                                                                                       layoutParsingRequest.positionBlockFileStorageId(),
+                                                                                       documentData.getDocumentPositionData());
+
+        CompletableFuture<Void> storeDocumentPositionsFuture = CompletableFuture.runAsync(storeDocumentPositionsRunnable, taskExecutor);
+
+        Runnable storeDocumentPagesRunnable = () -> storageService.storeProtoObject(tenantId,
+                                                                                   layoutParsingRequest.pageFileStorageId(),
+                                                                                   documentData.getDocumentPages());
+
+        CompletableFuture<Void> storeDocumentPagesFuture = CompletableFuture.runAsync(storeDocumentPagesRunnable, taskExecutor);
+
+        // Wait for all four tasks; join() rethrows the failure of any of them wrapped in a CompletionException.
+        CompletableFuture.allOf(storeDocumentStructureFuture, storeDocumentTextDataFuture, storeDocumentPositionsFuture, storeDocumentPagesFuture).join();
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/DocstrumSegmentationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/DocstrumSegmentationService.java
@ -18,7 +18,7 @@ import com.knecon.fforesight.service.layoutparser.processor.docstrum.service.Zon
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;

 import lombok.RequiredArgsConstructor;
@ -35,7 +35,7 @@ public class DocstrumSegmentationService {
    private final ReadingOrderService readingOrderService;


-    public List<Zone> segmentPage(List<TextPositionSequence> textPositions, boolean xyOrder, CleanRulings usedRulings, LayoutDebugLayer visualizations) {
+    public List<Zone> segmentPage(List<Word> textPositions, boolean xyOrder, CleanRulings usedRulings, LayoutDebugLayer visualizations) {

        EnumMap<TextDirection, Integer> directionCounts = new EnumMap<>(TextDirection.class);

@ -78,18 +78,14 @@ public class DocstrumSegmentationService {
    }


-    private List<Zone> computeZones(List<TextPositionSequence> textPositions, CleanRulings rulings, LayoutDebugLayer visualizations, TextDirection direction) {
+    private List<Zone> computeZones(List<Word> textPositions, CleanRulings rulings, LayoutDebugLayer visualizations, TextDirection direction) {

-        List<RedTextPosition> positions = textPositions.stream()
+        List<Character> characters = textPositions.stream()
                .filter(t -> t.getDir() == direction)
-                .map(TextPositionSequence::getTextPositions)
+                .map(Word::getCharacters)
                .flatMap(List::stream)
                .toList();

-        List<Character> characters = positions.stream()
-                .map(Character::new)
-                .collect(Collectors.toList());
-
        nearestNeighbourService.findNearestNeighbors(characters);

        double characterSpacing = spacingService.computeCharacterSpacing(characters);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/BoundingBox.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/BoundingBox.java
@ -133,7 +133,7 @@ public abstract class BoundingBox {
    }


-    private boolean intersectsX(BoundingBox other, float threshold) {
+    public boolean intersectsX(BoundingBox other, float threshold) {

        // True when the x-range [getX(), getMaxX()] of this box, widened by threshold on both sides, overlaps or
        // touches the x-range of other. The y-coordinates are not considered.
        return this.getX() - threshold <= other.getMaxX() && this.getMaxX() + threshold >= other.getX();
    }
@ -225,33 +225,31 @@ public abstract class BoundingBox {

    /**
     * Returns the horizontal gap between this box and {@code other}, looking only at their x-ranges
     * ({@code getMinX()} to {@code getMaxX()}).
     *
     * @param other the box to measure against
     * @return the positive distance between the facing edges when one x-range lies strictly beside the other,
     *         or 0 when the x-ranges overlap or touch
     */
    public double horizontalDistance(BoundingBox other) {

-        Rectangle2D left;
-        Rectangle2D right;
-        if (this.leftOf(other)) {
-            left = this.getBBox();
-            right = other.getBBox();
-        } else {
-            left = other.getBBox();
-            right = this.getBBox();
-        }
+        double rect1Right = getMaxX();
+        double rect1Left = getMinX();
+        double rect2Right = other.getMaxX();
+        double rect2Left = other.getMinX();

-        return Math.max(0, right.getMinX() - left.getMaxX());
        // When the ranges are disjoint, exactly one of the two differences is positive; max() selects it.
+        if (rect1Left > rect2Right || rect2Left > rect1Right) {
+            return Math.max(rect2Left - rect1Right, rect1Left - rect2Right);
+        } else {
+            return 0;
+        }
    }


    /**
     * Returns the vertical gap between this box and {@code other}, looking only at their y-ranges
     * ({@code getMinY()} to {@code getMaxY()}).
     *
     * @param other the box to measure against
     * @return the positive distance between the facing edges when one y-range lies strictly beyond the other,
     *         or 0 when the y-ranges overlap or touch
     */
    public double verticalDistance(BoundingBox other) {

-        Rectangle2D bottom;
-        Rectangle2D top;
-        if (this.isAbove(other)) {
-            top = this.getBBox();
-            bottom = other.getBBox();
-        } else {
-            bottom = this.getBBox();
-            top = other.getBBox();
-        }
+        double rect1Top = getMaxY();
+        double rect1Bottom = getMinY();
+        double rect2Top = other.getMaxY();
+        double rect2Bottom = other.getMinY();

-        return Math.max(0, bottom.getMinY() - top.getMaxY());
        // When the ranges are disjoint, exactly one of the two differences is positive; max() selects it.
+        if (rect1Bottom > rect2Top || rect2Bottom > rect1Top) {
+            return Math.max(rect2Bottom - rect1Top, rect1Bottom - rect2Top);
+        } else {
+            return 0;
+        }
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Line.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Line.java
@ -1,9 +1,9 @@
 package com.knecon.fforesight.service.layoutparser.processor.docstrum.model;

-import static com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence.BOLD;
-import static com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence.BOLD_ITALIC;
-import static com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence.ITALIC;
-import static com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence.STANDARD;
+import static com.knecon.fforesight.service.layoutparser.processor.model.text.Word.BOLD;
+import static com.knecon.fforesight.service.layoutparser.processor.model.text.Word.BOLD_ITALIC;
+import static com.knecon.fforesight.service.layoutparser.processor.model.text.Word.ITALIC;
+import static com.knecon.fforesight.service.layoutparser.processor.model.text.Word.STANDARD;

 import java.util.ArrayList;
 import java.util.Arrays;
@ -14,7 +14,7 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;

 import com.knecon.fforesight.service.layoutparser.processor.docstrum.utils.FastAtan2;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.FontStyle;

 import lombok.Data;
@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode;
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
 public class Line extends TextBoundingBox {

-    private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
+    private static final double WORD_DISTANCE_MULTIPLIER = 0.17;

    @EqualsAndHashCode.Include
    private final double x0;
@ -36,18 +36,13 @@ public class Line extends TextBoundingBox {
    @EqualsAndHashCode.Include
    private final double y1;

-    private final double height;
-
    private FontStyle fontStyle;

-    private final List<Character> characters;
-    private final List<TextPositionSequence> words = new ArrayList<>();
+    private final List<Word> words;


    public Line(List<Character> characters, double wordSpacing) {

-        this.characters = characters;
-
        if (characters.size() >= 2) {
            // linear regression
            double sx = 0.0;
@ -76,20 +71,32 @@ public class Line extends TextBoundingBox {
            this.y0 = character.getY() - dy;
            this.y1 = character.getY() + dy;
        }
-        height = computeHeight();
-        computeWords(wordSpacing * WORD_DISTANCE_MULTIPLIER);
+        this.words = new ArrayList<>();
+        computeWords(characters, wordSpacing * WORD_DISTANCE_MULTIPLIER);
        buildBBox();
        computeFontStyle();
    }


+    /**
+     * Creates a line from an already assembled list of words.
+     * The list is stored as passed in (no copy is made), the bounding box is built from the words,
+     * and x0/y0 and x1/y1 are set to the minimum and maximum corner returned by the bounding box getters.
+     *
+     * @param words the words this line consists of
+     */
+    public Line(List<Word> words) {
+
+        this.words = words;
+
+        // The bounding box is built first; the getters below presumably read from it.
+        this.buildBBox();
+        this.x0 = this.getMinX();
+        this.x1 = this.getMaxX();
+        this.y0 = this.getMinY();
+        this.y1 = this.getMaxY();
+
+        this.computeFontStyle();
+    }
+
+
    private void computeFontStyle() {

        EnumMap<FontStyle, AtomicInteger> fontStyleCounter = new EnumMap<>(FontStyle.class);
        for (FontStyle fontStyle : FontStyle.values()) {
            fontStyleCounter.put(fontStyle, new AtomicInteger(0));
        }
-        for (TextPositionSequence word : words) {
+        for (Word word : words) {
            switch (word.getFontStyle()) {
                case STANDARD -> fontStyleCounter.get(FontStyle.REGULAR).getAndIncrement();
                case BOLD -> fontStyleCounter.get(FontStyle.BOLD).getAndIncrement();
@ -100,8 +107,7 @@ public class Line extends TextBoundingBox {
        fontStyle = fontStyleCounter.entrySet()
                .stream()
                .max(Comparator.comparing(entry -> entry.getValue().get()))
-                .map(Map.Entry::getKey)
-                .orElse(FontStyle.REGULAR);
+                .map(Map.Entry::getKey).orElse(FontStyle.REGULAR);
    }


@ -117,14 +123,6 @@ public class Line extends TextBoundingBox {
    }


-    private double computeHeight() {
-
-        return characters.stream()
-                       .map(Character::getHeight)
-                       .reduce(0d, Double::sum) / characters.size();
-    }
-
-
    public double angularDifference(Line j) {

        double diff = Math.abs(getAngle() - j.getAngle());
@ -157,19 +155,22 @@ public class Line extends TextBoundingBox {
    }


-    private void computeWords(double wordSpacing) {
+    private void computeWords(List<Character> characters, double wordSpacing) {

-        TextPositionSequence word = new TextPositionSequence();
+        // Imo, the width of space should be scaled with the font size, but it only depends on the median distance between horizontal neighbours.
+        // If there are large differences in fontsize on a page, this might lead to missing spaces for the smaller fonts and too many for larger fonts.
+        // I've just now changed the scaling factor. If you come across this comment with missing whitespaces again, try scaling the fontsize instead of simply changing the factor again.
+        Word word = new Word();
        Character previous = null;
        for (Character current : characters) {
            if (previous != null) {
                double dist = current.getTextPosition().getXDirAdj() - previous.getTextPosition().getXDirAdj() - previous.getTextPosition().getWidthDirAdj();
                if (dist > wordSpacing) {
                    words.add(word);
-                    word = new TextPositionSequence();
+                    word = new Word();
                }
            }
-            word.getTextPositions().add(current.getTextPosition());
+            word.add(current);
            previous = current;
        }
        words.add(word);
@ -178,9 +179,7 @@ public class Line extends TextBoundingBox {

    private void buildBBox() {

-        this.setToBBoxOfComponents(characters.stream()
-                                           .map(Character::getTextPosition)
-                                           .toList());
+        this.setToBBoxOfComponents(words);
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/TextBoundingBox.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/TextBoundingBox.java
@ -99,4 +99,82 @@ public abstract class TextBoundingBox extends BoundingBox {
        return this.bBoxDirAdj.getCenterX();
    }

+
+    /**
+     * Computes the horizontal gap between this box and {@code other}, using the direction-adjusted x extents.
+     *
+     * @param other the box to measure against
+     * @return the width of the gap if one box lies entirely to the side of the other, otherwise 0
+     */
+    public double horizontalDistanceDirAdj(TextBoundingBox other) {
+
+        double thisRight = getMaxXDirAdj();
+        double thisLeft = getXDirAdj();
+        double otherRight = other.getMaxXDirAdj();
+        double otherLeft = other.getXDirAdj();
+
+        boolean separated = thisLeft > otherRight || otherLeft > thisRight;
+        if (!separated) {
+            return 0;
+        }
+        // Only one of the two differences is positive when the boxes are separated; that one is the gap.
+        return Math.max(otherLeft - thisRight, thisLeft - otherRight);
+    }
+
+
+    /**
+     * Computes the vertical gap between this box and {@code other}, using the direction-adjusted y extents.
+     *
+     * @param other the box to measure against
+     * @return the height of the gap if the y ranges of the two boxes do not overlap, otherwise 0
+     */
+    public double verticalDistanceDirAdj(TextBoundingBox other) {
+
+        double thisMaxY = getMaxYDirAdj();
+        double thisMinY = getYDirAdj();
+        double otherMaxY = other.getMaxYDirAdj();
+        double otherMinY = other.getYDirAdj();
+
+        boolean separated = thisMinY > otherMaxY || otherMinY > thisMaxY;
+        if (!separated) {
+            return 0;
+        }
+        // Only one of the two differences is positive when the boxes are separated; that one is the gap.
+        return Math.max(otherMinY - thisMaxY, thisMinY - otherMaxY);
+    }
+
+
+    /**
+     * Checks whether this box and {@code other} overlap on both the direction-adjusted x and y axis.
+     * Touching edges count as overlapping.
+     *
+     * @param other the box to test against
+     * @return true if the x ranges and the y ranges both overlap
+     */
+    public boolean intersectsDirAdj(TextBoundingBox other) {
+
+        if (!intersectsXDirAdj(other)) {
+            return false;
+        }
+        return intersectsYDirAdj(other);
+    }
+
+
+    /**
+     * Checks whether this box, widened by the given tolerances, overlaps {@code other} on both direction-adjusted axes.
+     * Note the parameter order: the y tolerance comes before the x tolerance.
+     *
+     * @param other      the box to test against
+     * @param yThreshold tolerance applied to the y range check
+     * @param xThreshold tolerance applied to the x range check
+     * @return true if both tolerant range checks succeed
+     */
+    public boolean intersectsDirAdj(TextBoundingBox other, float yThreshold, float xThreshold) {
+
+        if (!intersectsXDirAdj(other, xThreshold)) {
+            return false;
+        }
+        return intersectsYDirAdj(other, yThreshold);
+    }
+
+
+    public boolean intersectsXDirAdj(TextBoundingBox other, float threshold) {
+
+        return this.getXDirAdj() - threshold <= other.getMaxXDirAdj() && this.getMaxXDirAdj() + threshold >= other.getXDirAdj();
+    }
+
+
+    public boolean intersectsXDirAdj(TextBoundingBox other) {
+
+        return this.getXDirAdj() <= other.getMaxXDirAdj() && this.getMaxXDirAdj() >= other.getXDirAdj();
+    }
+
+
+    public boolean intersectsYDirAdj(TextBoundingBox other) {
+
+        return this.getYDirAdj() <= other.getMaxYDirAdj() && this.getMaxYDirAdj() >= other.getYDirAdj();
+    }
+
+
+    public boolean intersectsYDirAdj(TextBoundingBox other, float threshold) {
+
+        return this.getYDirAdj() - threshold <= other.getMaxYDirAdj() && this.getMaxYDirAdj() + threshold >= other.getYDirAdj();
+    }
+
+
+    /**
+     * Checks whether this box lies above {@code other} in direction-adjusted coordinates.
+     * Defined as the mirror image of {@link #isBelowDirAdj(TextBoundingBox)}: this box is above {@code other}
+     * exactly when {@code other} is below this box.
+     *
+     * @param other the box to compare with
+     * @return true if {@code other} is below this box according to {@link #isBelowDirAdj(TextBoundingBox)}
+     */
+    public boolean isAboveDirAdj(TextBoundingBox other) {
+
+        // Delegate to the DirAdj variant so both boxes are compared in direction-adjusted coordinates
+        // (isBelow is presumably the counterpart working on unadjusted coordinates).
+        return other.isBelowDirAdj(this);
+    }
+
+
+    /**
+     * Checks whether this box lies below {@code other} in direction-adjusted coordinates.
+     * That is the case when the x ranges of both boxes overlap and this box's direction-adjusted y
+     * is greater than or equal to the maximum direction-adjusted y of {@code other}.
+     *
+     * @param other the box to compare with
+     * @return true if both conditions hold
+     */
+    public boolean isBelowDirAdj(TextBoundingBox other) {
+
+        if (!this.intersectsXDirAdj(other)) {
+            return false;
+        }
+        return other.getMaxYDirAdj() <= this.getYDirAdj();
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/UnionFind.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/UnionFind.java
@ -28,4 +28,10 @@ public class UnionFind<T> extends org.jgrapht.alg.util.UnionFind<T> {
        return setRep.values();
    }

+
+    /**
+     * Returns the keys of the parent map maintained by the underlying union-find structure.
+     * The returned collection is the map's key set itself, not a copy.
+     *
+     * @return the key set of the parent map
+     */
+    public Collection<T> getElements() {
+
+        Collection<T> elements = getParentMap().keySet();
+        return elements;
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Zone.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Zone.java
@ -18,7 +18,6 @@ public class Zone extends TextBoundingBox {
    @SuppressWarnings("PMD.ConstructorCallsOverridableMethod")
    public Zone(List<Line> lines) {

-        lines.sort(Comparator.comparingDouble(Line::getY0));
        this.lines = lines;
        setToBBoxOfComponents(lines);
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/LineBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/LineBuilderService.java
@ -17,7 +17,7 @@ public class LineBuilderService {

    private static final double CHARACTER_SPACING_DISTANCE_MULTIPLIER = 3.5;
    private static final double LINE_SPACING_THRESHOLD_MULTIPLIER = 0.67;
-    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+    private static final double ANGLE_TOLERANCE = Math.toRadians(5);


    public List<Line> buildLines(List<Character> characters, double characterSpacing, double lineSpacing, CleanRulings rulings) {
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/ZoneBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/ZoneBuilderService.java
@ -1,9 +1,7 @@
 package com.knecon.fforesight.service.layoutparser.processor.docstrum.service;

-import static com.knecon.fforesight.service.layoutparser.processor.model.SectionIdentifier.numericalIdentifierPattern;
-
 import java.util.ArrayList;
-import java.util.Comparator;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@ -11,11 +9,12 @@ import java.util.stream.Collectors;

 import org.springframework.stereotype.Service;

-import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Character;
 import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Line;
 import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.UnionFind;
 import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Zone;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
+import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;

@Service
 public class ZoneBuilderService {
@ -31,7 +30,7 @@ public class ZoneBuilderService {

    private static final double MAX_LINE_SIZE_SCALE = 2.5;

-    private static final double ANGLE_TOLERANCE = Math.PI / 6;
+    private static final double ANGLE_TOLERANCE = Math.toRadians(5);

    private static final double MAX_VERTICAL_MERGE_DISTANCE = 0.5;

@ -114,64 +113,14 @@ public class ZoneBuilderService {

    private Zone mergeLinesInZone(List<Line> lines, double characterSpacing, double lineSpacing) {

-        double maxHorizontalDistance = 0;
-        double minVerticalDistance = 0;
-        double maxVerticalDistance = lineSpacing * MAX_VERTICAL_MERGE_DISTANCE;
+        Set<Word> words = lines.stream()
+                .map(Line::getWords)
+                .flatMap(Collection::stream)
+                .collect(Collectors.toSet());
+        Collection<Set<Word>> groupedLines = TextPositionOperations.groupByLine(words);

-        UnionFind<Line> unionFind = new UnionFind<>(new HashSet<>(lines));
-
-        lines.forEach(outer -> {
-            lines.forEach(inner -> {
-                if (inner == outer) {
-                    return;
-                }
-
-                double horizontalDistance = outer.horizontalDistance(inner);
-                double verticalDistance = outer.verticalDistance(inner);
-
-                if (horizontalDistance <= maxHorizontalDistance && minVerticalDistance <= verticalDistance && verticalDistance <= maxVerticalDistance) {
-
-                    unionFind.union(outer, inner);
-
-                } else if (minVerticalDistance <= verticalDistance
-                           && verticalDistance <= maxVerticalDistance
-                           && Math.abs(horizontalDistance - Math.min(outer.getLength(), inner.getLength())) < 0.1) {
-
-                    boolean characterOverlap = false;
-                    int overlappingCount = 0;
-                    for (Character outerCharacter : outer.getCharacters()) {
-                        for (Character innerCharacter : inner.getCharacters()) {
-                            double characterOverlapDistance = outerCharacter.overlappingDistance(innerCharacter);
-                            if (characterOverlapDistance > 2) {
-                                characterOverlap = true;
-                            }
-                            if (characterOverlapDistance > 0) {
-                                overlappingCount++;
-                            }
-                        }
-                    }
-                    if (!characterOverlap && overlappingCount <= 2) {
-                        unionFind.union(outer, inner);
-                    }
-                }
-
-            });
-        });
-
-        List<Line> outputZone = new ArrayList<>();
-        for (Set<Line> group : unionFind.getGroups()) {
-            List<Character> characters = new ArrayList<>();
-            for (Line line : group) {
-                characters.addAll(line.getCharacters());
-            }
-            characters.sort(Comparator.comparingDouble(Character::getX));
-
-            outputZone.add(new Line(characters, characterSpacing));
-        }
-
-        return new Zone(outputZone.stream()
-                                .sorted(Comparator.comparing(Line::getY0))
-                                .collect(Collectors.toList()));
+        List<Line> sortedLines = TextPositionOperations.sortLines(groupedLines);
+        return new Zone(sortedLines);
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java
@ -4,7 +4,7 @@ import java.util.HashSet;
 import java.util.Set;

 import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java
@ -4,7 +4,7 @@ import java.util.ArrayList;
 import java.util.List;

 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
-import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
 import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;
@ -31,6 +31,6 @@ public class ClassificationDocument {
    private long rulesVersion;

    private OutlineObjectTree outlineObjectTree;
-    private TableOfContents tableOfContents;
+    private SectionTree sectionTree;

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/DocumentWithVisualization.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/DocumentWithVisualization.java
@ -0,0 +1,19 @@
+package com.knecon.fforesight.service.layoutparser.processor.model;
+
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
+import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;
+
+public record DocumentWithVisualization(Document document, LayoutDebugLayer layoutDebugLayer) {
+
+    public Map<NodeType, Long> buildSemanticNodeCounts() {
+
+        return document.streamAllSubNodes()
+                .collect(Collectors.groupingBy(SemanticNode::getType, Collectors.counting()));
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/FloatFrequencyCounter.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/FloatFrequencyCounter.java
@ -1,6 +1,5 @@
 package com.knecon.fforesight.service.layoutparser.processor.model;

-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@ -13,10 +12,14 @@ import lombok.Getter;
 public class FloatFrequencyCounter {

    Map<Double, Integer> countPerValue = new HashMap<>();
+    boolean changed;
+    Double mostPopularCache;


    public void add(double value) {

+        changed = true;
+
        if (!countPerValue.containsKey(value)) {
            countPerValue.put(value, 1);
        } else {
@ -27,6 +30,8 @@ public class FloatFrequencyCounter {

    public void addAll(Map<Double, Integer> otherCounter) {

+        changed = true;
+
        for (Map.Entry<Double, Integer> entry : otherCounter.entrySet()) {
            if (countPerValue.containsKey(entry.getKey())) {
                countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
@ -39,27 +44,27 @@ public class FloatFrequencyCounter {

    public Double getMostPopular() {

-        Map.Entry<Double, Integer> mostPopular = null;
-        for (Map.Entry<Double, Integer> entry : countPerValue.entrySet()) {
-            if (mostPopular == null || entry.getValue() >= mostPopular.getValue()) {
-                mostPopular = entry;
+        if (changed || mostPopularCache == null) {
+            Map.Entry<Double, Integer> mostPopular = null;
+            for (Map.Entry<Double, Integer> entry : countPerValue.entrySet()) {
+                if (mostPopular == null || entry.getValue() >= mostPopular.getValue()) {
+                    mostPopular = entry;
+                }
            }
+            mostPopularCache = mostPopular != null ? mostPopular.getKey() : 0;
+            changed = false;
        }
-        return mostPopular != null ? mostPopular.getKey() : null;
+
+        return mostPopularCache;
    }


-    public List<Double> getHigherThanMostPopular() {
+    public List<Double> getValuesInReverseOrder() {

-        Double mostPopular = getMostPopular();
-        List<Double> higher = new ArrayList<>();
-        for (Double value : countPerValue.keySet()) {
-            if (value > mostPopular) {
-                higher.add(value);
-            }
-        }
-
-        return higher.stream().sorted(Collections.reverseOrder()).collect(Collectors.toList());
+        return countPerValue.keySet()
+                .stream()
+                .sorted(Collections.reverseOrder())
+                .collect(Collectors.toList());
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/LineInformation.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/LineInformation.java
@ -3,7 +3,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
 import java.awt.geom.Rectangle2D;
 import java.util.List;

-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
@ -16,8 +16,8 @@ import lombok.experimental.FieldDefaults;
 public class LineInformation {

    List<Rectangle2D> lineBBox;
-    List<List<TextPositionSequence>> sequencesByLines;
+    List<List<Word>> sequencesByLines;
    List<List<Rectangle2D>> bBoxWithGapsByLines;
-    List<List<List<TextPositionSequence>>> sequencesWithGapsByLines;
+    List<List<List<Word>>> sequencesWithGapsByLines;

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/PageBlockType.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/PageBlockType.java
@ -9,12 +9,14 @@ public enum PageBlockType {
    H6,
    HEADER,
    FOOTER,
-    TITLE,
    PARAGRAPH,
    PARAGRAPH_BOLD,
    PARAGRAPH_ITALIC,
    PARAGRAPH_UNKNOWN,
    OTHER,
+    TABLE_OF_CONTENTS_HEADLINE,
+    TABLE_OF_CONTENTS_ITEM,
+    LIST_ITEM,
    TABLE;


@ -34,7 +36,7 @@ public enum PageBlockType {
    public static int getHeadlineNumber(PageBlockType pageBlockType) {

        return switch (pageBlockType) {
-            case H1 -> 1;
+            case H1, TABLE_OF_CONTENTS_HEADLINE -> 1;
            case H2 -> 2;
            case H3 -> 3;
            case H4 -> 4;
@ -46,6 +48,6 @@ public enum PageBlockType {

    public boolean isHeadline() {

-        return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6);
+        return this.equals(H1) || this.equals(H2) || this.equals(H3) || this.equals(H4) || this.equals(H5) || this.equals(H6) || this.equals(TABLE_OF_CONTENTS_HEADLINE);
    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/PageContents.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/PageContents.java
@ -4,7 +4,7 @@ import java.awt.geom.Rectangle2D;
 import java.util.List;

 import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

 import lombok.AllArgsConstructor;
 import lombok.Builder;
@ -15,7 +15,7 @@ import lombok.Getter;
@AllArgsConstructor
 public class PageContents {

-    List<TextPositionSequence> sortedTextPositionSequences;
+    List<Word> sortedWords;
    Rectangle2D cropBox;
    Rectangle2D mediaBox;
    List<Ruling> rulings;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/SectionIdentifier.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/SectionIdentifier.java
@ -3,6 +3,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@ -15,11 +16,13 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class SectionIdentifier {

-    public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
+    public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d{1,2})(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?(?:[\\s.,;](\\d{1,2}))?");
+    public static Pattern alphanumericIdentifierPattern = Pattern.compile("^[\\s]?[A-Za-z][\\s.,;]?(\\d{1,2})[\\s.,;]?(\\d{1,2})?[\\s.,;]?(\\d{1,2})?[\\s.,;]?(\\d{1,2})?[\\s.,;]?");

    public enum Format {
        EMPTY,
        NUMERICAL,
+        ALPHANUMERIC,
        DOCUMENT
    }

@ -41,6 +44,10 @@ public class SectionIdentifier {
        if (numericalIdentifierMatcher.find()) {
            return buildNumericalSectionIdentifier(headline, numericalIdentifierMatcher);
        }
+        Matcher alphanumericIdentifierMatcher = alphanumericIdentifierPattern.matcher(headline);
+        if (alphanumericIdentifierMatcher.find()) {
+            return buildAlphanumericSectionIdentifier(headline, alphanumericIdentifierMatcher);
+        }
        // more formats here
        return SectionIdentifier.empty();
    }
@ -75,7 +82,36 @@ public class SectionIdentifier {
            }
            identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
        }
-        return new SectionIdentifier(Format.NUMERICAL, identifierString, identifiers.stream().toList(), false);
+        return new SectionIdentifier(Format.NUMERICAL,
+                                     identifierString,
+                                     identifiers.stream()
+                                             .toList(),
+                                     false);
+    }
+
+
+    /**
+     * Builds an {@link Format#ALPHANUMERIC} section identifier from a headline that starts with a letter followed by numbers.
+     * The letter is mapped to its position in the alphabet (A/a = 1, B/b = 2, ...) and becomes the first identifier;
+     * the captured numbers follow in order until a group is missing or equals "0".
+     *
+     * @param headline                      the headline the matcher was created for
+     * @param alphanumericIdentifierMatcher a matcher of {@code alphanumericIdentifierPattern} on which {@code find()} has already succeeded
+     * @return the parsed section identifier
+     */
+    private static SectionIdentifier buildAlphanumericSectionIdentifier(String headline, Matcher alphanumericIdentifierMatcher) {
+
+        String identifierString = headline.substring(alphanumericIdentifierMatcher.start(), alphanumericIdentifierMatcher.end());
+
+        // The pattern allows one leading whitespace character, so the letter is not necessarily the first character of the match.
+        // Without skipping it, the whitespace would be mapped to a negative identifier.
+        String alphanumericIdentifier = identifierString.stripLeading().substring(0, 1).toUpperCase(Locale.ENGLISH);
+        int mappedCharacterValue = alphanumericIdentifier.charAt(0) - 'A' + 1;
+        List<Integer> identifiers = new LinkedList<>();
+        identifiers.add(mappedCharacterValue);
+
+        // NOTE(review): only groups 1-3 are read although the pattern captures four numbers; confirm that dropping the fourth is intended.
+        for (int i = 1; i <= 3; i++) {
+            String numericalIdentifier = alphanumericIdentifierMatcher.group(i);
+            if (numericalIdentifier == null || numericalIdentifier.equals("0") || numericalIdentifier.isBlank()) {
+                break;
+            }
+            identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
+        }
+
+        return new SectionIdentifier(Format.ALPHANUMERIC,
+                                     identifierString,
+                                     identifiers.stream()
+                                             .toList(),
+                                     false);
+    }


@ -123,4 +159,22 @@ public class SectionIdentifier {
        return identifierString;
    }

+
+    public boolean isEmpty() {
+
+        return this.format.equals(Format.EMPTY);
+    }
+
+
+    /**
+     * Returns the nesting level of this identifier, i.e. the number of identifier parts it holds.
+     *
+     * @return the size of the identifier list
+     */
+    public int level() {
+
+        List<Integer> parts = this.identifiers;
+        return parts.size();
+    }
+
+
+    /**
+     * Gives subclasses and same-package code access to the identifier parts.
+     * The stored list itself is returned, not a copy.
+     *
+     * @return the identifier list of this section identifier
+     */
+    protected List<Integer> getIdentifiers() {
+
+        return this.identifiers;
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/AbstractNodeVisitor.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/AbstractNodeVisitor.java
@ -1,94 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Header;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Headline;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
-
-public abstract class AbstractNodeVisitor implements NodeVisitor {
-
-    @Override
-    public void visit(Document document) {
-
-        visitChildren(document);
-    }
-
-
-    @Override
-    public void visit(SuperSection superSection) {
-
-        visitChildren(superSection);
-    }
-
-
-    @Override
-    public void visit(Section section) {
-
-        visitChildren(section);
-    }
-
-
-    @Override
-    public void visit(Headline headline) {
-
-        visitChildren(headline);
-    }
-
-
-    @Override
-    public void visit(Paragraph paragraph) {
-
-        visitChildren(paragraph);
-    }
-
-
-    @Override
-    public void visit(Footer footer) {
-
-        visitChildren(footer);
-    }
-
-
-    @Override
-    public void visit(Header header) {
-
-        visitChildren(header);
-    }
-
-
-    @Override
-    public void visit(Image image) {
-
-        visitChildren(image);
-    }
-
-
-    @Override
-    public void visit(Table table) {
-
-        visitChildren(table);
-    }
-
-
-    @Override
-    public void visit(TableCell tableCell) {
-
-        visitChildren(tableCell);
-    }
-
-
-    private void visitChildren(SemanticNode semanticNode) {
-
-        semanticNode.streamChildren()
-                .forEach(node -> node.accept(this));
-
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/DocumentTree.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/DocumentTree.java
@ -1,230 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph;
-
-import static java.lang.String.format;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.stream.Stream;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.Getter;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@EqualsAndHashCode
-public class DocumentTree {
-
-    private final Entry root;
-
-
-    public DocumentTree(Document document) {
-
-        root = Entry.builder().treeId(Collections.emptyList()).children(new LinkedList<>()).node(document).build();
-    }
-
-
-    public TextBlock buildTextBlock() {
-
-        return allEntriesInOrder().map(Entry::getNode)
-                .filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getLeafTextBlock)
-                .collect(new TextBlockCollector());
-    }
-
-
-    public List<Integer> createNewMainEntryAndReturnId(GenericSemanticNode node) {
-
-        return createNewChildEntryAndReturnIdImpl(Collections.emptyList(), node);
-    }
-
-
-    public List<Integer> createNewChildEntryAndReturnId(GenericSemanticNode parentNode, GenericSemanticNode node) {
-
-        return createNewChildEntryAndReturnIdImpl(parentNode.getTreeId(), node);
-    }
-
-
-    public List<Integer> createNewChildEntryAndReturnId(GenericSemanticNode parentNode, Table node) {
-
-        return createNewChildEntryAndReturnIdImpl(parentNode.getTreeId(), node);
-    }
-
-
-    public List<Integer> createNewTableChildEntryAndReturnId(Table parentTable, TableCell tableCell) {
-
-        return createNewChildEntryAndReturnIdImpl(parentTable.getTreeId(), tableCell);
-    }
-
-
-    @SuppressWarnings("PMD.UnusedPrivateMethod") // PMD actually flags this wrong
-    private List<Integer> createNewChildEntryAndReturnIdImpl(List<Integer> parentId, SemanticNode node) {
-
-        if (!entryExists(parentId)) {
-            throw new IllegalArgumentException(format("parentId %s does not exist!", parentId));
-        }
-
-        Entry parent = getEntryById(parentId);
-        List<Integer> newId = new LinkedList<>(parentId);
-        newId.add(parent.children.size());
-        parent.children.add(Entry.builder().treeId(newId).node(node).build());
-
-        return newId;
-    }
-
-
-    private boolean entryExists(List<Integer> treeId) {
-
-        if (treeId.isEmpty()) {
-            return root != null;
-        }
-        Entry entry = root.children.get(treeId.get(0));
-        for (int id : treeId.subList(1, treeId.size())) {
-            if (id >= entry.children.size() || 0 > id) {
-                return false;
-            }
-            entry = entry.children.get(id);
-        }
-        return true;
-    }
-
-
-    public Entry getParentEntryById(List<Integer> treeId) {
-
-        return getEntryById(getParentId(treeId));
-    }
-
-
-    public boolean hasParentById(List<Integer> treeId) {
-
-        return !treeId.isEmpty();
-    }
-
-
-    public Stream<SemanticNode> childNodes(List<Integer> treeId) {
-
-        return getEntryById(treeId).children.stream()
-                .map(Entry::getNode);
-    }
-
-
-    public Stream<SemanticNode> childNodesOfType(List<Integer> treeId, NodeType nodeType) {
-
-        return getEntryById(treeId).children.stream()
-                .filter(entry -> entry.node.getType().equals(nodeType))
-                .map(Entry::getNode);
-    }
-
-
-    private static List<Integer> getParentId(List<Integer> treeId) {
-
-        if (treeId.isEmpty()) {
-            throw new UnsupportedOperationException("Root has no parent!");
-        }
-        if (treeId.size() < 2) {
-            return Collections.emptyList();
-        }
-        return treeId.subList(0, treeId.size() - 1);
-    }
-
-
-    public Entry getEntryById(List<Integer> treeId) {
-
-        if (treeId.isEmpty()) {
-            return root;
-        }
-        Entry entry = root;
-        for (int id : treeId) {
-            entry = entry.children.get(id);
-        }
-        return entry;
-    }
-
-
-    public Stream<Entry> mainEntries() {
-
-        return root.children.stream();
-    }
-
-
-    public Stream<Entry> allEntriesInOrder() {
-
-        return Stream.of(root)
-                .flatMap(DocumentTree::flatten);
-    }
-
-
-    public Stream<Entry> allSubEntriesInOrder(List<Integer> parentId) {
-
-        return getEntryById(parentId).children.stream()
-                .flatMap(DocumentTree::flatten);
-    }
-
-
-    @Override
-    public String toString() {
-
-        return String.join("\n",
-                           allEntriesInOrder().map(Entry::toString)
-                                   .toList());
-    }
-
-
-    private static Stream<Entry> flatten(Entry entry) {
-
-        return Stream.concat(Stream.of(entry),
-                             entry.children.stream()
-                                     .flatMap(DocumentTree::flatten));
-    }
-
-
-    public SemanticNode getHighestParentById(List<Integer> treeId) {
-
-        if (treeId.isEmpty()) {
-            return root.node;
-        }
-        return root.children.get(treeId.get(0)).node;
-    }
-
-
-    @Builder
-    @Getter
-    @AllArgsConstructor
-    @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
-    public static class Entry {
-
-        List<Integer> treeId;
-        SemanticNode node;
-        @Builder.Default
-        List<Entry> children = new LinkedList<>();
-
-
-        @Override
-        public String toString() {
-
-            return node.toString();
-        }
-
-
-        public NodeType getType() {
-
-            return node.getType();
-        }
-
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/NodeVisitor.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/NodeVisitor.java
@ -1,45 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Header;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Headline;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
-
-public interface NodeVisitor {
-
-    void visit(Document document);
-
-
-    void visit(SuperSection superSection);
-
-
-    void visit(Section section);
-
-
-    void visit(Headline headline);
-
-
-    void visit(Paragraph paragraph);
-
-
-    void visit(Footer footer);
-
-
-    void visit(Header header);
-
-
-    void visit(Image image);
-
-
-    void visit(Table table);
-
-
-    void visit(TableCell tableCell);
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/TextRange.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/TextRange.java
@ -1,164 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph;
-
-import static java.lang.String.format;
-
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.stream.IntStream;
-
-import lombok.EqualsAndHashCode;
-import lombok.Setter;
-
-@Setter
-@EqualsAndHashCode
-@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
-public class TextRange implements Comparable<TextRange> {
-
-    private int start;
-    private int end;
-
-
-    public TextRange(int start, int end) {
-
-        if (start > end) {
-            throw new IllegalArgumentException(format("start: %d > end: %d", start, end));
-        }
-        this.start = start;
-        this.end = end;
-    }
-
-
-    public int length() {
-
-        return end - start;
-    }
-
-
-    public int start() {
-
-        return start;
-    }
-
-
-    public int end() {
-
-        return end;
-    }
-
-
-    public boolean contains(TextRange textRange) {
-
-        return start <= textRange.start() && textRange.end() <= end;
-    }
-
-
-    public boolean containedBy(TextRange textRange) {
-
-        return textRange.contains(this);
-    }
-
-
-    public boolean contains(int start, int end) {
-
-        if (start > end) {
-            throw new IllegalArgumentException(format("start: %d > end: %d", start, end));
-        }
-        return this.start <= start && end <= this.end;
-    }
-
-
-    public boolean containedBy(int start, int end) {
-
-        if (start > end) {
-            throw new IllegalArgumentException(format("start: %d > end: %d", start, end));
-        }
-        return start <= this.start && this.end <= end;
-    }
-
-
-    public boolean contains(int index) {
-
-        return start <= index && index <= end;
-    }
-
-
-    public boolean containsExclusive(int index) {
-
-        return start <= index && index < end;
-    }
-
-
-    public boolean intersects(TextRange textRange) {
-
-        return textRange.start() < this.end && this.start < textRange.end();
-    }
-
-
-    public List<TextRange> split(List<Integer> splitIndices) {
-
-        if (splitIndices.stream()
-                .anyMatch(idx -> !this.containsExclusive(idx))) {
-            throw new IndexOutOfBoundsException(format("%s splitting indices are out of range for %s",
-                                                       splitIndices.stream()
-                                                               .filter(idx -> !this.containsExclusive(idx))
-                                                               .toList(),
-                                                       this));
-        }
-        List<TextRange> splitBoundaries = new LinkedList<>();
-        int previousIndex = start;
-        for (int splitIndex : splitIndices) {
-
-            // skip split if it would produce a boundary of length 0
-            if (splitIndex == previousIndex) {
-                continue;
-            }
-            splitBoundaries.add(new TextRange(previousIndex, splitIndex));
-            previousIndex = splitIndex;
-        }
-        splitBoundaries.add(new TextRange(previousIndex, end));
-        return splitBoundaries;
-    }
-
-
-    public IntStream intStream() {
-
-        return IntStream.range(start, end);
-    }
-
-
-    public static TextRange merge(Collection<TextRange> boundaries) {
-
-        int minStart = boundaries.stream()
-                .mapToInt(TextRange::start)
-                .min()
-                .orElseThrow(IllegalArgumentException::new);
-        int maxEnd = boundaries.stream()
-                .mapToInt(TextRange::end)
-                .max()
-                .orElseThrow(IllegalArgumentException::new);
-        return new TextRange(minStart, maxEnd);
-    }
-
-
-    @Override
-    public String toString() {
-
-        return format("Boundary [%d|%d)", start, end);
-    }
-
-
-    @Override
-    public int compareTo(TextRange textRange) {
-
-        if (end < textRange.end() && start < textRange.start()) {
-            return -1;
-        }
-        if (start > textRange.start() && end > textRange.end()) {
-            return 1;
-        }
-
-        return 0;
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/entity/EntityType.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/entity/EntityType.java
@ -1,8 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.entity;
-
-public enum EntityType {
-    ENTITY,
-    RECOMMENDATION,
-    FALSE_POSITIVE,
-    FALSE_RECOMMENDATION
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/entity/RedactionPosition.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/entity/RedactionPosition.java
@ -1,24 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.entity;
-
-import java.awt.geom.Rectangle2D;
-import java.util.List;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class RedactionPosition {
-
-    final String id;
-    Page page;
-    // Each entry in this list corresponds to an entry in the redaction log, this means:
-    // An entity might be represented by multiple redaction log entries
-    List<Rectangle2D> rectanglePerLine;
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/entity/TextEntity.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/entity/TextEntity.java
@ -1,228 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.entity;
-
-import java.awt.geom.Rectangle2D;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.Deque;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
-import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@Builder
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@EqualsAndHashCode(onlyExplicitlyIncluded = true)
-public class TextEntity {
-
-    // initial values
-    @EqualsAndHashCode.Include
-    final TextRange textRange;
-    @EqualsAndHashCode.Include
-    final String type;
-    @EqualsAndHashCode.Include
-    final EntityType entityType;
-
-    // empty defaults
-    boolean redaction;
-    boolean removed;
-    boolean ignored;
-    boolean resized;
-    boolean skipRemoveEntitiesContainedInLarger;
-    boolean dictionaryEntry;
-    boolean dossierDictionaryEntry;
-    Set<Engine> engines;
-    Set<TextEntity> references;
-    @Builder.Default
-    Deque<Integer> matchedRules = new LinkedList<>();
-    String redactionReason;
-    String legalBasis;
-
-    // inferred on graph insertion
-    @EqualsAndHashCode.Include
-    String value;
-    String textBefore;
-    String textAfter;
-    @Builder.Default
-    Set<Page> pages = new HashSet<>();
-    List<RedactionPosition> redactionPositionsPerPage;
-    @Builder.Default
-    List<SemanticNode> intersectingNodes = new LinkedList<>();
-    SemanticNode deepestFullyContainingNode;
-
-
-    public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType) {
-
-        return TextEntity.builder().type(type).entityType(entityType).textRange(textRange).engines(new HashSet<>()).references(new HashSet<>()).build();
-    }
-
-
-    public boolean occursInNodeOfType(Class<? extends SemanticNode> clazz) {
-
-        return intersectingNodes.stream().anyMatch(clazz::isInstance);
-    }
-
-
-    public boolean occursInNode(SemanticNode semanticNode) {
-
-        return intersectingNodes.stream().anyMatch(node -> node.equals(semanticNode));
-    }
-
-
-    public boolean isType(String type) {
-
-        return this.type.equals(type);
-    }
-
-
-    public boolean isAnyType(List<String> types) {
-
-        return types.contains(type);
-    }
-
-
-    public void addIntersectingNode(SemanticNode containingNode) {
-
-        intersectingNodes.add(containingNode);
-    }
-
-
-    public void removeFromGraph() {
-
-        intersectingNodes.forEach(node -> node.getEntities().remove(this));
-        pages.forEach(page -> page.getEntities().remove(this));
-        intersectingNodes = new LinkedList<>();
-        deepestFullyContainingNode = null;
-        pages = new HashSet<>();
-        removed = true;
-        ignored = true;
-    }
-
-
-    public void addMatchedRule(int ruleNumber) {
-
-        matchedRules.add(ruleNumber);
-    }
-
-
-    public int getMatchedRule() {
-
-        if (matchedRules.isEmpty()) {
-            return 0;
-        }
-        return matchedRules.getLast();
-    }
-
-
-    public List<RedactionPosition> getRedactionPositionsPerPage() {
-
-        if (redactionPositionsPerPage == null || redactionPositionsPerPage.isEmpty()) {
-            Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = deepestFullyContainingNode.getTextBlock().getPositionsPerPage(textRange);
-
-            Page firstPage = rectanglesPerLinePerPage.keySet()
-                    .stream()
-                    .min(Comparator.comparingInt(Page::getNumber))
-                    .orElseThrow(() -> new RuntimeException("No Positions found on any page!"));
-            String id = IdBuilder.buildId(pages, rectanglesPerLinePerPage.values().stream().flatMap(Collection::stream).toList());
-            redactionPositionsPerPage = rectanglesPerLinePerPage.entrySet().stream().map(entry -> buildRedactionPosition(firstPage, id, entry)).toList();
-        }
-        return redactionPositionsPerPage;
-    }
-
-
-    private static RedactionPosition buildRedactionPosition(Page firstPage, String id, Map.Entry<Page, List<Rectangle2D>> entry) {
-
-        if (entry.getKey().equals(firstPage)) {
-            return new RedactionPosition(id, entry.getKey(), entry.getValue());
-        } else {
-            return new RedactionPosition(id + "-" + entry.getKey().getNumber(), entry.getKey(), entry.getValue());
-        }
-    }
-
-
-    public boolean containedBy(TextEntity textEntity) {
-
-        return this.textRange.containedBy(textEntity.getTextRange());
-    }
-
-
-    public boolean contains(TextEntity textEntity) {
-
-        return this.textRange.contains(textEntity.getTextRange());
-    }
-
-
-    public boolean intersects(TextEntity textEntity) {
-
-        return this.textRange.intersects(textEntity.getTextRange());
-    }
-
-
-    public void addEngine(Engine engine) {
-
-        engines.add(engine);
-    }
-
-
-    public void addEngines(Set<Engine> engines) {
-
-        this.engines.addAll(engines);
-    }
-
-
-    public void addReference(TextEntity reference) {
-
-        references.add(reference);
-    }
-
-
-    public void addReferences(List<TextEntity> references) {
-
-        this.references.addAll(references);
-    }
-
-
-    public boolean matchesAnnotationId(String manualRedactionId) {
-
-        return getRedactionPositionsPerPage().stream().anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
-    }
-
-
-    @Override
-    public String toString() {
-
-        StringBuilder sb = new StringBuilder();
-        sb.append("Entity[\"");
-        sb.append(value);
-        sb.append("\", ");
-        sb.append(textRange);
-        sb.append(", pages[");
-        pages.forEach(page -> {
-            sb.append(page.getNumber());
-            sb.append(", ");
-        });
-        sb.delete(sb.length() - 2, sb.length());
-        sb.append("], type = \"");
-        sb.append(type);
-        sb.append("\", EntityType.");
-        sb.append(entityType);
-        sb.append("]");
-        return sb.toString();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/AbstractSemanticNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/AbstractSemanticNode.java
@ -1,74 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.awt.geom.Rectangle2D;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.TextEntity;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-import lombok.extern.slf4j.Slf4j;
-
-@Slf4j
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@NoArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public abstract class AbstractSemanticNode implements GenericSemanticNode {
-
-    @Builder.Default
-    Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
-    List<Integer> treeId;
-
-    TextBlock textBlock;
-    @EqualsAndHashCode.Exclude
-    DocumentTree documentTree;
-
-    @Builder.Default
-    @EqualsAndHashCode.Exclude
-    Set<TextEntity> entities = new HashSet<>();
-
-    @EqualsAndHashCode.Exclude
-    Map<Page, Rectangle2D> bBoxCache;
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        if (textBlock == null) {
-            textBlock = GenericSemanticNode.super.getTextBlock();
-        }
-        return textBlock;
-    }
-
-
-    @Override
-    public String toString() {
-
-        return treeId.toString() + ": " + getType() + ": " + this.getTextBlock().buildSummary();
-    }
-
-
-    @Override
-    public Map<Page, Rectangle2D> getBBox() {
-
-        if (bBoxCache == null) {
-            bBoxCache = GenericSemanticNode.super.getBBox();
-        }
-        return bBoxCache;
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Document.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Document.java
@ -1,173 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.awt.geom.Rectangle2D;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@NoArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Document extends AbstractSemanticNode {
-
-    Set<Page> pages;
-    Integer numberOfPages;
-
-    LayoutDebugLayer layoutDebugLayer;
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.DOCUMENT;
-    }
-
-
-    /**
-     * Gets the sections of the document as a list.
-     *
-     * @return A list of all sections within the document.
-     */
-    public List<Section> getAllSections() {
-
-        return streamAllSubNodesOfType(NodeType.SECTION).map(node -> (Section) node)
-                .collect(Collectors.toList());
-    }
-
-
-    /**
-     * Gets the main sections of the document as a list.
-     *
-     * @return A list of main sections within the document
-     * @deprecated This method is marked for removal.
-     * Use {@link #streamChildrenOfType(NodeType)} instead,
-     * or {@link #getChildrenOfTypeSectionOrSuperSection()} which returns children of type SECTION as well as SUPER_SECTION.
-     */
-    @Deprecated(forRemoval = true)
-    public List<Section> getMainSections() {
-
-        return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
-                .collect(Collectors.toList());
-    }
-
-
-    /**
-     * Gets the direct children of type SECTION or SUPER_SECTION of the document as a list of SemanticNode objects.
-     *
-     * @return A list of all children of type SECTION or SUPER_SECTION.
-     */
-    public List<SemanticNode> getChildrenOfTypeSectionOrSuperSection() {
-
-        return streamChildren().filter(semanticNode -> semanticNode.getType().equals(NodeType.SECTION) || semanticNode.getType().equals(NodeType.SUPER_SECTION))
-                .toList();
-    }
-
-
-    public List<Header> getHeaders() {
-
-        return streamChildrenOfType(NodeType.HEADER).map(node -> (Header) node)
-                .collect(Collectors.toList());
-    }
-
-
-    public List<Footer> getFooters() {
-
-        return streamChildrenOfType(NodeType.FOOTER).map(node -> (Footer) node)
-                .collect(Collectors.toList());
-    }
-
-
-    @Override
-    public Headline getHeadline() {
-
-        return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node)
-                .findFirst().orElse(Headline.builder().build());
-    }
-
-
-    public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
-
-        return streamAllNodes().filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getTextBlock);
-    }
-
-
-    @Override
-    public List<Integer> getTreeId() {
-
-        return Collections.emptyList();
-    }
-
-
-    @Override
-    public void setTreeId(List<Integer> tocId) {
-
-        throw new UnsupportedOperationException("Document is always the root of the TablePageBlock of Contents");
-    }
-
-
-    private Stream<SemanticNode> streamAllNodes() {
-
-        return getDocumentTree().allEntriesInOrder()
-                .map(DocumentTree.Entry::getNode);
-    }
-
-
-    public Stream<Image> streamAllImages() {
-
-        return streamAllSubNodesOfType(NodeType.IMAGE).map(node -> (Image) node);
-    }
-
-
-    public Map<NodeType, Long> buildSemanticNodeCounts() {
-
-        return streamAllSubNodes().collect(Collectors.groupingBy(SemanticNode::getType, Collectors.counting()));
-    }
-
-
-    @Override
-    public String toString() {
-
-        return NodeType.DOCUMENT + ": " + this.getTextBlock().buildSummary();
-    }
-
-
-    @Override
-    public Map<Page, Rectangle2D> getBBox() {
-
-        Map<Page, Rectangle2D> bBox = new HashMap<>();
-        for (Page page : pages) {
-            bBox.put(page, new Rectangle2D.Double(0, 0, page.getWidth(), page.getHeight()));
-        }
-        return bBox;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/DuplicatedParagraph.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/DuplicatedParagraph.java
@ -1,35 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.util.stream.Stream;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
-
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@EqualsAndHashCode(callSuper = true)
-@SuperBuilder
-public class DuplicatedParagraph extends Paragraph {
-
-    TextBlock unsortedLeafTextBlock;
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        return Stream.of(super.getLeafTextBlock(), unsortedLeafTextBlock)
-                .collect(new TextBlockCollector());
-
-    }
-
-
-    @Override
-    public String toString() {
-
-        return super.toString();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Footer.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Footer.java
@ -1,57 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Footer extends AbstractSemanticNode {
-
-    TextBlock leafTextBlock;
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.FOOTER;
-    }
-
-
-    @Override
-    public boolean isLeaf() {
-
-        return true;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        return leafTextBlock;
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.FOOTER + ": " + leafTextBlock.buildSummary();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/GenericSemanticNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/GenericSemanticNode.java
@ -1,5 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-public interface GenericSemanticNode extends SemanticNode {
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Header.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Header.java
@ -1,58 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Header extends AbstractSemanticNode {
-
-    TextBlock leafTextBlock;
-
-
-    @Override
-    public boolean isLeaf() {
-
-        return true;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.HEADER;
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        return leafTextBlock;
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.HEADER + ": " + leafTextBlock.buildSummary();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Headline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Headline.java
@ -1,65 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Headline extends AbstractSemanticNode {
-
-    TextBlock leafTextBlock;
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.HEADLINE;
-    }
-
-
-    @Override
-    public boolean isLeaf() {
-
-        return true;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        return leafTextBlock;
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.HEADLINE + ": " + leafTextBlock.buildSummary();
-    }
-
-
-    @Override
-    public Headline getHeadline() {
-
-        return this;
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Image.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Image.java
@ -1,115 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.awt.geom.Rectangle2D;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.NoArgsConstructor;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@NoArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Image extends AbstractSemanticNode {
-
-    String id;
-
-    String representationHash;
-
-    ImageType imageType;
-    boolean transparent;
-    Rectangle2D position;
-
-    TextBlock leafTextBlock;
-
-    boolean redaction;
-    boolean ignored;
-    @Builder.Default
-    String redactionReason = "";
-    @Builder.Default
-    String legalBasis = "";
-    @Builder.Default
-    int matchedRule = -1;
-
-    @EqualsAndHashCode.Exclude
-    Page page;
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.IMAGE;
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        return leafTextBlock;
-    }
-
-
-    @Override
-    public Set<Page> getPages() {
-
-        return Collections.singleton(page);
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.IMAGE + ": " + imageType.toString() + " " + position;
-    }
-
-
-    @Override
-    public Map<Page, Rectangle2D> getBBox() {
-
-        Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
-        bBoxPerPage.put(page, position);
-        return bBoxPerPage;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public boolean isLeaf() {
-
-        return true;
-    }
-
-
-    public double getArea() {
-
-        return position.getWidth() * position.getHeight();
-    }
-
-
-    public boolean isFullPageImage() {
-
-        return imageType.equals(ImageType.OCR) || getArea() >= 0.5 * page.getArea();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/ImageType.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/ImageType.java
@ -1,26 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.util.Locale;
-
-public enum ImageType {
-    LOGO,
-    FORMULA,
-    SIGNATURE,
-    SIGNATURE_VISUAL,
-    OTHER,
-    OCR,
-    GRAPHIC;
-
-
-    public static ImageType fromString(String imageType) {
-
-        return switch (imageType.toLowerCase(Locale.ROOT)) {
-            case "logo" -> ImageType.LOGO;
-            case "formula" -> ImageType.FORMULA;
-            case "signature" -> ImageType.SIGNATURE;
-            case "ocr" -> ImageType.OCR;
-            case "graphic" -> ImageType.GRAPHIC;
-            default -> ImageType.OTHER;
-        };
-    }
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Page.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Page.java
@ -1,96 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.TextEntity;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.EqualsAndHashCode;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.experimental.FieldDefaults;
-
-@Getter
-@Setter
-@Builder
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Page {
-
-    Integer number;
-    Integer height;
-    Integer width;
-    Integer rotation;
-
-    @EqualsAndHashCode.Exclude
-    List<SemanticNode> mainBody;
-    @EqualsAndHashCode.Exclude
-    Header header;
-    @EqualsAndHashCode.Exclude
-    Footer footer;
-
-    @Builder.Default
-    @EqualsAndHashCode.Exclude
-    Set<TextEntity> entities = new HashSet<>();
-
-    @Builder.Default
-    @EqualsAndHashCode.Exclude
-    Set<Image> images = new HashSet<>();
-
-
-    public static Page fromClassificationPage(ClassificationPage classificationPage) {
-
-        return Page.builder()
-                .height((int) classificationPage.getPageHeight())
-                .width((int) classificationPage.getPageWidth())
-                .number(classificationPage.getPageNumber())
-                .rotation(classificationPage.getRotation())
-                .mainBody(new LinkedList<>())
-                .build();
-    }
-
-
-    public TextBlock getMainBodyTextBlock() {
-
-        return mainBody.stream()
-                .filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getLeafTextBlock)
-                .collect(new TextBlockCollector());
-    }
-
-
-    @Override
-    public String toString() {
-
-        return String.valueOf(number);
-    }
-
-
-    @Override
-    public int hashCode() {
-
-        return number;
-    }
-
-
-    @Override
-    public boolean equals(Object o) {
-
-        return o instanceof Page && o.hashCode() == this.hashCode();
-    }
-
-
-    public double getArea() {
-
-        return height * width;
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Paragraph.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Paragraph.java
@ -1,51 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PROTECTED)
-public class Paragraph extends AbstractSemanticNode {
-
-    TextBlock leafTextBlock;
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.PARAGRAPH;
-    }
-
-
-    @Override
-    public boolean isLeaf() {
-
-        return true;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        return leafTextBlock;
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java
@ -1,53 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@EqualsAndHashCode(callSuper = true)
-public class Section extends AbstractSemanticNode {
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.SECTION;
-    }
-
-
-    public Headline getHeadline() {
-
-        return streamChildrenOfType(NodeType.HEADLINE).map(node -> (Headline) node)
-                .findFirst().orElseGet(() -> getParent().getHeadline());
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    public boolean hasTables() {
-
-        return streamAllSubNodesOfType(NodeType.TABLE).findAny().isPresent();
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.SECTION + ": " + this.getTextBlock().buildSummary();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SemanticNode.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SemanticNode.java
@ -1,507 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import static java.lang.String.format;
-
-import java.awt.geom.Rectangle2D;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.EntityType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.TextEntity;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
-import com.knecon.fforesight.service.layoutparser.processor.utils.BBoxMergingUtility;
-import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-
-public interface SemanticNode {
-
-    /**
-     * Returns the type of this node, such as Section, Paragraph, etc.
-     *
-     * @return NodeType of this node
-     */
-    NodeType getType();
-
-
-    /**
-     * Searches all Nodes located underneath this Node in the DocumentTree and concatenates their AtomicTextBlocks into a single TextBlock.
-     * So, for a Section all TextBlocks of Subsections, Paragraphs, and Tables are concatenated into a single TextBlock
-     * If the Node is a Leaf, the LeafTextBlock will be returned instead.
-     *
-     * @return TextBlock containing all AtomicTextBlocks that are located under this Node.
-     */
-    default TextBlock getTextBlock() {
-
-        return streamAllSubNodes().filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getTextBlock)
-                .collect(new TextBlockCollector());
-    }
-
-
-    /**
-     * Any Node maintains its own Set of Entities.
-     * This Set contains all Entities whose boundary intersects the boundary of this node.
-     *
-     * @return Set of all Entities associated with this Node
-     */
-    Set<TextEntity> getEntities();
-
-
-    /**
-     * Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
-     *
-     * @return Set of PageNodes this node appears on.
-     */
-    default Set<Page> getPages() {
-
-        return getTextBlock().getPages();
-    }
-
-
-    default Page getFirstPage() {
-
-        return getTextBlock().getPages()
-                .stream()
-                .min(Comparator.comparingInt(Page::getNumber)).orElseThrow(() -> new IllegalStateException("SemanticNode has no Page!"));
-    }
-
-
-    /**
-     * Each AtomicTextBlock is assigned a page, so to get the pages for this boundary, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
-     *
-     * @return Set of PageNodes this node appears on.
-     */
-    default Set<Page> getPages(TextRange textRange) {
-
-        if (!getBoundary().contains(textRange)) {
-            throw new IllegalArgumentException(format("%s which was used to query for pages is not contained in the %s of this node!", textRange, getBoundary()));
-        }
-        return getTextBlock().getPages(textRange);
-    }
-
-
-    default boolean isOnPage(int pageNumber) {
-
-        return getPages().stream()
-                .anyMatch(page -> page.getNumber() == pageNumber);
-    }
-
-
-    /**
-     * Returns the DocumentTree Object.
-     *
-     * @return the DocumentTree of the Document this node belongs to
-     */
-    DocumentTree getDocumentTree();
-
-
-    /**
-     * The id is a List of Integers uniquely identifying this node in the DocumentTree.
-     *
-     * @return the DocumentTree ID
-     */
-    List<Integer> getTreeId();
-
-
-    /**
-     * This should only be used during graph construction.
-     *
-     * @param tocId List of Integers
-     */
-    void setTreeId(List<Integer> tocId);
-
-
-    /**
-     * Traverses the Tree up, until it hits a Headline or hits a Section which will then return the first Headline from its children.
-     * Throws NotFoundException if no Headline is found this way
-     *
-     * @return First Headline found
-     */
-    default Headline getHeadline() {
-
-        return getParent().getHeadline();
-    }
-
-
-    /**
-     * Checks if its TocId has a length greater than zero.
-     *
-     * @return boolean indicating whether this Node has a Parent in the DocumentTree
-     */
-    default boolean hasParent() {
-
-        return getDocumentTree().hasParentById(getTreeId());
-    }
-
-
-    /**
-     * @return The SemanticNode representing the Parent in the DocumentTree
-     * throws NotFoundException, when no parent is present
-     */
-    default SemanticNode getParent() {
-
-        return getDocumentTree().getParentEntryById(getTreeId()).getNode();
-    }
-
-
-    /**
-     * @return The SemanticNode which is directly underneath the document and also under which this node is.
-     * if this is the highest child node or the document itself, it returns itself.
-     */
-    default SemanticNode getHighestParent() {
-
-        return getDocumentTree().getHighestParentById(getTreeId());
-    }
-
-
-    /**
-     * Leaf means a SemanticNode has direct access to a TextBlock, by default this is false and must be overridden.
-     * Currently only Sections, Images, and Tables are not leaves.
-     * A TableCell might be a leaf depending on its area compared to the page.
-     *
-     * @return boolean, indicating if a Node has direct access to a TextBlock
-     */
-    default boolean isLeaf() {
-
-        return false;
-    }
-
-
-    /**
-     * Leaf means a SemanticNode has direct access to a TextBlock, by default this is false and must be overridden.
-     * Currently only Sections and Tables are no leaves.
-     *
-     * @return AtomicTextBlock
-     */
-    default TextBlock getLeafTextBlock() {
-
-        throw new UnsupportedOperationException("Only leaf Nodes have access to LeafTextBlocks!");
-    }
-
-
-    /**
-     * Should only be used during construction of the Graph. Sets the LeafTextBlock of this SemanticNode.
-     *
-     * @param textBlock the TextBlock to set as the LeafTextBlock of this SemanticNode
-     */
-    default void setLeafTextBlock(TextBlock textBlock) {
-
-        throw new UnsupportedOperationException();
-    }
-
-
-    /**
-     * Checks whether this SemanticNode has any Entity with EntityType.ENTITY of the provided type.
-     *
-     * @param type string representing the type of entity to check for
-     * @return true, if this SemanticNode has at least one Entity of the provided type
-     */
-    default boolean hasEntitiesOfType(String type) {
-
-        return getEntities().stream()
-                .filter(entity -> entity.getEntityType().equals(EntityType.ENTITY))
-                .anyMatch(redactionEntity -> redactionEntity.getType().equals(type));
-    }
-
-
-    /**
-     * Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
-     *
-     * @param type string representing the type of entities to return
-     * @return List of RedactionEntities of any the type
-     */
-    default List<TextEntity> getEntitiesOfType(String type) {
-
-        return getEntities().stream()
-                .filter(redactionEntity -> redactionEntity.getType().equals(type))
-                .toList();
-    }
-
-
-    /**
-     * Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
-     *
-     * @param types A list of strings representing the types of entities to return
-     * @return List of RedactionEntities of any provided type
-     */
-    default List<TextEntity> getEntitiesOfType(List<String> types) {
-
-        return getEntities().stream()
-                .filter(redactionEntity -> redactionEntity.isAnyType(types))
-                .toList();
-    }
-
-
-    /**
-     * Each AtomicTextBlock has an index on its page, this returns the number of the first AtomicTextBlock underneath this node.
-     * If this node does not have any AtomicTexBlocks underneath it, e.g. an empty TableCell. It returns -1.
-     *
-     * @return Integer representing the number on the page
-     */
-    default Integer getNumberOnPage() {
-
-        TextBlock textBlock = getTextBlock();
-        if (!textBlock.getAtomicTextBlocks().isEmpty()) {
-            return getTextBlock().getAtomicTextBlocks().get(0).getNumberOnPage();
-        } else {
-            return -1;
-        }
-    }
-
-
-    /**
-     * Checks if the SemanticNode contains any text.
-     *
-     * @return true, if this node's TextBlock is not empty
-     */
-    default boolean hasText() {
-
-        return !getTextBlock().isEmpty();
-    }
-
-
-    /**
-     * Checks whether this SemanticNode contains the provided String.
-     *
-     * @param string A String which the TextBlock might contain
-     * @return true, if this node's TextBlock contains the string
-     */
-    default boolean containsString(String string) {
-
-        return getTextBlock().getSearchText().contains(string);
-    }
-
-
-    /**
-     * Checks whether this SemanticNode contains all the provided Strings.
-     *
-     * @param strings A List of Strings which the TextBlock might contain
-     * @return true, if this node's TextBlock contains all strings
-     */
-    default boolean containsStrings(List<String> strings) {
-
-        return strings.stream()
-                .allMatch(this::containsString);
-    }
-
-
-    /**
-     * Checks whether this SemanticNode contains all the provided Strings ignoring case.
-     *
-     * @param string A String which the TextBlock might contain
-     * @return true, if this node's TextBlock contains the string ignoring case
-     */
-    default boolean containsStringIgnoreCase(String string) {
-
-        return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT));
-    }
-
-
-    /**
-     * Checks whether this SemanticNode contains any of the provided Strings.
-     *
-     * @param strings A List of Strings which the TextBlock might contain
-     * @return true, if this node's TextBlock contains any of the strings
-     */
-    default boolean containsAnyString(List<String> strings) {
-
-        return strings.stream()
-                .anyMatch(this::containsString);
-    }
-
-
-    /**
-     * Checks whether this SemanticNode contains any of the provided Strings ignoring case.
-     *
-     * @param strings A List of Strings which the TextBlock might contain
-     * @return true, if this node's TextBlock contains any of the strings
-     */
-    default boolean containsAnyStringIgnoreCase(List<String> strings) {
-
-        return strings.stream()
-                .anyMatch(this::containsStringIgnoreCase);
-    }
-
-
-    /**
-     * This function is used during insertion of EntityNodes into the graph, it checks if the boundary of the RedactionEntity intersects or even contains the RedactionEntity.
-     * It sets the fields accordingly and recursively calls this function on all its children.
-     *
-     * @param textEntity RedactionEntity, which is being inserted into the graph
-     */
-    default void addThisToEntityIfIntersects(TextEntity textEntity) {
-
-        TextBlock textBlock = getTextBlock();
-        if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
-            if (textBlock.containsBoundary(textEntity.getTextRange())) {
-                textEntity.setDeepestFullyContainingNode(this);
-            }
-
-            textEntity.addIntersectingNode(this);
-            streamChildren().filter(semanticNode -> semanticNode.getBoundary().intersects(textEntity.getTextRange()))
-                    .forEach(node -> node.addThisToEntityIfIntersects(textEntity));
-        }
-    }
-
-
-    /**
-     * returns the set of layoutengines.
-     *
-     * @return set of layoutengines.
-     */
-    Set<LayoutEngine> getEngines();
-
-
-    /**
-     * adds a layoutengine to the set.
-     */
-    default void addEngine(LayoutEngine engine) {
-
-        getEngines().add(engine);
-    }
-
-
-    /**
-     * Streams all children located directly underneath this node in the DocumentTree.
-     *
-     * @return Stream of all children
-     */
-    default Stream<SemanticNode> streamChildren() {
-
-        return getDocumentTree().childNodes(getTreeId());
-    }
-
-
-    /**
-     * Streams all children located directly underneath this node in the DocumentTree of the provided type.
-     *
-     * @return Stream of all children
-     */
-    default Stream<SemanticNode> streamChildrenOfType(NodeType nodeType) {
-
-        return getDocumentTree().childNodesOfType(getTreeId(), nodeType);
-    }
-
-
-    /**
-     * Recursively streams all SemanticNodes located underneath this node in the DocumentTree in order.
-     *
-     * @return Stream of all SubNodes
-     */
-    default Stream<SemanticNode> streamAllSubNodes() {
-
-        return getDocumentTree().allSubEntriesInOrder(getTreeId())
-                .map(DocumentTree.Entry::getNode);
-    }
-
-
-    /**
-     * Recursively streams all SemanticNodes of the provided type located underneath this node in the DocumentTree in order.
-     *
-     * @return Stream of all SubNodes
-     */
-    default Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType) {
-
-        return getDocumentTree().allSubEntriesInOrder(getTreeId())
-                .filter(entry -> entry.getType().equals(nodeType))
-                .map(DocumentTree.Entry::getNode);
-    }
-
-
-    /**
-     * The Boundary is the start and end string offsets in the reading order of the document.
-     *
-     * @return Boundary of this Node's TextBlock
-     */
-    default TextRange getBoundary() {
-
-        return getTextBlock().getTextRange();
-    }
-
-
-    /**
-     * If this Node is a Leaf it will calculate the boundingBox of its LeafTextBlock, otherwise it will calculate the Union of the BoundingBoxes of all its Children.
-     * If called on the Document, it will return the cropbox of each page
-     *
-     * @return Rectangle2D fully encapsulating this Node for each page.
-     */
-    default Map<Page, Rectangle2D> getBBox() {
-
-        if (isLeaf()) {
-            return getBBoxFromLeafTextBlock();
-        }
-
-        return getBBoxFromChildren();
-    }
-
-
-    /**
-     * Checks whether the Bounding Box of this SemanticNode contains the provided rectangle on the provided page.
-     *
-     * @param rectangle2D The rectangle to check if it is contained
-     * @param pageNumber  The Page number on which the rectangle should be checked
-     * @return boolean
-     */
-    default boolean containsRectangle(Rectangle2D rectangle2D, Integer pageNumber) {
-
-        Page helperPage = Page.builder().number(pageNumber).build();
-        if (!getPages().contains(helperPage)) {
-            return false;
-        }
-        return getBBox().get(helperPage).contains(rectangle2D);
-    }
-
-
-    /**
-     * TODO: this produces unwanted results for sections spanning multiple columns.
-     * Computes the Union of the bounding boxes of all children recursively.
-     *
-     * @return The union of the BoundingBoxes of all children
-     */
-    private Map<Page, Rectangle2D> getBBoxFromChildren() {
-
-        List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().filter(child -> !isFullPageImage(child))
-                .map(SemanticNode::getBBox)
-                .toList();
-        return BBoxMergingUtility.mergeBBoxes(childrenBBoxes);
-    }
-
-
-    private static boolean isFullPageImage(SemanticNode child) {
-
-        if (!child.getType().equals(NodeType.IMAGE)) {
-            return false;
-        }
-        return ((Image) child).isFullPageImage();
-    }
-
-
-    /**
-     * @return The union of all BoundingBoxes of the TextBlock of this node
-     */
-    private Map<Page, Rectangle2D> getBBoxFromLeafTextBlock() {
-
-        Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
-        Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks()
-                .stream()
-                .collect(Collectors.groupingBy(AtomicTextBlock::getPage));
-        atomicTextBlockPerPage.forEach((page, atbs) -> bBoxPerPage.put(page, RectangleTransformations.bBoxUnionAtomicTextBlock(atbs)));
-        return bBoxPerPage;
-    }
-
-
-    void accept(NodeVisitor visitor);
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java
@ -1,47 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-@EqualsAndHashCode(callSuper = true)
-public class SuperSection extends AbstractSemanticNode {
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.SUPER_SECTION;
-    }
-
-
-    public Headline getHeadline() {
-
-        return streamChildrenOfType(NodeType.HEADLINE).map(node -> (Headline) node)
-                .findFirst().orElseGet(() -> getParent().getHeadline());
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.SUPER_SECTION + ": " + this.getTextBlock().buildSummary();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Table.java
@ -1,363 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import static java.lang.String.format;
-
-import java.awt.geom.Rectangle2D;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.IntStream;
-import java.util.stream.Stream;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.TextEntity;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@Builder
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class Table implements SemanticNode {
-
-    @Builder.Default
-    Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
-    List<Integer> treeId;
-    DocumentTree documentTree;
-
-    int numberOfRows;
-    int numberOfCols;
-    TextBlock textBlock;
-
-    @Builder.Default
-    @EqualsAndHashCode.Exclude
-    Set<TextEntity> entities = new HashSet<>();
-
-    @EqualsAndHashCode.Exclude
-    Map<Page, Rectangle2D> bBoxCache;
-
-    /**
-     * Streams all entities in this table, that appear in a row, which contains any of the provided strings.
-     *
-     * @param strings Strings to check whether a row contains them
-     * @return Stream of all entities in this table, that appear in a row, which contains any of the provided strings
-     */
-    public Stream<TextEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
-
-        return IntStream.range(0, numberOfRows).boxed()
-                .filter(row -> rowContainsStringsIgnoreCase(row, strings))
-                .flatMap(this::streamRow)
-                .map(TableCell::getEntities)
-                .flatMap(Collection::stream);
-    }
-
-
-    /**
-     * Checks whether the specified row contains all the provided strings.
-     *
-     * @param row     the row to check as an Integer, must be smaller than numberOfRows
-     * @param strings a list of strings to check for
-     * @return true, if all strings appear in the provided row
-     */
-    public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {
-
-        String rowText = streamRow(row).map(TableCell::getTextBlock)
-                .collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
-        return strings.stream()
-                .map(String::toLowerCase)
-                .allMatch(rowText::contains);
-    }
-
-
-    /**
-     * Streams all entities which appear in a row where at least one cell has the provided header and the provided value.
-     *
-     * @param header the header value to search for
-     * @param value  the string which the table cell should contain
-     * @return a stream of all entities, which appear in a row where at least one cell has the provided header and the provided value.
-     */
-    public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
-
-        List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header))
-                .map(TableCell::getCol)
-                .toList();
-        return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
-                        .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
-                .map(TableCell::getEntities)
-                .flatMap(Collection::stream);
-    }
-
-
-    /**
-     * Streams all entities which appear in a row where at least one cell has the provided header and any provided value.
-     *
-     * @param header the header value to search for
-     * @param values the strings which the table cell should contain
-     * @return a stream of all entities, which appear in a row where at least one cell has the provided header and any provided value.
-     */
-    public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
-
-        List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header))
-                .map(TableCell::getCol)
-                .toList();
-        return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
-                        .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
-                .map(TableCell::getEntities)
-                .flatMap(Collection::stream);
-    }
-
-
-    /**
-     * Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
-     *
-     * @param types type strings to check whether a row contains an entity like them
-     * @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
-     */
-    public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
-
-        List<Integer> rowsWithEntityOfType = IntStream.range(0, numberOfRows).boxed()
-                .filter(rowNumber -> streamEntityTypesInRow(rowNumber).anyMatch(existingType -> types.stream()
-                        .anyMatch(typeToCheck -> typeToCheck.equals(existingType))))
-                .toList();
-
-        return rowsWithEntityOfType.stream()
-                .flatMap(this::streamRow)
-                .map(TableCell::getEntities)
-                .flatMap(Collection::stream);
-    }
-
-
-    /**
-     * Streams all entities in this table, that appear in a row, which does not contain any entity with any of the provided types.
-     *
-     * @param types type strings to check whether a row doesn't contain an entity like it
-     * @return Stream of all entities in this table, that appear in a row, which does not contain any entity with any of the provided types.
-     */
-    public Stream<TextEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
-
-        List<Integer> rowsWithNoEntityOfType = IntStream.range(0, numberOfRows).boxed()
-                .filter(rowNumber -> streamEntityTypesInRow(rowNumber).noneMatch(existingType -> types.stream()
-                        .anyMatch(typeToCheck -> typeToCheck.equals(existingType))))
-                .toList();
-
-        return rowsWithNoEntityOfType.stream()
-                .flatMap(this::streamRow)
-                .map(TableCell::getEntities)
-                .flatMap(Collection::stream);
-    }
-
-
-    private Stream<String> streamEntityTypesInRow(Integer rowNumber) {
-
-        return streamRow(rowNumber).map(TableCell::getEntities)
-                .flatMap(Collection::stream)
-                .map(TextEntity::getType)
-                .distinct();
-    }
-
-
-    /**
-     * Returns a TableCell at the provided row and column location.
-     *
-     * @param row int representing the row, must be smaller than numberOfRows
-     * @param col int representing the col, must be smaller than numberOfCols
-     * @return TableCell at the provided location in the table
-     */
-    public TableCell getCell(int row, int col) {
-
-        if (numberOfRows - row < 0 || numberOfCols - col < 0) {
-            throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols));
-        }
-        int idx = row * numberOfCols + col;
-        return (TableCell) documentTree.getEntryById(treeId).getChildren()
-                .get(idx).getNode();
-    }
-
-
-    /**
-     * Streams all TableCells in this Table row-wise.
-     *
-     * @return Stream of all TableCells
-     */
-    public Stream<TableCell> streamTableCells() {
-
-        return streamChildrenOfType(NodeType.TABLE_CELL).map(node -> (TableCell) node);
-    }
-
-
-    /**
-     * Streams all TableCells in this Table which have the provided header row-wise.
-     *
-     * @return Stream of all TableCells which have the provided header
-     */
-    public Stream<TableCell> streamTableCellsWithHeader(String header) {
-
-        return streamHeaders().filter(tableCellNode -> tableCellNode.getTextBlock().getSearchText().contains(header))
-                .map(TableCell::getCol)
-                .flatMap(this::streamCol)
-                .filter(tableCellNode -> !tableCellNode.isHeader());
-    }
-
-
-    /**
-     * Streams all TableCells belonging to the provided column from top down.
-     *
-     * @param col int representing the column
-     * @return Stream of all TableCell in the provided column
-     */
-    public Stream<TableCell> streamCol(int col) {
-
-        return IntStream.range(0, numberOfRows).boxed()
-                .map(row -> getCell(row, col));
-    }
-
-
-    /**
-     * Streams all TableCells belonging to the provided row from left to right.
-     *
-     * @param row int representing the row
-     * @return Stream of all TableCell in the provided row
-     */
-    public Stream<TableCell> streamRow(int row) {
-
-        return IntStream.range(0, numberOfCols).boxed()
-                .map(col -> getCell(row, col));
-    }
-
-
-    /**
-     * Streams all TableCells row-wise and filters them with header == true.
-     *
-     * @return Stream of all TableCells with header == true
-     */
-    public Stream<TableCell> streamHeaders() {
-
-        return streamTableCells().filter(TableCell::isHeader);
-    }
-
-
-    /**
-     * Streams all TableCells of the provided row and column and filters them with header == true.
-     *
-     * @param row int representing the row
-     * @param col int representing the column
-     * @return Stream of all TableCells with header == true in the provided row or col
-     */
-    public Stream<TableCell> streamHeadersForCell(int row, int col) {
-
-        return Stream.concat(streamRow(row), streamCol(col))
-                .filter(TableCell::isHeader);
-    }
-
-
-    /**
-     * Streams all Headers and checks if any equal the provided string.
-     *
-     * @param header string to check the headers for
-     * @return true, if at least one header equals the provided string
-     */
-    public boolean hasHeader(String header) {
-
-        return streamHeaders().anyMatch(tableCellNode -> tableCellNode.getTextBlock().getSearchText().strip().equals(header));
-    }
-
-
-    /**
-     * Checks if this table has a column with the provided header and any of the table cells in that column contain the provided value.
-     *
-     * @param header string to find header cells
-     * @param value  string to check cells with provided header
-     * @return true, if this table has a column with the provided header and any of the table cells in that column contain the provided value
-     */
-    public boolean hasRowWithHeaderAndValue(String header, String value) {
-
-        return streamTableCellsWithHeader(header).anyMatch(tableCellNode -> tableCellNode.containsString(value));
-    }
-
-
-    /**
-     * Checks if this table has a column with the provided header and any of the table cells in that column contains any of the provided values.
-     *
-     * @param header string to find header cells
-     * @param values List of strings to check cells with provided header
-     * @return true, if this table has a column with the provided header and any of the table cells in that column contains any of the provided values.
-     */
-    public boolean hasRowWithHeaderAndAnyValue(String header, List<String> values) {
-
-        return streamTableCellsWithHeader(header).anyMatch(tableCellNode -> tableCellNode.containsAnyString(values));
-    }
-
-
-    /**
-     * Finds all entities of the provided type, which appear in the same row that the provided entity appears in.
-     *
-     * @param type            the type of entities to search for
-     * @param textEntity the entity, which appears in the row to search
-     * @return List of all entities of the provided type, which appear in the same row that the provided entity appears in.
-     */
-    public List<TextEntity> getEntitiesOfTypeInSameRow(String type, TextEntity textEntity) {
-
-        return textEntity.getIntersectingNodes()
-                .stream()
-                .filter(node -> node instanceof TableCell)
-                .map(node -> (TableCell) node)
-                .flatMap(tableCellNode -> streamRow(tableCellNode.getRow()))
-                .map(cell -> cell.getEntitiesOfType(type))
-                .flatMap(Collection::stream)
-                .toList();
-    }
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.TABLE;
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        if (textBlock == null) {
-            textBlock = SemanticNode.super.getTextBlock();
-        }
-        return textBlock;
-    }
-
-
-    @Override
-    public String toString() {
-
-        return treeId.toString() + ": " + NodeType.TABLE + ": #cols: " + numberOfCols + ", #rows: " + numberOfRows + ", " + this.getTextBlock().buildSummary();
-    }
-
-
-    @Override
-    public Map<Page, Rectangle2D> getBBox() {
-
-        if (bBoxCache == null) {
-            bBoxCache = SemanticNode.super.getBBox();
-        }
-        return bBoxCache;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-        visitor.visit(this);
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/TableCell.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/TableCell.java
@ -1,95 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
-
-import java.awt.geom.Rectangle2D;
-import java.util.HashMap;
-import java.util.Map;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.NodeVisitor;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.SuperBuilder;
-
-@Data
-@SuperBuilder
-@AllArgsConstructor
-@EqualsAndHashCode(callSuper = true)
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class TableCell extends AbstractSemanticNode {
-
-    int row;
-    int col;
-    boolean header;
-
-    Rectangle2D bBox;
-
-    TextBlock leafTextBlock;
-
-    TextBlock textBlock;
-
-
-    @Override
-    public Map<Page, Rectangle2D> getBBox() {
-
-        Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
-        getPages().forEach(page -> bBoxPerPage.put(page, bBox));
-        return bBoxPerPage;
-    }
-
-
-    @Override
-    public void accept(NodeVisitor visitor) {
-
-        visitor.visit(this);
-    }
-
-
-    @Override
-    public NodeType getType() {
-
-        return NodeType.TABLE_CELL;
-    }
-
-
-    @Override
-    public boolean isLeaf() {
-
-        return getDocumentTree().getEntryById(getTreeId()).getChildren().isEmpty();
-    }
-
-
-    @Override
-    public TextBlock getTextBlock() {
-
-        if (isLeaf()) {
-            return leafTextBlock;
-        }
-
-        if (textBlock == null) {
-            textBlock = buildTextBlock();
-        }
-        return textBlock;
-    }
-
-
-    private TextBlock buildTextBlock() {
-
-        return streamAllSubNodes().filter(SemanticNode::isLeaf)
-                .map(SemanticNode::getLeafTextBlock)
-                .collect(new TextBlockCollector());
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getTreeId() + ": " + NodeType.TABLE_CELL + ": " + this.getTextBlock().buildSummary();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/AtomicTextBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/AtomicTextBlock.java
@ -1,275 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock;
-
-import static java.lang.String.format;
-
-import java.awt.geom.Rectangle2D;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
-import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@Builder
-@AllArgsConstructor
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class AtomicTextBlock implements TextBlock {
-
-    Long id;
-    Integer numberOnPage;
-    Page page;
-
-    //string coordinates
-    TextRange textRange;
-    String searchText;
-    @Builder.Default
-    List<Integer> lineBreaks = new ArrayList<>();
-    @Builder.Default
-    List<TextRange> boldTextBoundaries = new ArrayList<>();
-    @Builder.Default
-    List<TextRange> italicTextBoundaries = new ArrayList<>();
-    String orientation;
-    int textDirection;
-
-    //position coordinates
-    @Builder.Default
-    List<Integer> stringIdxToPositionIdx = new ArrayList<>();
-    @Builder.Default
-    List<Rectangle2D> positions = new ArrayList<>();
-
-    @EqualsAndHashCode.Exclude
-    SemanticNode parent;
-
-
-    @Override
-    public int numberOfLines() {
-
-        return lineBreaks.size() + 1;
-    }
-
-
-    @Override
-    public String subSequenceWithLineBreaks(TextRange stringTextRange) {
-
-        if (stringTextRange.length() == 0 || !getTextRange().contains(stringTextRange)) {
-            return "";
-        }
-
-        Set<Integer> lbInBoundary = lineBreaks.stream()
-                .map(i -> i + this.textRange.start())
-                .filter(stringTextRange::contains)
-                .collect(Collectors.toSet());
-        if (stringTextRange.end() == getTextRange().end()) {
-            lbInBoundary.add(getTextRange().end());
-        }
-        StringBuilder sb = new StringBuilder();
-        for (int i = stringTextRange.start(); i < stringTextRange.end(); i++) {
-            char character = this.charAt(i);
-            if (lbInBoundary.contains(i + 1)) {
-                // always plus one, due to the linebreaks being an exclusive end index
-                if (!Character.isWhitespace(character)) {
-                    lbInBoundary.remove(i + 1);
-                    lbInBoundary.add(i + 2);
-                    sb.append(character);
-                    continue;
-                }
-                sb.append("\n");
-            } else {
-                sb.append(character);
-            }
-        }
-        return sb.toString();
-    }
-
-
-    public static AtomicTextBlock fromSearchTextWithTextPosition(String searchText,
-                                                                 List<Integer> lineBreaks,
-                                                                 List<TextRange> boldTextBoundaries,
-                                                                 List<TextRange> italicTextBoundaries,
-                                                                 List<Rectangle2D> positions,
-                                                                 List<Integer> stringIdxToPositionIdx,
-                                                                 long idx,
-                                                                 SemanticNode parent,
-                                                                 int numberOnPage,
-                                                                 Page page,
-                                                                 int offset,
-                                                                 String orientation,
-                                                                 int textDirection) {
-
-        return AtomicTextBlock.builder()
-                .id(idx)
-                .parent(parent)
-                .searchText(searchText)
-                .numberOnPage(numberOnPage)
-                .page(page)
-                .lineBreaks(lineBreaks)
-                .boldTextBoundaries(boldTextBoundaries)
-                .italicTextBoundaries(italicTextBoundaries)
-                .positions(positions)
-                .stringIdxToPositionIdx(stringIdxToPositionIdx)
-                .textRange(new TextRange(offset, offset + searchText.length()))
-                .textDirection(textDirection)
-                .orientation(orientation)
-                .build();
-    }
-
-
-    public static AtomicTextBlock empty(Long textBlockIdx, int stringOffset, Page page, int numberOnPage, SemanticNode parent) {
-
-        return AtomicTextBlock.builder()
-                .id(textBlockIdx)
-                .textRange(new TextRange(stringOffset, stringOffset))
-                .searchText("")
-                .page(page)
-                .numberOnPage(numberOnPage)
-                .parent(parent)
-                .build();
-    }
-
-
-    public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData documentTextData, DocumentPositionData documentPositionData, SemanticNode parent, Page page) {
-
-        return AtomicTextBlock.builder()
-                .id(documentTextData.getId())
-                .numberOnPage(documentTextData.getNumberOnPage())
-                .page(page)
-                .textRange(new TextRange(documentTextData.getStart(), documentTextData.getEnd()))
-                .searchText(documentTextData.getSearchText())
-                .lineBreaks(Arrays.stream(documentTextData.getLineBreaks()).boxed()
-                                    .toList())
-                .stringIdxToPositionIdx(Arrays.stream(documentPositionData.getStringIdxToPositionIdx()).boxed()
-                                                .toList())
-                .positions(toRectangle2DList(documentPositionData.getPositions()))
-                .parent(parent)
-                .build();
-    }
-
-
-    private static List<Rectangle2D> toRectangle2DList(float[][] positions) {
-
-        return Arrays.stream(positions)
-                .map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3]))
-                .toList();
-    }
-
-
-    public CharSequence getLine(int lineNumber) {
-
-        if (lineNumber >= numberOfLines() || lineNumber < 0) {
-            throw new IndexOutOfBoundsException(format("line %d out of range for AtomicTextBlock with %d lines", lineNumber, numberOfLines()));
-        }
-        if (lineNumber == 0) {
-            if (lineBreaks.isEmpty()) {
-                return searchText;
-            }
-            return subSequence(textRange.start(), lineBreaks.get(0) + textRange.start());
-        } else if (lineNumber == numberOfLines() - 1) {
-            return subSequence(lineBreaks.get(lineBreaks.size() - 1) + textRange.start(), textRange.end());
-        }
-        return subSequence(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
-    }
-
-
-    @Override
-    public List<AtomicTextBlock> getAtomicTextBlocks() {
-
-        return List.of(this);
-    }
-
-
-    @Override
-    public int getNextLinebreak(int fromIndex) {
-
-        return lineBreaks.stream()//
-                       .filter(linebreak -> linebreak > fromIndex - textRange.start()) //
-                       .findFirst() //
-                       .orElse(searchText.length()) + textRange.start();
-    }
-
-
-    @Override
-    public int getPreviousLinebreak(int fromIndex) {
-
-        return lineBreaks.stream()//
-                       .filter(linebreak -> linebreak <= fromIndex - textRange.start())//
-                       .reduce((a, b) -> b)//
-                       .orElse(0) + textRange.start();
-    }
-
-
-    @Override
-    public Rectangle2D getPosition(int stringIdx) {
-
-        return positions.get(stringIdxToPositionIdx.get(stringIdx - textRange.start()));
-    }
-
-
-    @Override
-    public List<Rectangle2D> getPositions(TextRange stringTextRange) {
-
-        if (!containsBoundary(stringTextRange)) {
-            throw new IndexOutOfBoundsException(format("%s is out of bounds for %s", stringTextRange, this.textRange));
-        }
-        if (stringTextRange.length() == 0) {
-            return Collections.emptyList();
-        }
-
-        int startPositionIdx = stringIdxToPositionIdx.get(stringTextRange.start() - this.textRange.start());
-
-        if (stringTextRange.end() == this.textRange.end()) {
-            return positions.subList(startPositionIdx, positions.size());
-        }
-
-        return positions.subList(startPositionIdx, stringIdxToPositionIdx.get(stringTextRange.end() - this.textRange.start()));
-
-    }
-
-
-    public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
-
-        List<Rectangle2D> rectanglesPerLine = stringTextRange.split(getAllLineBreaksInBoundary(stringTextRange))
-                .stream()
-                .map(this::getPositions)
-                .map(RectangleTransformations::rectangleBBoxWithGaps)
-                .flatMap(Collection::stream)
-                .toList();
-        Map<Page, List<Rectangle2D>> rectanglePerLinePerPage = new HashMap<>();
-        rectanglePerLinePerPage.put(page, rectanglesPerLine);
-        return rectanglePerLinePerPage;
-    }
-
-
-    protected List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
-
-        return getLineBreaks().stream()
-                .map(linebreak -> linebreak + this.textRange.start())
-                .filter(textRange::contains)
-                .toList();
-    }
-
-
-    @Override
-    public String toString() {
-
-        return searchText;
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java
@ -1,271 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock;
-
-import static java.lang.String.format;
-
-import java.awt.geom.Rectangle2D;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Stream;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
-
-import lombok.AccessLevel;
-import lombok.Data;
-import lombok.experimental.FieldDefaults;
-
-@Data
-@FieldDefaults(level = AccessLevel.PRIVATE)
-public class ConcatenatedTextBlock implements TextBlock {
-
-    List<AtomicTextBlock> atomicTextBlocks;
-    String searchText;
-    TextRange textRange;
-
-
-    public static ConcatenatedTextBlock empty() {
-
-        return new ConcatenatedTextBlock(Collections.emptyList());
-    }
-
-
-    public ConcatenatedTextBlock(List<AtomicTextBlock> atomicTextBlocks) {
-
-        this.atomicTextBlocks = new LinkedList<>();
-        if (atomicTextBlocks.isEmpty()) {
-            textRange = new TextRange(-1, -1);
-            return;
-        }
-        var firstTextBlock = atomicTextBlocks.get(0);
-        this.atomicTextBlocks.add(firstTextBlock);
-        textRange = new TextRange(firstTextBlock.getTextRange().start(), firstTextBlock.getTextRange().end());
-
-        atomicTextBlocks.subList(1, atomicTextBlocks.size())
-                .forEach(this::concat);
-    }
-
-
-    public ConcatenatedTextBlock concat(TextBlock textBlock) {
-
-        int start = textBlock.getTextRange().start();
-        int end = textBlock.getTextRange().end();
-        if (this.atomicTextBlocks.isEmpty()) {
-            textRange.setStart(start);
-            textRange.setEnd(end);
-        } else if (textRange.end() != start) {
-            throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", textRange, textBlock.getTextRange()));
-        }
-        this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks());
-        textRange.setEnd(end);
-        this.searchText = null;
-        return this;
-    }
-
-
-    private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) {
-
-        return atomicTextBlocks.stream()
-                .filter(textBlock -> textBlock.getTextRange().containsExclusive(stringIdx))
-                .findAny()
-                .orElseThrow(IndexOutOfBoundsException::new);
-    }
-
-
-    private List<AtomicTextBlock> getAllAtomicTextBlocksPartiallyInStringBoundary(TextRange textRange) {
-
-        return atomicTextBlocks.stream()
-                .filter(tb -> tb.getTextRange().intersects(textRange))
-                .toList();
-    }
-
-
-    @Override
-    public String getSearchText() {
-
-        if (searchText == null) {
-            StringBuilder sb = new StringBuilder();
-            getAtomicTextBlocks().forEach(atb -> sb.append(atb.getSearchText()));
-            searchText = sb.toString();
-        }
-        return searchText;
-    }
-
-
-    @Override
-    public int numberOfLines() {
-
-        return atomicTextBlocks.stream()
-                .map(AtomicTextBlock::getLineBreaks)
-                .mapToInt(List::size).sum();
-    }
-
-
-    @Override
-    public int getNextLinebreak(int fromIndex) {
-
-        return getAtomicTextBlockByStringIndex(fromIndex).getNextLinebreak(fromIndex);
-    }
-
-
-    @Override
-    public int getPreviousLinebreak(int fromIndex) {
-
-        return getAtomicTextBlockByStringIndex(fromIndex).getPreviousLinebreak(fromIndex);
-    }
-
-
-    @Override
-    public List<Integer> getLineBreaks() {
-
-        return getAtomicTextBlocks().stream()
-                .flatMap(atomicTextBlock -> atomicTextBlock.getLineBreaks()
-                        .stream())
-                .toList();
-    }
-
-
-    @Override
-    public Rectangle2D getPosition(int stringIdx) {
-
-        return getAtomicTextBlockByStringIndex(stringIdx).getPosition(stringIdx);
-    }
-
-
-    @Override
-    public List<Rectangle2D> getPositions(TextRange stringTextRange) {
-
-        List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
-
-        if (textBlocks.size() == 1) {
-            return textBlocks.get(0).getPositions(stringTextRange);
-        }
-
-        AtomicTextBlock firstTextBlock = textBlocks.get(0);
-        List<Rectangle2D> positions = new LinkedList<>(firstTextBlock.getPositions(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end())));
-
-        for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
-            positions.addAll(textBlock.getPositions());
-        }
-
-        var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
-        positions.addAll(lastTextBlock.getPositions(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
-
-        return positions;
-    }
-
-
-    @Override
-    public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
-
-        List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
-
-        if (textBlocks.size() == 1) {
-            return textBlocks.get(0).getPositionsPerPage(stringTextRange);
-        }
-
-        AtomicTextBlock firstTextBlock = textBlocks.get(0);
-        Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = firstTextBlock.getPositionsPerPage(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end()));
-
-        for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
-            rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage, textBlock.getPositionsPerPage(textBlock.getTextRange()));
-        }
-
-        AtomicTextBlock lastTextBlock = textBlocks.get(textBlocks.size() - 1);
-        rectanglesPerLinePerPage = mergeEntityPositionsWithSamePageNode(rectanglesPerLinePerPage,
-                                                                        lastTextBlock.getPositionsPerPage(new TextRange(lastTextBlock.getTextRange().start(),
-                                                                                                                        stringTextRange.end())));
-
-        return rectanglesPerLinePerPage;
-    }
-
-
-    private Map<Page, List<Rectangle2D>> mergeEntityPositionsWithSamePageNode(Map<Page, List<Rectangle2D>> map1, Map<Page, List<Rectangle2D>> map2) {
-
-        Map<Page, List<Rectangle2D>> mergedMap = new HashMap<>(map1);
-        map2.forEach((pageNode, rectangles) -> mergedMap.merge(pageNode,
-                                                               rectangles,
-                                                               (l1, l2) -> Stream.concat(l1.stream(), l2.stream())
-                                                                       .toList()));
-        return mergedMap;
-    }
-
-
-    @Override
-    public String subSequenceWithLineBreaks(TextRange stringTextRange) {
-
-        if (stringTextRange.length() == 0 || !getTextRange().contains(stringTextRange)) {
-            return "";
-        }
-
-        List<AtomicTextBlock> textBlocks = getAllAtomicTextBlocksPartiallyInStringBoundary(stringTextRange);
-
-        if (textBlocks.size() == 1) {
-            return textBlocks.get(0).subSequenceWithLineBreaks(stringTextRange);
-        }
-
-        StringBuilder sb = new StringBuilder();
-        AtomicTextBlock firstTextBlock = textBlocks.get(0);
-        sb.append(firstTextBlock.subSequenceWithLineBreaks(new TextRange(stringTextRange.start(), firstTextBlock.getTextRange().end())));
-
-        for (AtomicTextBlock textBlock : textBlocks.subList(1, textBlocks.size() - 1)) {
-            sb.append(textBlock.searchTextWithLineBreaks());
-        }
-
-        var lastTextBlock = textBlocks.get(textBlocks.size() - 1);
-        sb.append(lastTextBlock.subSequenceWithLineBreaks(new TextRange(lastTextBlock.getTextRange().start(), stringTextRange.end())));
-
-        return sb.toString();
-    }
-
-
-    @Override
-    public String toString() {
-
-        return getSearchText();
-    }
-
-
-    @Override
-    public List<TextRange> getBoldTextBoundaries() {
-
-        return getAtomicTextBlocks().stream()
-                .map(AtomicTextBlock::getBoldTextBoundaries)
-                .flatMap(Collection::stream)
-                .toList();
-    }
-
-
-    @Override
-    public List<TextRange> getItalicTextBoundaries() {
-
-        return getAtomicTextBlocks().stream()
-                .map(AtomicTextBlock::getItalicTextBoundaries)
-                .flatMap(Collection::stream)
-                .toList();
-    }
-
-
-    @Override
-    public String getOrientation() {
-
-        if (atomicTextBlocks.isEmpty()) {
-            return "";
-        }
-        return atomicTextBlocks.get(0).getOrientation();
-    }
-
-
-    @Override
-    public int getTextDirection() {
-
-        if (atomicTextBlocks.isEmpty()) {
-            return 0;
-        }
-        return atomicTextBlocks.get(0).getTextDirection();
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/TextBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/TextBlock.java
@ -1,158 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock;
-
-import static java.lang.String.format;
-
-import java.awt.geom.Rectangle2D;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.TextRange;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
-
-public interface TextBlock extends CharSequence {
-
-    String getSearchText();
-
-
-    List<AtomicTextBlock> getAtomicTextBlocks();
-
-
-    List<TextRange> getBoldTextBoundaries();
-
-
-    List<TextRange> getItalicTextBoundaries();
-
-
-    String getOrientation();
-
-
-    int getTextDirection();
-
-
-    TextRange getTextRange();
-
-
-    int getNextLinebreak(int fromIndex);
-
-
-    int getPreviousLinebreak(int fromIndex);
-
-
-    List<Integer> getLineBreaks();
-
-
-    Rectangle2D getPosition(int stringIdx);
-
-
-    List<Rectangle2D> getPositions(TextRange stringTextRange);
-
-
-    Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange);
-
-
-    int numberOfLines();
-
-
-    String subSequenceWithLineBreaks(TextRange stringTextRange);
-
-
-    default String searchTextWithLineBreaks() {
-
-        return subSequenceWithLineBreaks(getTextRange());
-    }
-
-    default int indexOf(String searchTerm) {
-
-        return indexOf(searchTerm, getTextRange().start());
-    }
-
-
-    default Set<Page> getPages() {
-
-        return getAtomicTextBlocks().stream()
-                .map(AtomicTextBlock::getPage)
-                .collect(Collectors.toUnmodifiableSet());
-    }
-
-
-    default Set<Page> getPages(TextRange textRange) {
-
-        return getAtomicTextBlocks().stream()
-                .filter(atomicTextBlock -> atomicTextBlock.getTextRange().intersects(textRange))
-                .map(AtomicTextBlock::getPage)
-                .collect(Collectors.toUnmodifiableSet());
-    }
-
-
-    default int indexOf(String searchTerm, int startOffset) {
-
-        int start = getSearchText().indexOf(searchTerm, startOffset - getTextRange().start());
-        if (start == -1) {
-            return -1;
-        }
-        return start + getTextRange().start();
-    }
-
-
-    default CharSequence getFirstLine() {
-
-        return subSequence(getTextRange().start(), getNextLinebreak(getTextRange().start()));
-    }
-
-
-    default boolean containsBoundary(TextRange textRange) {
-
-        if (textRange.end() < textRange.start()) {
-            throw new IllegalArgumentException(format("Invalid %s, StartIndex must be smaller than EndIndex", textRange));
-        }
-        return getTextRange().contains(textRange);
-    }
-
-
-    default boolean containsIndex(int stringIndex) {
-
-        return getTextRange().containsExclusive(stringIndex);
-    }
-
-
-    default CharSequence subSequence(TextRange textRange) {
-
-        return subSequence(textRange.start(), textRange.end());
-    }
-
-
-    default String buildSummary() {
-
-        String[] words = getSearchText().split(" ");
-        int bound = Math.min(words.length, 4);
-        List<String> list = new ArrayList<>(Arrays.asList(words).subList(0, bound));
-
-        return String.join(" ", list);
-    }
-
-
-    @Override
-    default CharSequence subSequence(int start, int end) {
-
-        return getSearchText().substring(start - getTextRange().start(), end - getTextRange().start());
-    }
-
-
-    @Override
-    default int length() {
-
-        return getTextRange().length();
-    }
-
-
-    @Override
-    default char charAt(int index) {
-
-        return getSearchText().charAt(index - getTextRange().start());
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/TextBlockCollector.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/TextBlockCollector.java
@ -1,49 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock;
-
-import java.util.Set;
-import java.util.function.BiConsumer;
-import java.util.function.BinaryOperator;
-import java.util.function.Function;
-import java.util.function.Supplier;
-import java.util.stream.Collector;
-
-import lombok.NoArgsConstructor;
-
-@NoArgsConstructor
-public class TextBlockCollector implements Collector<TextBlock, ConcatenatedTextBlock, TextBlock> {
-
-    @Override
-    public Supplier<ConcatenatedTextBlock> supplier() {
-
-        return ConcatenatedTextBlock::empty;
-    }
-
-
-    @Override
-    public BiConsumer<ConcatenatedTextBlock, TextBlock> accumulator() {
-
-        return ConcatenatedTextBlock::concat;
-    }
-
-
-    @Override
-    public BinaryOperator<ConcatenatedTextBlock> combiner() {
-
-        return ConcatenatedTextBlock::concat;
-    }
-
-
-    @Override
-    public Function<ConcatenatedTextBlock, TextBlock> finisher() {
-
-        return a -> a;
-    }
-
-
-    @Override
-    public Set<Characteristics> characteristics() {
-
-        return Set.of(Characteristics.IDENTITY_FINISH, Characteristics.CONCURRENT);
-    }
-
-}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/image/ClassifiedImage.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/image/ClassifiedImage.java
@ -2,7 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.image;

 import java.awt.geom.Rectangle2D;

-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;

 import lombok.AllArgsConstructor;
 import lombok.Data;
@ -22,11 +22,10 @@ public class ClassifiedImage {
    private boolean isAppendedToSection;
    private boolean hasTransparency;
    private int page;
-    @NonNull
    private String representation;


-    public ClassifiedImage(@NonNull Rectangle2D position, @NonNull ImageType imageType, boolean hasTransparency, int page, @NonNull String representation) {
+    public ClassifiedImage(@NonNull Rectangle2D position, @NonNull ImageType imageType, boolean hasTransparency, int page, String representation) {

        this.position = position;
        this.imageType = imageType;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
@ -1,5 +1,6 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;

+import java.awt.geom.AffineTransform;
 import java.awt.geom.Point2D;
 import java.io.IOException;
 import java.util.ArrayList;
@ -26,6 +27,9 @@ import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocume
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
 import org.springframework.stereotype.Service;

+import com.knecon.fforesight.service.layoutparser.processor.utils.CoordinateTransforms;
+import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation;
+
 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;

@ -89,12 +93,13 @@ public class OutlineExtractorService {
            if (page == null) {
                return Optional.empty();
            }
-        }catch (IOException e){
+        } catch (IOException e) {
            log.info(String.format("Error occurred during position resolution for outline item with title %s: " + e, title));
            return Optional.empty();
        }

-        int pageNumber = document.getPages().indexOf(page);
+        int pageNumber = document.getPages().indexOf(page) + 1;
+        AffineTransform userSpaceToPageCoords = CoordinateTransforms.calculateInitialUserSpaceCoordsToPageCoords(PageInformation.fromPDPage(pageNumber, page));

        Optional<Point2D> outlinePosition = Optional.empty();

@ -123,8 +128,15 @@ public class OutlineExtractorService {
            log.info(String.format("Error occurred during position resolution for outline item on page %s with title %s: " + e, pageNumber, title));
        }

-        return Optional.of(new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, outlinePosition.orElse(new Point2D.Float(0, 0)), depth)));
+        return Optional.of(new OutlineObjectTreeNode(new OutlineObject(title,
+                                                                       pageNumber,
+                                                                       transformPointToPageCoords(outlinePosition, userSpaceToPageCoords), depth)));
+    }

+
+    private static Point2D transformPointToPageCoords(Optional<Point2D> outlinePosition, AffineTransform userSpaceToPageCoords) {
+
+        return outlinePosition.map(point -> userSpaceToPageCoords.transform(point, null)).orElse(null);
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObject.java
@ -1,27 +1,34 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;

 import java.awt.geom.Point2D;
+import java.util.Optional;

-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.RequiredArgsConstructor;
+import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
+
+import lombok.Getter;
+import lombok.Setter;

-@Data
-@RequiredArgsConstructor
-@AllArgsConstructor
 public class OutlineObject {

+    @Getter
    private final String title;
+    @Getter
    private final int pageNumber;
-    private Point2D point;
+    @Getter
    private final int treeDepth;

+    private Point2D point; // java coordinates, (0, 0) is always top left
+
+    @Getter
+    @Setter
    private boolean found;


    public OutlineObject(String title, int pageNumber, Point2D point2D, int depth) {

-        this(title, pageNumber, depth);
+        this.title = title;
+        this.pageNumber = pageNumber;
+        this.treeDepth = depth;
        this.point = point2D;
    }

@ -32,4 +39,60 @ public class OutlineObject {
        return "OutlineObject{" + "title='" + title + '\'' + '}';
    }

+
+    /**
+     * Returns the outline position, if one is set.
+     *
+     * @return the stored point, or an empty Optional when the point is null
+     */
+    public Optional<Point2D> getPoint() {
+
+        return Optional.ofNullable(point);
+    }
+
+
+    /**
+     * Checks whether the y-coordinate of the outline position is at most the maximum y of the given box.
+     * An outline object without a position is always reported as above.
+     *
+     * @param boundingBox the box to compare against
+     * @return true if no position is set or the position's y does not exceed the box's maximum y
+     */
+    public boolean isAbove(BoundingBox boundingBox) {
+
+        if (point == null) {
+            return true;
+        }
+        return point.getY() <= boundingBox.getMaxY();
+    }
+
+
+    /**
+     * Computes the Euclidean distance from the outline position to the closest point of the given box.
+     *
+     * @param boundingBox the box to measure against
+     * @return 0 if no position is set or the position lies inside or on the box, otherwise the shortest distance to the box
+     */
+    public double distance(BoundingBox boundingBox) {
+
+        if (point == null) {
+            return 0;
+        }
+        // Per axis, the gap is zero while the point lies within the box's extent on that axis; otherwise it is the
+        // offset to the nearer edge. Measuring to the nearest edge unconditionally would inflate the distance of points
+        // that are directly above, below or beside the box.
+        double deltaX = Math.max(0, Math.max(boundingBox.getMinX() - point.getX(), point.getX() - boundingBox.getMaxX()));
+        double deltaY = Math.max(0, Math.max(boundingBox.getMinY() - point.getY(), point.getY() - boundingBox.getMaxY()));
+        return Math.sqrt(deltaX * deltaX + deltaY * deltaY);
+    }
+
+
+    /**
+     * Discards the stored outline position.
+     */
+    public void resetPoint() {
+
+        this.point = null;
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineObjectTree.java
@ -39,4 +39,30 @ public class OutlineObjectTree {
        }
    }

+    /**
+     * Renders the tree as text: the line "OutlineObjectTree(", then every node title on its own line,
+     * indented by two spaces per tree level, and finally a closing ")".
+     */
+    @Override
+    public String toString() {
+
+        StringBuilder rendered = new StringBuilder("OutlineObjectTree(\n");
+        for (OutlineObjectTreeNode root : rootNodes) {
+            buildString(root, rendered, 1);
+        }
+        return rendered.append(")").toString();
+    }
+
+    /**
+     * Appends the title line of the given node, indented according to depth, followed by the lines of
+     * all of its descendants, each one level deeper.
+     */
+    private void buildString(OutlineObjectTreeNode node, StringBuilder sb, int depth) {
+
+        sb.append("  ".repeat(depth)).append(node.getOutlineObject().getTitle()).append("\n");
+        for (OutlineObjectTreeNode descendant : node.getChildren()) {
+            buildString(descendant, sb, depth + 1);
+        }
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java
@ -14,12 +14,12 @@ import lombok.RequiredArgsConstructor;

@Data
@RequiredArgsConstructor
-public class TableOfContents implements Iterable<TableOfContentItem> {
+public class SectionTree implements Iterable<SectionTreeEntry> {

-    private List<TableOfContentItem> mainSections = new ArrayList<>();
+    private List<SectionTreeEntry> mainSections = new ArrayList<>();


-    public TableOfContents(List<TableOfContentItem> mainSections) {
+    public SectionTree(List<SectionTreeEntry> mainSections) {

        this.mainSections = mainSections;
    }
@ -28,36 +28,36 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
    public List<TextPageBlock> getAllTextPageBlocks() {

        List<TextPageBlock> allTextPageBlocks = new ArrayList<>();
-        for (TableOfContentItem item : mainSections) {
+        for (SectionTreeEntry item : mainSections) {
            collectTextPageBlocks(item, allTextPageBlocks);
        }
        return allTextPageBlocks;
    }


-    private void collectTextPageBlocks(TableOfContentItem item, List<TextPageBlock> textPageBlocks) {
+    private void collectTextPageBlocks(SectionTreeEntry item, List<TextPageBlock> textPageBlocks) {

        textPageBlocks.add(item.getHeadline());
-        for (TableOfContentItem child : item.getChildren()) {
+        for (SectionTreeEntry child : item.getChildren()) {
            collectTextPageBlocks(child, textPageBlocks);
        }
    }


-    public List<TableOfContentItem> getAllTableOfContentItems() {
+    public List<SectionTreeEntry> getAllTableOfContentItems() {

-        List<TableOfContentItem> allItems = new ArrayList<>();
-        for (TableOfContentItem item : mainSections) {
+        List<SectionTreeEntry> allItems = new ArrayList<>();
+        for (SectionTreeEntry item : mainSections) {
            collectTableOfContentItems(item, allItems);
        }
        return allItems;
    }


-    private void collectTableOfContentItems(TableOfContentItem item, List<TableOfContentItem> allItems) {
+    private void collectTableOfContentItems(SectionTreeEntry item, List<SectionTreeEntry> allItems) {

        allItems.add(item);
-        for (TableOfContentItem child : item.getChildren()) {
+        for (SectionTreeEntry child : item.getChildren()) {
            collectTableOfContentItems(child, allItems);
        }
    }
@ -65,7 +65,7 @@ public class TableOfContents implements Iterable<TableOfContentItem> {

    private boolean containsBlock(TextPageBlock block) {

-        for (TableOfContentItem existingItem : this.getMainSections()) {
+        for (SectionTreeEntry existingItem : this.getMainSections()) {
            if (existingItem.getHeadline().equals(block) || existingItem.contains(block)) {
                return true;
            }
@ -74,9 +74,9 @@ public class TableOfContents implements Iterable<TableOfContentItem> {
    }


-    private boolean containsItem(TableOfContentItem tocItem) {
+    private boolean containsItem(SectionTreeEntry tocItem) {

-        for (TableOfContentItem existingItem : this.getMainSections()) {
+        for (SectionTreeEntry existingItem : this.getMainSections()) {
            if (existingItem.equals(tocItem) || existingItem.contains(tocItem)) {
                return true;
            }
@ -86,18 +86,18 @@ public class TableOfContents implements Iterable<TableOfContentItem> {


    @Override
-    public @NonNull Iterator<TableOfContentItem> iterator() {
+    public @NonNull Iterator<SectionTreeEntry> iterator() {

-        return new TableOfContentItemIterator(mainSections);
+        return new SectionTreeEntryIterator(mainSections);
    }


-    private static class TableOfContentItemIterator implements Iterator<TableOfContentItem> {
+    private static class SectionTreeEntryIterator implements Iterator<SectionTreeEntry> {

-        private final Stack<Iterator<TableOfContentItem>> stack = new Stack<>();
+        private final Stack<Iterator<SectionTreeEntry>> stack = new Stack<>();


-        TableOfContentItemIterator(List<TableOfContentItem> mainSections) {
+        SectionTreeEntryIterator(List<SectionTreeEntry> mainSections) {

            stack.push(mainSections.iterator());
        }
@ -112,10 +112,10 @@ public class TableOfContents implements Iterable<TableOfContentItem> {


        @Override
-        public TableOfContentItem next() {
+        public SectionTreeEntry next() {

            ensureStackTopIsCurrent();
-            TableOfContentItem currentItem = stack.peek().next();
+            SectionTreeEntry currentItem = stack.peek().next();
            if (currentItem.getChildren() != null && !currentItem.getChildren().isEmpty()) {
                stack.push(currentItem.getChildren()
                                   .iterator());
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeBuilderService.java
@ -1,5 +1,6 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;

+import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.TABLE_OF_CONTENTS_HEADLINE;
 import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;

 import java.util.ArrayList;
@ -10,6 +11,7 @@ import java.util.TreeSet;

 import org.springframework.stereotype.Service;

+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;

 import io.micrometer.observation.annotation.Observed;
@ -17,21 +19,23 @@ import lombok.extern.slf4j.Slf4j;

@Service
@Slf4j
-public class OutlineValidationService {
+public class SectionTreeBuilderService {

    @Observed(name = "OutlineValidationService", contextualName = "create-toc")
-    public TableOfContents createToC(List<TextPageBlock> headlines) {
+    public SectionTree createSectionTree(ClassificationDocument classificationDocument) {

-        List<TableOfContentItem> mainSections = new ArrayList<>();
-        Map<Integer, TableOfContentItem> lastItemsPerDepth = new HashMap<>();
-        TableOfContentItem last = null;
+        List<TextPageBlock> headlines = extractHeadlines(classificationDocument);
+
+        List<SectionTreeEntry> mainSections = new ArrayList<>();
+        Map<Integer, SectionTreeEntry> lastItemsPerDepth = new HashMap<>();
+        SectionTreeEntry last = null;
        TreeSet<Integer> depths = new TreeSet<>();

        for (TextPageBlock current : headlines) {
            int currentDepth = getHeadlineNumber(current.getClassification());
            Integer parentDepth = depths.floor(currentDepth - 1);

-            var tocItem = new TableOfContentItem(current);
+            var tocItem = new SectionTreeEntry(current);

            if (parentDepth == null) {
                mainSections.add(tocItem);
@ -41,14 +45,16 @@ public class OutlineValidationService {
            } else {
                assert last != null;
                int lastDepth = getHeadlineNumber(last.getHeadline().getClassification());
-
-                if (lastDepth < parentDepth) {
+                if (last.getHeadline().getClassification().equals(TABLE_OF_CONTENTS_HEADLINE) && !current.getClassification().equals(TABLE_OF_CONTENTS_HEADLINE)) {
+                    // headline after toc should always start a main section
+                    parentDepth = 1;
+                } else if (lastDepth < parentDepth) {
                    parentDepth = lastDepth;
                } else if (lastDepth == currentDepth && last.getParent() != null) {
                    parentDepth = getHeadlineNumber(last.getParent().getHeadline().getClassification());
                }

-                TableOfContentItem parent = lastItemsPerDepth.get(parentDepth);
+                SectionTreeEntry parent = lastItemsPerDepth.get(parentDepth);
                parent.addChild(tocItem);
            }

@ -57,7 +63,26 @ public class OutlineValidationService {
            depths.add(currentDepth);
        }

-        return new TableOfContents(mainSections);
+        return new SectionTree(mainSections);
+
+    }
+
+
+    /**
+     * Collects the headline blocks of the document, following the iteration order of its pages and their text blocks.
+     * A block qualifies when it is a TextPageBlock whose classification is set and reports itself as a headline.
+     */
+    private static List<TextPageBlock> extractHeadlines(ClassificationDocument classificationDocument) {
+
+        List<TextPageBlock> headlines = new ArrayList<>();
+        for (var page : classificationDocument.getPages()) {
+            for (var block : page.getTextBlocks()) {
+                if (block instanceof TextPageBlock headline && block.getClassification() != null && block.getClassification().isHeadline()) {
+                    headlines.add(headline);
+                }
+            }
+        }
+        return List.copyOf(headlines);
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeEnhancementService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeEnhancementService.java
@ -1,6 +1,7 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;

 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@ -22,28 +23,28 @@ import lombok.extern.slf4j.Slf4j;

@Slf4j
@Service
-public class TOCEnrichmentService {
+public class SectionTreeEnhancementService {

    public void assignSectionBlocksAndImages(ClassificationDocument document) {

-        TableOfContents toc = document.getTableOfContents();
-        Iterator<TableOfContentItem> iterator = toc.iterator();
-        TableOfContentItem currentTOCItem = null;
+        SectionTree toc = document.getSectionTree();
+        Iterator<SectionTreeEntry> iterator = toc.iterator();
+        SectionTreeEntry currentTOCItem = null;
        if (iterator.hasNext()) {
            currentTOCItem = iterator.next();
        }
        List<AbstractPageBlock> startBlocks = new ArrayList<>();
        List<ClassifiedImage> startImages = new ArrayList<>();
-        TableOfContentItem currentSection = null;
+        SectionTreeEntry currentSection = null;
        boolean foundFirstHeadline = false;

        List<ClassificationHeader> headers = new ArrayList<>();
        List<ClassificationFooter> footers = new ArrayList<>();
        TablePageBlock previousTable = null;
-        List<TableOfContentItem> lastFoundTOCItems = new ArrayList<>();
+        List<SectionTreeEntry> lastFoundTOCItems = new ArrayList<>();

        for (ClassificationPage page : document.getPages()) {
-            List<TableOfContentItem> currentPageTOCItems = new ArrayList<>();
+            List<SectionTreeEntry> currentPageTOCItems = new ArrayList<>();
            List<TextPageBlock> header = new ArrayList<>();
            List<TextPageBlock> footer = new ArrayList<>();
            for (AbstractPageBlock current : page.getTextBlocks()) {
@ -100,7 +101,7 @@ public class TOCEnrichmentService {
                Double xMax = null;
                Double yMax = null;

-                for (TableOfContentItem tocItem : lastFoundTOCItems) {
+                for (SectionTreeEntry tocItem : lastFoundTOCItems) {
                    var headline = tocItem.getHeadline();

                    if (headline.getPage() != page.getPageNumber()) {
@ -167,11 +168,11 @@ public class TOCEnrichmentService {
            }
        }

-        if (!startBlocks.isEmpty()) {
-            TableOfContentItem unassigned = new TableOfContentItem(null);
+        if (!startBlocks.isEmpty() || !startImages.isEmpty()) {
+            SectionTreeEntry unassigned = new SectionTreeEntry(null);
            unassigned.setSectionBlocks(startBlocks);
            unassigned.setImages(startImages);
-            document.getTableOfContents().getMainSections().add(0, unassigned);
+            document.getSectionTree().getMainSections().add(0, unassigned);
        }
        document.setHeaders(headers);
        document.setFooters(footers);
@ -185,12 +186,8 @@ public class TOCEnrichmentService {
            List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
            List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(currentTable);
            // Allow merging of tables if header row is separated from first logical non-header row
-            if (previousTableNonHeaderRow.isEmpty()
-                && previousTable.getRowCount() == 1
-                && previousTable.getRows()
-                           .get(0).size() == tableNonHeaderRow.size()) {
-                previousTableNonHeaderRow = previousTable.getRows()
-                        .get(0)
+            if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) {
+                previousTableNonHeaderRow = previousTable.getRows().get(0)
                        .stream()
                        .map(cell -> {
                            Cell fakeCell = Cell.copy(cell);
@ -201,8 +198,7 @@ public class TOCEnrichmentService {
            }
            if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
                for (int i = currentTable.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
-                    List<Cell> row = currentTable.getRows()
-                            .get(i);
+                    List<Cell> row = currentTable.getRows().get(i);
                    if (row.size() == tableNonHeaderRow.size() && row.stream()
                            .allMatch(cell -> cell.getHeaderCells().isEmpty())) {
                        for (int j = 0; j < row.size(); j++) {
@ -225,18 +221,15 @@ public class TOCEnrichmentService {

        return table.getRows()
                .stream()
-                .flatMap(row -> row.stream()
-                        .filter(cell -> !cell.getHeaderCells().isEmpty()))
-                .findAny().isEmpty();
-
+                .flatMap(Collection::stream)
+                .allMatch(cell -> cell.getHeaderCells().isEmpty());
    }


    private List<Cell> getRowWithNonHeaderCells(TablePageBlock table) {

        for (int i = table.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
-            List<Cell> row = table.getRows()
-                    .get(i);
+            List<Cell> row = table.getRows().get(i);
            if (row.size() == 1) {
                continue;
            }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java
@ -2,10 +2,12 @@ package com.knecon.fforesight.service.layoutparser.processor.model.outline;

 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import java.util.stream.Collectors;

+import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.AbstractSemanticNode;
+import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;

@ -14,52 +16,68 @@ import lombok.EqualsAndHashCode;

@Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
-public class TableOfContentItem {
+public class SectionTreeEntry {
+
+    public enum Type {
+        SECTION,
+        SUPER_SECTION,
+        TOC_SECTION
+    }

    @EqualsAndHashCode.Include
    private TextPageBlock headline;
-    private List<TableOfContentItem> children = new ArrayList<>();
-    private TableOfContentItem parent;
+    private List<SectionTreeEntry> children = new ArrayList<>();
+    private SectionTreeEntry parent;

    private List<AbstractPageBlock> sectionBlocks = new ArrayList<>();
    private List<ClassifiedImage> images = new ArrayList<>();

-    private AbstractSemanticNode section;
+    private GenericSemanticNode section;


-    public TableOfContentItem(TextPageBlock headline) {
+    public SectionTreeEntry(TextPageBlock headline) {

        this.headline = headline;
    }


-    public void addChild(TableOfContentItem tableOfContentItem) {
+    public Type getType() {

-        children.add(tableOfContentItem);
-        tableOfContentItem.setParent(this);
+        if (Objects.nonNull(headline) && PageBlockType.TABLE_OF_CONTENTS_HEADLINE.equals(headline.getClassification())) { // constant-first equals tolerates an unset classification
+            return Type.TOC_SECTION; // headline is classified as a table-of-contents headline
+        }
+        if (children.isEmpty()) {
+            return Type.SECTION; // no children
+        }
+        return Type.SUPER_SECTION; // has children
    }


-    public TableOfContentItem getSiblingBefore() {
+    public void addChild(SectionTreeEntry sectionTreeEntry) {
+
+        children.add(sectionTreeEntry);
+        sectionTreeEntry.setParent(this);
+    }
+
+
+    public SectionTreeEntry getSiblingBefore() {

        if (parent != null) {
            int index = parent.getChildren().indexOf(this);
            if (index > 0) {
-                return parent.getChildren()
-                        .get(index - 1);
+                return parent.getChildren().get(index - 1);
            }
        }
        return null;
    }


-    public TableOfContentItem getSiblingAfter() {
+    public SectionTreeEntry getSiblingAfter() {

        if (parent != null) {
            int index = parent.getChildren().indexOf(this);
            if (index >= 0 && index < parent.getChildren().size() - 1) {
-                return parent.getChildren()
-                        .get(index + 1);
+                return parent.getChildren().get(index + 1);
            }
        }
        return null;
@ -71,7 +89,7 @@ public class TableOfContentItem {
        if (headline.equals(block)) {
            return true;
        }
-        for (TableOfContentItem child : children) {
+        for (SectionTreeEntry child : children) {
            if (child.contains(block)) {
                return true;
            }
@ -80,12 +98,12 @@ public class TableOfContentItem {
    }


-    public boolean contains(TableOfContentItem tocItem) {
+    public boolean contains(SectionTreeEntry tocItem) {

        if (this.equals(tocItem)) {
            return true;
        }
-        for (TableOfContentItem child : children) {
+        for (SectionTreeEntry child : children) {
            if (child.contains(tocItem)) {
                return true;
            }
@ -93,17 +111,19 @@ public class TableOfContentItem {
        return false;
    }

+
    public List<AbstractPageBlock> getNonEmptySectionBlocks() {

-        return sectionBlocks.stream().filter(pageBlock -> !pageBlock.isEmpty()).collect(Collectors.toList());
+        return sectionBlocks.stream()
+                .filter(pageBlock -> !pageBlock.isEmpty())
+                .collect(Collectors.toList());
    }

+
    @Override
    public String toString() {

        return "OutlineObjectTreeNode{" + "textPageBlock=" + headline + '}';
    }

-
-
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/Cell.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/table/Cell.java
@ -9,7 +9,7 @@ import java.util.List;

 import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TextNormalizationUtilities;

 import lombok.Data;
@ -68,12 +68,12 @@ public class Cell extends BoundingBox {
        StringBuilder sb = new StringBuilder();

        Iterator<TextPageBlock> itty = textBlocks.iterator();
-        TextPositionSequence previous = null;
+        Word previous = null;
        while (itty.hasNext()) {

            TextPageBlock textBlock = itty.next();

-            for (TextPositionSequence word : textBlock.getSequences()) {
+            for (Word word : textBlock.getWords()) {
                if (previous != null) {
                    if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
                        sb.append('\n');
@ -87,7 +87,7 @@ public class Cell extends BoundingBox {

        }

-        return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" {2}", " ");
+        return TextNormalizationUtilities.cleanString(sb.toString());
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/AbstractBlockOnPage.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/AbstractBlockOnPage.java
@ -0,0 +1,8 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.text;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+
+public record AbstractBlockOnPage(AbstractPageBlock block, ClassificationPage page) {
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/FrequencyCounters.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/FrequencyCounters.java
@ -0,0 +1,21 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.text;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
+
+import lombok.AccessLevel;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.experimental.FieldDefaults;
+
+@Getter
+@NoArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class FrequencyCounters {
+
+    FloatFrequencyCounter lineHeightFrequencyCounter = new FloatFrequencyCounter();
+    FloatFrequencyCounter fontSizeFrequencyCounter = new FloatFrequencyCounter();
+    FloatFrequencyCounter spaceFrequencyCounter = new FloatFrequencyCounter();
+    StringFrequencyCounter fontFrequencyCounter = new StringFrequencyCounter();
+    StringFrequencyCounter styleFrequencyCounter = new StringFrequencyCounter();
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/ListIdentifier.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/ListIdentifier.java
@ -0,0 +1,136 @
+package com.knecon.fforesight.service.layoutparser.processor.model.text;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import lombok.AccessLevel;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.experimental.FieldDefaults;
+
+/**
+ * A numeric list marker such as "1." or "(1)" parsed from the leading words of a text block,
+ * kept together with the first of those words and the page number passed to the parser.
+ */
+@AllArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ListIdentifier {
+
+    // The first digit must be non-zero; up to three further digits may be any digit, so 10, 20, 100, ... are matched too.
+    public static final Pattern NUMBER_WITH_DOT = Pattern.compile("^\\s*([1-9][0-9]{0,3})\\.\\s+");
+    public static final Pattern NUMBER_IN_PARENTHESES = Pattern.compile("^\\s*\\(([1-9][0-9]{0,3})\\)\\s+");
+
+    enum Format {
+        NUMBER_WITH_DOT,
+        NUMBER_IN_PARENTHESES
+    }
+
+    Format format;
+    @Getter
+    Word word;
+    @Getter
+    int page;
+    int representation;
+
+
+    /**
+     * Parses a list marker from the first words (at most five) of the given block.
+     *
+     * @param textPageBlock block whose leading words are inspected
+     * @param page          page number stored on the resulting identifier
+     * @return the identifier, or empty if the leading words do not start with a supported marker
+     */
+    public static Optional<ListIdentifier> parse(TextPageBlock textPageBlock, int page) {
+
+        return parse(textPageBlock.getWords().subList(0, Math.min(5, textPageBlock.getWords().size())), page);
+    }
+
+
+    /**
+     * Joins the words with single spaces and matches the result against the supported formats,
+     * trying {@link #NUMBER_WITH_DOT} before {@link #NUMBER_IN_PARENTHESES}.
+     *
+     * @param sequences words to inspect; the first one is stored on the resulting identifier
+     * @param page      page number stored on the resulting identifier
+     * @return the identifier, or empty if the list is empty or no format matches
+     */
+    public static Optional<ListIdentifier> parse(List<Word> sequences, int page) {
+
+        // An empty word list cannot start with a marker, and there is no first word to store.
+        if (sequences.isEmpty()) {
+            return Optional.empty();
+        }
+
+        String text = sequences.stream()
+                .map(Word::toString)
+                .collect(Collectors.joining(" "));
+        Word firstWord = sequences.get(0);
+
+        return match(NUMBER_WITH_DOT, Format.NUMBER_WITH_DOT, text, firstWord, page)
+                .or(() -> match(NUMBER_IN_PARENTHESES, Format.NUMBER_IN_PARENTHESES, text, firstWord, page));
+    }
+
+
+    /** Builds an identifier if the pattern is found in the text and its first group parses as an integer. */
+    private static Optional<ListIdentifier> match(Pattern pattern, Format format, String text, Word firstWord, int page) {
+
+        Matcher matcher = pattern.matcher(text);
+        if (!matcher.find()) {
+            return Optional.empty();
+        }
+        return parseInteger(matcher.group(1)).map(number -> new ListIdentifier(format, firstWord, page, number));
+    }
+
+
+    /** Parses the text as an int, returning empty instead of throwing when it is not a valid number. */
+    private static Optional<Integer> parseInteger(String text) {
+
+        try {
+            return Optional.of(Integer.parseInt(text));
+        } catch (NumberFormatException e) {
+            return Optional.empty();
+        }
+    }
+
+
+    /**
+     * Checks that every identifier continues the list started by its predecessor.
+     *
+     * @param listIdentifiers identifiers in the order they should be checked
+     * @return true if the list has at most one element or every consecutive pair is in order
+     */
+    public static boolean isInOrder(List<ListIdentifier> listIdentifiers) {
+
+        for (int i = 1; i < listIdentifiers.size(); i++) {
+            if (!listIdentifiers.get(i).follows(listIdentifiers.get(i - 1))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+
+    /**
+     * Checks whether this identifier may directly follow the given one: same format, a strictly larger
+     * number, words that satisfy {@code intersectsXDirAdj} with the argument 2 (presumably a horizontal
+     * tolerance - TODO confirm), and a position that is not on an earlier page and, on the same page,
+     * satisfies {@code isBelowDirAdj}.
+     */
+    private boolean follows(ListIdentifier previous) {
+
+        if (format != previous.format || representation <= previous.representation) {
+            return false;
+        }
+        if (!word.intersectsXDirAdj(previous.word, 2)) {
+            return false;
+        }
+        if (page == previous.page && !word.isBelowDirAdj(previous.word)) {
+            return false;
+        }
+        return page >= previous.page;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java
@ -7,6 +7,8 @@ import org.apache.pdfbox.text.TextPosition;

 import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.TextBoundingBox;
 import com.knecon.fforesight.service.layoutparser.processor.docstrum.utils.FastAtan2;
+import com.knecon.fforesight.service.layoutparser.processor.utils.CoordinateTransforms;
+import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/SearchableText.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/SearchableText.java
@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.text;

 import java.util.ArrayList;
 import java.util.List;
+
 import com.knecon.fforesight.service.layoutparser.processor.utils.TextNormalizationUtilities;

 import lombok.Getter;
@ -9,18 +10,18 @@ import lombok.Getter;
@Getter
 public class SearchableText {

-    private final List<TextPositionSequence> sequences = new ArrayList<>();
+    private final List<Word> sequences = new ArrayList<>();


-    public void add(TextPositionSequence textPositionSequence) {
+    public void add(Word word) {

-        sequences.add(textPositionSequence);
+        sequences.add(word);
    }


-    public void addAll(List<TextPositionSequence> textPositionSequences) {
+    public void addAll(List<Word> words) {

-        sequences.addAll(textPositionSequences);
+        sequences.addAll(words);
    }


@ -31,18 +32,14 @@ public class SearchableText {
    }


-    public static String buildString(List<TextPositionSequence> sequences) {
+    public static String buildString(List<Word> sequences) {

        StringBuilder sb = new StringBuilder();
-        for (TextPositionSequence word : sequences) {
+        for (Word word : sequences) {
            sb.append(word);
            sb.append(' ');
        }
-        String text = sb.toString();
-        text = TextNormalizationUtilities.removeHyphenLineBreaks(text);
-        text = TextNormalizationUtilities.removeLineBreaks(text);
-        text = TextNormalizationUtilities.removeRepeatingWhitespaces(text);
-        return text;
+        return TextNormalizationUtilities.cleanString(sb.toString());
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/StringFrequencyCounter.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/StringFrequencyCounter.java
@ -9,10 +9,14 @@ public class StringFrequencyCounter {

    @Getter
    private final Map<String, Integer> countPerValue = new HashMap<>();
+    boolean changed;
+    String mostPopularCache;


    public void add(String value) {

+        changed = true;
+
        if (!countPerValue.containsKey(value)) {
            countPerValue.put(value, 1);
        } else {
@ -23,6 +27,8 @@ public class StringFrequencyCounter {

    public void addAll(Map<String, Integer> otherCounter) {

+        changed = true;
+
        for (Map.Entry<String, Integer> entry : otherCounter.entrySet()) {
            if (countPerValue.containsKey(entry.getKey())) {
                countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
@ -35,13 +41,18 @@ public class StringFrequencyCounter {

    public String getMostPopular() {

-        Map.Entry<String, Integer> mostPopular = null;
-        for (Map.Entry<String, Integer> entry : countPerValue.entrySet()) {
-            if (mostPopular == null || entry.getValue() > mostPopular.getValue()) {
-                mostPopular = entry;
+        if (changed || mostPopularCache == null) {
+            Map.Entry<String, Integer> mostPopular = null;
+            for (Map.Entry<String, Integer> entry : countPerValue.entrySet()) {
+                if (mostPopular == null || entry.getValue() >= mostPopular.getValue()) {
+                    mostPopular = entry;
+                }
            }
+            mostPopularCache = mostPopular != null ? mostPopular.getKey() : null;
+            changed = false;
        }
-        return mostPopular != null ? mostPopular.getKey() : null;
+
+        return mostPopularCache;
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextBlockOnPage.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextBlockOnPage.java
@ -0,0 +1,7 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.text;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+
+public record TextBlockOnPage(TextPageBlock textBlock, ClassificationPage page) {
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java
@ -2,11 +2,11 @@ package com.knecon.fforesight.service.layoutparser.processor.model.text;

 import java.awt.geom.Rectangle2D;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;

 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TextNormalizationUtilities;
@ -25,56 +25,59 @@ import lombok.NoArgsConstructor;
 public class TextPageBlock extends AbstractPageBlock {

    @Builder.Default
-    private List<TextPositionSequence> sequences = new ArrayList<>();
+    private List<Word> words = new ArrayList<>();
+    @Builder.Default
+    private FrequencyCounters frequencyCounters = new FrequencyCounters();

    private Rectangle2D bBoxDirAdj;

-    private String mostPopularWordFont;
-
-    private String mostPopularWordStyle;
-
-    private double mostPopularWordFontSize;
-
-    private double mostPopularWordHeight;
-
-    private double mostPopularWordSpaceWidth;
-
-    private double highestFontSize;
+    private boolean underlined;

    private PageBlockType classification;

    private boolean toDuplicate;

+    private String text;
+    private boolean changed;

-    public TextPageBlock(List<TextPositionSequence> sequences) {

-        this.sequences = sequences;
-        if (!sequences.isEmpty()) {
-            calculateFrequencyCounters();
+    public TextPageBlock(List<Word> words) {
+
+        this.words = new ArrayList<>(words);
+        this.frequencyCounters = new FrequencyCounters();
+
+        if (!words.isEmpty()) {
+            addToFrequencyCounters(words);
        }
        calculateBBox();
    }


+    public List<Word> getWords() {
+
+        return Collections.unmodifiableList(words);
+    }
+
+
    public TextDirection getDir() {

-        return sequences.get(0).getDir();
+        return words.get(0).getDir();
    }


    private void calculateBBox() {

-        if (sequences == null) {
+        if (words == null) {
            this.bBox = new Rectangle2D.Double();
            this.bBoxPdf = new Rectangle2D.Double();
            this.bBoxDirAdj = new Rectangle2D.Double();
            return;
        }
-        this.bBoxDirAdj = sequences.stream()
-                .map(TextPositionSequence::getBBoxDirAdj)
+        this.bBoxDirAdj = words.stream()
+                .map(Word::getBBoxDirAdj)
                .collect(RectangleTransformations.collectBBox());

-        setToBBoxOfComponents(sequences);
+        setToBBoxOfComponents(words);
    }


@ -96,8 +99,8 @@ public class TextPageBlock extends AbstractPageBlock {
            throw new IllegalArgumentException("Cannot merge textBlocks on different pages.");
        }

-        List<TextPositionSequence> sequences = textBlocksToMerge.stream()
-                .map(TextPageBlock::getSequences)
+        List<Word> sequences = textBlocksToMerge.stream()
+                .map(TextPageBlock::getWords)
                .flatMap(java.util.Collection::stream)
                .toList();
        sequences = new ArrayList<>(sequences);
@ -106,38 +109,27 @@ public class TextPageBlock extends AbstractPageBlock {
    }


-    private void calculateFrequencyCounters() {
+    private void addToFrequencyCounters(List<Word> sequences) {

-        FloatFrequencyCounter lineHeightFrequencyCounter = new FloatFrequencyCounter();
-        FloatFrequencyCounter fontSizeFrequencyCounter = new FloatFrequencyCounter();
-        FloatFrequencyCounter spaceFrequencyCounter = new FloatFrequencyCounter();
-        StringFrequencyCounter fontFrequencyCounter = new StringFrequencyCounter();
-        StringFrequencyCounter styleFrequencyCounter = new StringFrequencyCounter();
-
-        for (TextPositionSequence wordBlock : sequences) {
-
-            lineHeightFrequencyCounter.add(wordBlock.getTextHeight());
-            fontSizeFrequencyCounter.add(wordBlock.getFontSize());
-            spaceFrequencyCounter.add(wordBlock.getSpaceWidth());
-            fontFrequencyCounter.add(wordBlock.getFont());
-            styleFrequencyCounter.add(wordBlock.getFontStyle());
+        for (Word wordBlock : sequences) {

+            frequencyCounters.getLineHeightFrequencyCounter().add(wordBlock.getTextHeight());
+            frequencyCounters.getFontSizeFrequencyCounter().add(wordBlock.getFontSize());
+            frequencyCounters.getSpaceFrequencyCounter().add(wordBlock.getSpaceWidth());
+            frequencyCounters.getFontFrequencyCounter().add(wordBlock.getFont());
+            frequencyCounters.getStyleFrequencyCounter().add(wordBlock.getFontStyle());
        }

-        setMostPopularWordFont(fontFrequencyCounter.getMostPopular());
-        setMostPopularWordStyle(styleFrequencyCounter.getMostPopular());
-        setMostPopularWordFontSize(fontSizeFrequencyCounter.getMostPopular());
-        setMostPopularWordHeight(lineHeightFrequencyCounter.getMostPopular());
-        setMostPopularWordSpaceWidth(spaceFrequencyCounter.getMostPopular());
-        setHighestFontSize(fontSizeFrequencyCounter.getHighest());
+        setUnderlined(this.words.stream()
+                              .allMatch(Word::isUnderline));
    }


-    public TextPageBlock union(TextPositionSequence r) {
+    public TextPageBlock union(Word r) {

        TextPageBlock union = this.copy();
-        union.getSequences().add(r);
-        calculateFrequencyCounters();
+        // add(Word) already updates the copy's frequency counters and bounding box; counting r on this block as well would skew its statistics.
+        union.add(r);
        calculateBBox();
        return union;
    }
@ -146,51 +138,50 @@ public class TextPageBlock extends AbstractPageBlock {
    public TextPageBlock union(TextPageBlock r) {

        TextPageBlock union = this.copy();
-        union.getSequences().addAll(r.getSequences());
-        calculateFrequencyCounters();
+        // addAll(List) already updates the copy's frequency counters and bounding box; counting r's words on this block as well would skew its statistics.
+        union.addAll(r.getWords());
        calculateBBox();
        return union;
    }


-    public void add(TextPageBlock r) {
+    public void add(TextPageBlock textPageBlock) {

-        sequences.addAll(r.getSequences());
-        calculateFrequencyCounters();
+        changed = true;
+        words.addAll(textPageBlock.getWords());
+        addToFrequencyCounters(textPageBlock.getWords());
        calculateBBox();
    }


-    public void add(TextPositionSequence r) {
+    public void add(Word word) {

-        sequences.add(r);
-        calculateFrequencyCounters();
+        changed = true;
+        words.add(word);
+        addToFrequencyCounters(List.of(word));
+        calculateBBox();
+    }
+
+
+    public void addAll(List<Word> words) {
+
+        changed = true;
+        this.words.addAll(words);
+        addToFrequencyCounters(words);
        calculateBBox();
    }


    public TextPageBlock copy() {

-        return new TextPageBlock(new ArrayList<>(sequences));
+        return new TextPageBlock(new ArrayList<>(words));
    }


    @Override
    public String toString() {

-        StringBuilder builder = new StringBuilder();
-
-        for (int i = 0; i < sequences.size(); i++) {
-            String sequenceAsString = sequences.get(i).toString();
-            // Fix for missing Whitespace. This is recognized in getSequences method. See PDFTextStripper Line 1730.
-            if (i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() - 1) != ' ' && sequenceAsString.charAt(0) != ' ') {
-                builder.append(' ');
-            }
-            builder.append(sequenceAsString);
-        }
-
-        return builder.toString();
-
+        return getText();
    }


@ -198,30 +189,36 @@ public class TextPageBlock extends AbstractPageBlock {
    @JsonIgnore
    public String getText() {

-        StringBuilder sb = new StringBuilder();
+        if (text == null || changed) {

-        TextPositionSequence previous = null;
-        for (TextPositionSequence word : sequences) {
-            if (previous != null) {
-                if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
-                    sb.append('\n');
-                } else {
-                    sb.append(' ');
+            StringBuilder sb = new StringBuilder();
+
+            Word previous = null;
+            for (Word word : words) {
+                if (previous != null) {
+                    if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
+                        sb.append('\n');
+                    } else {
+                        sb.append(' ');
+                    }
                }
+                sb.append(word.toString());
+                previous = word;
            }
-            sb.append(word.toString());
-            previous = word;
+
+            text = TextNormalizationUtilities.removeHyphenLinebreaks(sb.toString());
+            changed = false;
        }

-        return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());
+        return text;
    }


    public int getNumberOfLines() {

        int numberOfLines = 1;
-        TextPositionSequence previous = null;
-        for (TextPositionSequence word : sequences) {
+        Word previous = null;
+        for (Word word : words) {
            if (previous != null) {
                if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight()) {
                    numberOfLines++;
@ -233,10 +230,47 @@ public class TextPageBlock extends AbstractPageBlock {
    }


+    public String getMostPopularWordFont() {
+
+        return frequencyCounters.getFontFrequencyCounter().getMostPopular();
+    }
+
+
+    public String getMostPopularWordStyle() {
+
+        return frequencyCounters.getStyleFrequencyCounter().getMostPopular();
+    }
+
+
+    public double getMostPopularWordFontSize() {
+
+        return frequencyCounters.getFontSizeFrequencyCounter().getMostPopular();
+    }
+
+
+    public double getMostPopularWordHeight() {
+
+        return frequencyCounters.getLineHeightFrequencyCounter().getMostPopular();
+    }
+
+
+    public double getMostPopularWordSpaceWidth() {
+
+        return frequencyCounters.getSpaceFrequencyCounter().getMostPopular();
+    }
+
+
+    public double getHighestFontSize() {
+
+        Double highest = frequencyCounters.getFontSizeFrequencyCounter().getHighest();
+        return highest == null ? 0 : highest;
+    }
+
+
    @Override
    public boolean isEmpty() {

-        return sequences.isEmpty();
+        return words.isEmpty();
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPositionSequence.java
@ -1,190 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.processor.model.text;
-
-import static com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition.HEIGHT_PADDING;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-import java.util.stream.Collectors;
-
-import org.apache.pdfbox.text.TextPosition;
-
-import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.TextBoundingBox;
-
-import lombok.AllArgsConstructor;
-import lombok.Builder;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.NoArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-
-@Slf4j
-@Data
-@Builder
-@NoArgsConstructor
-@AllArgsConstructor
-@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) // needs the bbox to be unique
-public class TextPositionSequence extends TextBoundingBox implements CharSequence {
-
-    public static final String STANDARD = "standard";
-    public static final String BOLD_ITALIC = "bold, italic";
-    public static final String BOLD = "bold";
-    public static final String ITALIC = "italic";
-
-    @EqualsAndHashCode.Include
-    private int page;
-
-    @EqualsAndHashCode.Include
-    @Builder.Default
-    private List<RedTextPosition> textPositions = new ArrayList<>();
-
-    private boolean isParagraphStart;
-    private boolean strikethrough;
-    private boolean underline;
-
-
-    public TextPositionSequence(List<TextPosition> textPositions, int pageNumber, boolean isParagraphStart) {
-
-        this.textPositions = textPositions.stream()
-                .map(RedTextPosition::fromTextPosition)
-                .collect(Collectors.toList());
-        this.page = pageNumber;
-        this.isParagraphStart = isParagraphStart;
-        calculateBBox();
-    }
-
-
-    private void calculateBBox() {
-
-        setToBBoxOfComponents(getTextPositions());
-    }
-
-
-    public TextPositionSequence(List<RedTextPosition> textPositions, int page) {
-
-        this.textPositions = textPositions;
-        this.page = page;
-        calculateBBox();
-    }
-
-
-    @Override
-    public int length() {
-
-        return textPositions.size();
-    }
-
-
-    @Override
-    public char charAt(int index) {
-
-        RedTextPosition textPosition = textPositionAt(index);
-        String text = textPosition.getUnicode();
-        return text.charAt(0);
-    }
-
-
-    public char charAt(int index, boolean caseInSensitive) {
-
-        RedTextPosition textPosition = textPositionAt(index);
-        String text = textPosition.getUnicode();
-        return caseInSensitive ? text.toLowerCase(Locale.ROOT).charAt(0) : text.charAt(0);
-    }
-
-
-    @Override
-    public TextPositionSequence subSequence(int start, int end) {
-
-        var textPositionSequence = new TextPositionSequence();
-        textPositionSequence.textPositions = textPositions.subList(start, end);
-        textPositionSequence.page = page;
-        textPositionSequence.dir = dir;
-        textPositionSequence.setToBBoxOfComponents(getTextPositions());
-        return textPositionSequence;
-    }
-
-
-    @Override
-    public String toString() {
-
-        StringBuilder builder = new StringBuilder(length());
-        for (int i = 0; i < length(); i++) {
-            builder.append(charAt(i));
-        }
-        return builder.toString();
-    }
-
-
-    public RedTextPosition textPositionAt(int index) {
-
-        return textPositions.get(index);
-    }
-
-
-    public void add(TextPositionSequence textPositionSequence, RedTextPosition textPosition) {
-
-        this.textPositions.add(textPosition);
-        this.page = textPositionSequence.getPage();
-        calculateBBox();
-    }
-
-
-    public void add(TextPosition textPosition) {
-
-        this.textPositions.add(RedTextPosition.fromTextPosition(textPosition));
-        calculateBBox();
-    }
-
-    public double getTextHeightNoPadding() {
-
-        return textPositions.get(0).getHeightDirAdj();
-    }
-
-
-    public double getTextHeight() {
-
-        return textPositions.get(0).getHeightDirAdj() + HEIGHT_PADDING;
-    }
-
-
-    public String getFont() {
-
-        if (textPositions.get(0).getFontName() == null) {
-            return "none";
-        }
-        return textPositions.get(0).getFontName().toLowerCase(Locale.ROOT).replaceAll(",bold", "").replaceAll(",italic", "");
-    }
-
-
-    public String getFontStyle() {
-
-        if (textPositions.get(0).getFontName() == null) {
-            return STANDARD;
-        }
-        String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(Locale.ROOT);
-
-        if (lowercaseFontName.contains(BOLD) && lowercaseFontName.contains(ITALIC)) {
-            return BOLD_ITALIC;
-        } else if (lowercaseFontName.contains(BOLD)) {
-            return BOLD;
-        } else if (lowercaseFontName.contains(ITALIC)) {
-            return ITALIC;
-        } else {
-            return STANDARD;
-        }
-    }
-
-
-    public float getFontSize() {
-
-        return textPositions.get(0).getFontSizeInPt();
-    }
-
-
-    public float getSpaceWidth() {
-
-        return textPositions.get(0).getWidthOfSpace();
-    }
-
-}
-
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TocNumberComparator.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TocNumberComparator.java
@ -0,0 +1,36 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.text;
+
+import java.util.Comparator;
+import java.util.HashMap;
+
+import com.knecon.fforesight.service.layoutparser.processor.services.classification.NumberWord;
+
+/**
+ * Orders {@link NumberWord}s by the page number of the text block they are mapped to in the lookup, then by
+ * the y value of their word and finally by their number.
+ * <p>
+ * Every compared {@link NumberWord} has to be present in the lookup, otherwise the comparison fails with a
+ * {@link NullPointerException}.
+ */
+public class TocNumberComparator implements Comparator<NumberWord> {
+
+    private HashMap<NumberWord, TextBlockOnPage> lookup;
+
+
+    /**
+     * @param lookup maps each number word to the text block (and thereby the page) it was found on
+     */
+    public TocNumberComparator(HashMap<NumberWord, TextBlockOnPage> lookup) {
+
+        this.lookup = lookup;
+    }
+
+
+    /**
+     * Compares by page number first, by the y value of the words second and by the number itself last.
+     *
+     * @param number1 the first number word, must be a key of the lookup
+     * @param number2 the second number word, must be a key of the lookup
+     * @return a negative value, zero or a positive value as {@code number1} sorts before, equal to or after {@code number2}
+     */
+    @Override
+    public int compare(NumberWord number1, NumberWord number2) {
+
+        int firstPage = lookup.get(number1).page().getPageNumber();
+        int secondPage = lookup.get(number2).page().getPageNumber();
+
+        int pageOrder = Integer.compare(firstPage, secondPage);
+        if (pageOrder != 0) {
+            return pageOrder;
+        }
+
+        // same page and same y value: only the number can still tell the two apart
+        if (number1.word().getY() == number2.word().getY()) {
+            return Integer.compare(number1.number(), number2.number());
+        }
+
+        return Double.compare(number1.word().getY(), number2.word().getY());
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/Word.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/Word.java
@ -0,0 +1,272 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.text;
+
+import static com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition.HEIGHT_PADDING;
+
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Rectangle2D;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.apache.pdfbox.text.TextPosition;
+
+import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.Character;
+import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.TextBoundingBox;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * A run of {@link Character}s on one page that is handled as a single word, together with the bounding box
+ * state inherited from {@link TextBoundingBox}.
+ * <p>
+ * As a {@link CharSequence} a word exposes exactly one {@code char} per character: the first char of the unicode
+ * string of that character's text position. Font, font size, space width and text height are always read from
+ * the first character, so those getters fail with an {@link IndexOutOfBoundsException} on a word without characters.
+ * <p>
+ * Equality and hash code are built from the super class state, the page and the text positions. The hash code
+ * is cached lazily; the {@code add} methods and {@link #transform(AffineTransform)} reset the cache.
+ * NOTE(review): the Lombok generated setters (e.g. for {@code characters} or {@code page}) do not reset the
+ * cache - avoid them on words that already live in a hash based collection.
+ */
+@Slf4j
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@SuppressWarnings("pmd")
+public class Word extends TextBoundingBox implements CharSequence {
+
+    public static final String STANDARD = "standard";
+    public static final String BOLD_ITALIC = "bold, italic";
+    public static final String BOLD = "bold";
+    public static final String ITALIC = "italic";
+    public static final Pattern FONT_CLEANER = Pattern.compile(",bold|,italic");
+
+    private int page;
+
+    @Builder.Default
+    private List<Character> characters = new ArrayList<>();
+    private boolean isParagraphStart;
+    private boolean strikethrough;
+    private boolean underline;
+
+    // lazily computed hash code, null while not computed or after an invalidation; never part of equals
+    private Integer hashcodeCache;
+
+
+    /**
+     * Builds a word from PDFBox text positions, wrapping each one into a {@link Character}.
+     *
+     * @param textPositions    the text positions of the word in reading order
+     * @param pageNumber       the page the word is located on
+     * @param isParagraphStart whether the word starts a paragraph
+     */
+    public Word(List<TextPosition> textPositions, int pageNumber, boolean isParagraphStart) {
+
+        this.characters = textPositions.stream()
+                .map(RedTextPosition::fromTextPosition)
+                .map(Character::new)
+                .collect(Collectors.toList());
+        this.page = pageNumber;
+        this.isParagraphStart = isParagraphStart;
+        calculateBBoxAndHashcode();
+    }
+
+
+    /**
+     * Recomputes the bounding box from the current text positions and invalidates the cached hash code.
+     */
+    private void calculateBBoxAndHashcode() {
+
+        setToBBoxOfComponents(getTextPositions());
+        hashcodeCache = null;
+    }
+
+
+    /**
+     * Builds a word from already existing characters. The list is copied, later changes to the given list do
+     * not affect this word.
+     *
+     * @param textPositions the characters of the word in reading order
+     * @param page          the page the word is located on
+     */
+    public Word(List<Character> textPositions, int page) {
+
+        this.characters = new ArrayList<>(textPositions);
+        this.page = page;
+        calculateBBoxAndHashcode();
+    }
+
+
+    /**
+     * @return the number of characters of this word
+     */
+    @Override
+    public int length() {
+
+        return characters.size();
+    }
+
+
+    /**
+     * @param index the index of the character
+     * @return the first char of the unicode string of the character at {@code index}
+     */
+    @Override
+    public char charAt(int index) {
+
+        return charAt(index, false);
+    }
+
+
+    /**
+     * @param index           the index of the character
+     * @param caseInSensitive when {@code true} the unicode string is lower-cased ({@link Locale#ROOT}) first
+     * @return the first char of the (optionally lower-cased) unicode string of the character at {@code index}
+     */
+    public char charAt(int index, boolean caseInSensitive) {
+
+        String text = textPositionAt(index).getUnicode();
+        if (caseInSensitive) {
+            text = text.toLowerCase(Locale.ROOT);
+        }
+        return text.charAt(0);
+    }
+
+
+    /**
+     * Creates a new word from the characters in {@code [start, end)}. Page and direction are taken over from
+     * this word, the paragraph/strikethrough/underline flags are not.
+     * <p>
+     * The characters are copied into an own list, so the result stays usable when this word is modified
+     * afterwards, and the bounding box covers the selected characters only.
+     *
+     * @param start the start index, inclusive
+     * @param end   the end index, exclusive
+     * @return the new word
+     */
+    @Override
+    public Word subSequence(int start, int end) {
+
+        var subWord = new Word();
+        subWord.characters = new ArrayList<>(characters.subList(start, end));
+        subWord.page = page;
+        subWord.dir = dir;
+        // an empty range has no components to bound, keep the box of this word for it as before
+        subWord.setToBBoxOfComponents(subWord.characters.isEmpty() ? getTextPositions() : subWord.getTextPositions());
+        return subWord;
+    }
+
+
+    /**
+     * @return the text of the word, built from {@link #charAt(int)} of every character
+     */
+    @Override
+    public String toString() {
+
+        StringBuilder builder = new StringBuilder(length());
+        for (int i = 0; i < length(); i++) {
+            builder.append(charAt(i));
+        }
+        return builder.toString();
+    }
+
+
+    /**
+     * @param index the index of the character
+     * @return the text position of the character at {@code index}
+     */
+    public RedTextPosition textPositionAt(int index) {
+
+        return characters.get(index).getTextPosition();
+    }
+
+
+    /**
+     * Appends a text position as new character, takes over the page of the given word and refreshes the
+     * bounding box and hash code cache.
+     *
+     * @param word         the word whose page number is adopted
+     * @param textPosition the text position to append
+     */
+    public void add(Word word, RedTextPosition textPosition) {
+
+        this.characters.add(new Character(textPosition));
+        this.page = word.getPage();
+        calculateBBoxAndHashcode();
+    }
+
+
+    /**
+     * Appends a character and refreshes the bounding box and hash code cache.
+     *
+     * @param current the character to append
+     */
+    public void add(Character current) {
+
+        characters.add(current);
+        calculateBBoxAndHashcode();
+    }
+
+
+    /**
+     * Converts the PDFBox text position and appends it as new character.
+     *
+     * @param textPosition the text position to append
+     */
+    public void add(TextPosition textPosition) {
+
+        add(new Character(RedTextPosition.fromTextPosition(textPosition)));
+    }
+
+
+    /**
+     * @return the {@code heightDirAdj} of the first character's text position
+     */
+    public double getTextHeightNoPadding() {
+
+        return firstTextPosition().getHeightDirAdj();
+    }
+
+
+    /**
+     * @return the {@code heightDirAdj} of the first character's text position plus {@code HEIGHT_PADDING}
+     */
+    public double getTextHeight() {
+
+        return firstTextPosition().getHeightDirAdj() + HEIGHT_PADDING;
+    }
+
+
+    /**
+     * @return the lower-cased font name of the first character with every {@code ",bold"} and {@code ",italic"}
+     *         removed, or {@code "none"} when the first character has no font name
+     */
+    public String getFont() {
+
+        String fontName = firstTextPosition().getFontName();
+        if (fontName == null) {
+            return "none";
+        }
+
+        return FONT_CLEANER.matcher(fontName.toLowerCase(Locale.ROOT)).replaceAll("");
+    }
+
+
+    /**
+     * Derives the style from the lower-cased font name of the first character.
+     *
+     * @return {@link #BOLD_ITALIC}, {@link #BOLD} or {@link #ITALIC} depending on which of the two markers the
+     *         font name contains, {@link #STANDARD} when it contains neither or there is no font name
+     */
+    public String getFontStyle() {
+
+        String fontName = firstTextPosition().getFontName();
+        if (fontName == null) {
+            return STANDARD;
+        }
+        String lowercaseFontName = fontName.toLowerCase(Locale.ROOT);
+        boolean bold = lowercaseFontName.contains(BOLD);
+        boolean italic = lowercaseFontName.contains(ITALIC);
+
+        if (bold && italic) {
+            return BOLD_ITALIC;
+        }
+        if (bold) {
+            return BOLD;
+        }
+        if (italic) {
+            return ITALIC;
+        }
+        return STANDARD;
+    }
+
+
+    /**
+     * @return the font size in pt of the first character's text position
+     */
+    public float getFontSize() {
+
+        return firstTextPosition().getFontSizeInPt();
+    }
+
+
+    /**
+     * @return the width of space of the first character's text position
+     */
+    public float getSpaceWidth() {
+
+        return firstTextPosition().getWidthOfSpace();
+    }
+
+
+    /**
+     * Two words are equal when their super class state, their page and their text positions are equal.
+     * The hash code cache is deliberately ignored: it is filled lazily, so comparing it would make two identical
+     * words unequal as soon as {@link #hashCode()} was called on only one of them.
+     *
+     * @param o the object to compare with
+     * @return {@code true} when {@code o} is an equal word
+     */
+    @Override
+    public boolean equals(final Object o) {
+
+        if (o == this) {
+            return true;
+        }
+        if (!(o instanceof Word other)) {
+            return false;
+        }
+        if (!other.canEqual((Object) this)) {
+            return false;
+        }
+        if (!super.equals(o)) {
+            return false;
+        }
+        if (this.getPage() != other.getPage()) {
+            return false;
+        }
+        return Objects.equals(this.getTextPositions(), other.getTextPositions());
+    }
+
+
+    protected boolean canEqual(final Object other) {return other instanceof Word;}
+
+
+    /**
+     * @return the hash code of this word, computed on first use and cached until the next modification
+     */
+    @Override
+    public int hashCode() {
+
+        if (hashcodeCache == null) {
+            hashcodeCache = hashcodeCalculation();
+        }
+
+        return hashcodeCache;
+    }
+
+
+    /**
+     * Computes the hash code from the same state {@link #equals(Object)} compares.
+     */
+    private int hashcodeCalculation() {
+
+        final int PRIME = 59;
+        int result = super.hashCode();
+        result = result * PRIME + this.getPage();
+        result = result * PRIME + this.getTextPositions().hashCode();
+        return result;
+    }
+
+
+    /**
+     * @return the text positions of all characters as a new unmodifiable list
+     */
+    private List<RedTextPosition> getTextPositions() {
+
+        return characters.stream()
+                .map(Character::getTextPosition)
+                .toList();
+    }
+
+
+    /**
+     * @return the text position of the first character
+     */
+    private RedTextPosition firstTextPosition() {
+
+        return characters.get(0).getTextPosition();
+    }
+
+
+    /**
+     * Applies the transformation to the {@code bBoxDirAdj} of every text position (replacing it with the bounds
+     * of the transformed shape) and refreshes the bounding box and hash code cache of the word.
+     *
+     * @param rotateInstance the transformation to apply
+     */
+    public void transform(AffineTransform rotateInstance) {
+
+        for (RedTextPosition textPosition : getTextPositions()) {
+            Rectangle2D transformedBBox = rotateInstance.createTransformedShape(textPosition.getBBoxDirAdj()).getBounds2D();
+            textPosition.setBBoxDirAdj(transformedBBox);
+        }
+        calculateBBoxAndHashcode();
+    }
+
+}
+
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/ImageServiceResponseAdapter.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/ImageServiceResponseAdapter.java
@ -9,9 +9,9 @@ import java.util.Map;

 import org.springframework.stereotype.Service;

+import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java
@ -8,7 +8,7 @@ import java.util.Map;

 import org.springframework.stereotype.Service;

-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingBox;
 import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
@ -79,7 +79,7 @@ public class VisualLayoutParsingAdapter {
                ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),
                        t.getBox().getY1(),
                        t.getBox().getX2() - t.getBox().getX1(),
-                        t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber,"");
+                        t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber, "");

                signatures.add(signature);
            }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/DividingColumnDetectionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/DividingColumnDetectionService.java
@ -23,11 +23,11 @@ public class DividingColumnDetectionService {
    public List<Rectangle2D> detectColumns(PageContents pageContents) {


-        if (pageContents.getSortedTextPositionSequences().size() < 2) {
+        if (pageContents.getSortedWords().size() < 2) {
            return List.of(pageContents.getCropBox());
        }

-        GapInformation linesWithGapInformation = GapDetectionService.findGapsInLines(pageContents.getSortedTextPositionSequences(), pageContents.getCropBox());
+        GapInformation linesWithGapInformation = GapDetectionService.findGapsInLines(pageContents.getSortedWords(), pageContents.getCropBox());

        return detectColumnsFromLines(linesWithGapInformation.getXGaps(), pageContents.getCropBox());
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/GapDetectionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/GapDetectionService.java
@ -5,7 +5,7 @@ import java.util.LinkedList;
 import java.util.List;

 import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

 import lombok.AllArgsConstructor;
 import lombok.experimental.UtilityClass;
@ -18,23 +18,23 @@ public class GapDetectionService {
    private static final double NEW_LINE_FACTOR = 0.2;


-    public static GapInformation findGapsInLines(List<TextPositionSequence> sortedTextPositionSequences, Rectangle2D mainBodyTextFrame) {
+    public static GapInformation findGapsInLines(List<Word> sortedWords, Rectangle2D mainBodyTextFrame) {

-        if (sortedTextPositionSequences.isEmpty()) {
+        if (sortedWords.isEmpty()) {
            return new GapInformation();
        }

-        final double avgTextPositionHeight = getAvgTextPositionHeight(sortedTextPositionSequences);
+        final double avgTextPositionHeight = getAvgTextPositionHeight(sortedWords);

        XGapsContext xGapContext = XGapsContext.init(mainBodyTextFrame);
        YGapsContext yGapContext = YGapsContext.init(mainBodyTextFrame);

-        var previousTextPosition = sortedTextPositionSequences.get(0);
+        var previousTextPosition = sortedWords.get(0);
        Rectangle2D rectangle = toRectangle2D(previousTextPosition);

        xGapContext.addGapFromLeftEdgeOfMainBody(rectangle);

-        for (TextPositionSequence currentTextPosition : sortedTextPositionSequences.subList(1, sortedTextPositionSequences.size())) {
+        for (Word currentTextPosition : sortedWords.subList(1, sortedWords.size())) {

            double yDifference = Math.abs(currentTextPosition.getMaxYDirAdj() - previousTextPosition.getMaxYDirAdj());
            double xGap = Math.abs(previousTextPosition.getMaxXDirAdj() - currentTextPosition.getXDirAdj());
@ -59,14 +59,14 @@ public class GapDetectionService {
            }
            previousTextPosition = currentTextPosition;
        }
-        xGapContext.addGapToRightEdgeOfMainBody(toRectangle2D(sortedTextPositionSequences.get(sortedTextPositionSequences.size() - 1)));
+        xGapContext.addGapToRightEdgeOfMainBody(toRectangle2D(sortedWords.get(sortedWords.size() - 1)));
        xGapContext.gapsPerLine.add(xGapContext.gapsInCurrentLine);

        return new GapInformation(xGapContext.gapsPerLine, yGapContext.gapsPerLine);
    }


-    private static Rectangle2D toRectangle2D(TextPositionSequence textPosition) {
+    private static Rectangle2D toRectangle2D(Word textPosition) {

        return mirrorY(textPosition.getBBox());
    }
@ -87,18 +87,18 @@ public class GapDetectionService {
    }


-    private static void assertAllTextPositionsHaveSameDir(List<TextPositionSequence> textPositionSequences) {
+    private static void assertAllTextPositionsHaveSameDir(List<Word> words) {

-        assert textPositionSequences.stream()
-                .map(TextPositionSequence::getDir)
-                .allMatch(a -> a.equals(textPositionSequences.get(0).getDir()));
+        assert words.stream()
+                .map(Word::getDir)
+                .allMatch(a -> a.equals(words.get(0).getDir()));
    }


-    private static double getAvgTextPositionHeight(List<TextPositionSequence> textPositionSequences) {
+    private static double getAvgTextPositionHeight(List<Word> words) {

-        return textPositionSequences.stream()
-                .mapToDouble(TextPositionSequence::getHeight).average().orElseThrow();
+        return words.stream()
+                .mapToDouble(Word::getHeight).average().orElseThrow();
    }


--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/InvisibleTableDetectionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/InvisibleTableDetectionService.java
@ -7,17 +7,17 @@ import java.util.List;

 import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation;
 import com.knecon.fforesight.service.layoutparser.processor.model.LineInformation;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

 import lombok.experimental.UtilityClass;

@UtilityClass
 public class InvisibleTableDetectionService {

-    public List<List<Rectangle2D>> detectTable(List<TextPositionSequence> textPositionSequences, Rectangle2D tableBBox) {
+    public List<List<Rectangle2D>> detectTable(List<Word> words, Rectangle2D tableBBox) {

-        LineInformation lineInformation = LineDetectionService.calculateLineInformation(textPositionSequences);
-        GapInformation gaps = GapDetectionService.findGapsInLines(textPositionSequences, tableBBox);
+        LineInformation lineInformation = LineDetectionService.calculateLineInformation(words);
+        GapInformation gaps = GapDetectionService.findGapsInLines(words, tableBBox);
        List<Rectangle2D> gapsAcrossLines = GapsAcrossLinesService.detectXGapsAcrossLines(gaps, tableBBox);
        List<Double> columnXCoords = gapsAcrossLines.stream().map(RectangularShape::getCenterX).toList();
        int colCount = gapsAcrossLines.size();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/LineDetectionService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/LineDetectionService.java
@ -7,7 +7,7 @@ import java.util.List;
 import com.knecon.fforesight.service.layoutparser.processor.model.GapInformation;
 import com.knecon.fforesight.service.layoutparser.processor.model.LineInformation;
 import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

 import lombok.AllArgsConstructor;
 import lombok.Getter;
@ -19,37 +19,37 @@ public class LineDetectionService {
    private static final double X_GAP_FACTOR = 1; // multiplied with average text height, determines the minimum distance of gaps in lines


-    public LineInformation calculateLineInformation(List<TextPositionSequence> sortedTextPositionSequences) {
+    public LineInformation calculateLineInformation(List<Word> sortedWords) {

-        if (sortedTextPositionSequences.isEmpty()) {
+        if (sortedWords.isEmpty()) {
            return LineFactory.init().build();
        }

-        return buildLineInformation(sortedTextPositionSequences);
+        return buildLineInformation(sortedWords);
    }


-    public List<List<Rectangle2D>> findLinesWithGaps(List<TextPositionSequence> sortedTextPositionSequences, Rectangle2D mainBodyTextFrame) {
+    public List<List<Rectangle2D>> findLinesWithGaps(List<Word> sortedWords, Rectangle2D mainBodyTextFrame) {

-        return calculateLineInformation(sortedTextPositionSequences).getBBoxWithGapsByLines();
+        return calculateLineInformation(sortedWords).getBBoxWithGapsByLines();
    }


-    public List<List<TextPositionSequence>> orderByLines(List<TextPositionSequence> sortedTextPositionSequences, Rectangle2D mainBodyTextFrame) {
+    public List<List<Word>> orderByLines(List<Word> sortedWords, Rectangle2D mainBodyTextFrame) {

-        return calculateLineInformation(sortedTextPositionSequences).getSequencesByLines();
+        return calculateLineInformation(sortedWords).getSequencesByLines();
    }


-    private static LineInformation buildLineInformation(List<TextPositionSequence> sortedTextPositionSequences) {
+    private static LineInformation buildLineInformation(List<Word> sortedWords) {

-        final double avgTextPositionHeight = getAvgTextPositionHeight(sortedTextPositionSequences);
+        final double avgTextPositionHeight = getAvgTextPositionHeight(sortedWords);

        LineFactory lineFactory = LineFactory.init();

-        var previousTextPosition = sortedTextPositionSequences.get(0);
+        var previousTextPosition = sortedWords.get(0);
        lineFactory.addToCurrentLine(previousTextPosition);
-        for (TextPositionSequence currentTextPosition : sortedTextPositionSequences.subList(1, sortedTextPositionSequences.size())) {
+        for (Word currentTextPosition : sortedWords.subList(1, sortedWords.size())) {
            if (isNewLine(currentTextPosition, previousTextPosition, avgTextPositionHeight) || isSplitByOrientation(currentTextPosition, previousTextPosition)) {
                lineFactory.startNewLine();
            } else if (isXGap(currentTextPosition, previousTextPosition, avgTextPositionHeight)) {
@ -63,25 +63,25 @@ public class LineDetectionService {
    }


-    private static double getAvgTextPositionHeight(List<TextPositionSequence> textPositionSequences) {
+    private static double getAvgTextPositionHeight(List<Word> words) {

-        return textPositionSequences.stream().mapToDouble(TextPositionSequence::getHeight).average().orElseThrow();
+        return words.stream().mapToDouble(Word::getHeight).average().orElseThrow();
    }


-    private static boolean isXGap(TextPositionSequence currentTextPosition, TextPositionSequence previousTextPosition, double avgTextPositionHeight) {
+    private static boolean isXGap(Word currentTextPosition, Word previousTextPosition, double avgTextPositionHeight) {

        return Math.abs(previousTextPosition.getMaxXDirAdj() - currentTextPosition.getXDirAdj()) > (avgTextPositionHeight * X_GAP_FACTOR);
    }


-    private static boolean isSplitByOrientation(TextPositionSequence currentTextPosition, TextPositionSequence previousTextPosition) {
+    private static boolean isSplitByOrientation(Word currentTextPosition, Word previousTextPosition) {

        return !previousTextPosition.getDir().equals(currentTextPosition.getDir());
    }


-    private static boolean isNewLine(TextPositionSequence currentTextPosition, TextPositionSequence previousTextPosition, double avgTextPositionHeight) {
+    private static boolean isNewLine(Word currentTextPosition, Word previousTextPosition, double avgTextPositionHeight) {

        return Math.abs(previousTextPosition.getYDirAdj() - currentTextPosition.getYDirAdj()) > avgTextPositionHeight;
    }
@ -96,13 +96,13 @@ public class LineDetectionService {
        List<List<Rectangle2D>> bBoxWithGapsByLines;
        List<Rectangle2D> bBoxWithGapsInCurrentLine;

-        List<List<List<TextPositionSequence>>> sequencesWithGapsByLines;
-        List<List<TextPositionSequence>> sequencesWithGapsInCurrentLine;
+        List<List<List<Word>>> sequencesWithGapsByLines;
+        List<List<Word>> sequencesWithGapsInCurrentLine;

-        List<TextPositionSequence> currentSequencesWithoutGaps;
+        List<Word> currentSequencesWithoutGaps;

-        List<List<TextPositionSequence>> sequencesByLines;
-        List<TextPositionSequence> sequencesInCurrentLine;
+        List<List<Word>> sequencesByLines;
+        List<Word> sequencesInCurrentLine;

        List<List<Rectangle2D>> xGaps;
        List<List<Rectangle2D>> yGaps;
@ -116,14 +116,14 @@ public class LineDetectionService {
            List<Rectangle2D> bBoxWithGapsInCurrentLine = new LinkedList<>();
            bBoxWithGapsByLines.add(bBoxWithGapsInCurrentLine);

-            List<List<List<TextPositionSequence>>> sequencesWithGapsByLines = new LinkedList<>();
-            List<List<TextPositionSequence>> sequencesWithGapsInCurrentLine = new LinkedList<>();
+            List<List<List<Word>>> sequencesWithGapsByLines = new LinkedList<>();
+            List<List<Word>> sequencesWithGapsInCurrentLine = new LinkedList<>();
            sequencesWithGapsByLines.add(sequencesWithGapsInCurrentLine);
-            List<TextPositionSequence> currentSequencesWithoutGaps = new LinkedList<>();
+            List<Word> currentSequencesWithoutGaps = new LinkedList<>();
            sequencesWithGapsInCurrentLine.add(currentSequencesWithoutGaps);

-            List<List<TextPositionSequence>> sequencesByLines = new LinkedList<>();
-            List<TextPositionSequence> sequencesInCurrentLine = new LinkedList<>();
+            List<List<Word>> sequencesByLines = new LinkedList<>();
+            List<Word> sequencesInCurrentLine = new LinkedList<>();
            sequencesByLines.add(sequencesInCurrentLine);

            return new LineFactory(lineBBox,
@ -178,13 +178,13 @@ public class LineDetectionService {
        }


-        private Rectangle2D textPositionBBox(List<TextPositionSequence> textPositionSequences) {
+        private Rectangle2D textPositionBBox(List<Word> words) {

-            return RectangleTransformations.rectangle2DBBox(textPositionSequences.stream().map(TextPositionSequence::getBBox).toList());
+            return RectangleTransformations.rectangle2DBBox(words.stream().map(Word::getBBox).toList());
        }


-        public void addToCurrentLine(TextPositionSequence current) {
+        public void addToCurrentLine(Word current) {

            sequencesInCurrentLine.add(current);
            currentSequencesWithoutGaps.add(current);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/PageContentExtractor.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/PageContentExtractor.java
@ -13,7 +13,7 @@ import org.apache.pdfbox.pdmodel.PDPage;
 import org.springframework.core.io.ClassPathResource;

 import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper;
 import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;

@ -40,7 +40,7 @@ public class PageContentExtractor {
                stripper.setPdpage(pdPage);
                stripper.getText(pdDocument);

-                Map<Float, List<TextPositionSequence>> sortedTextPositionSequencesPerDir = stripper.getTextPositionSequences()
+                Map<Float, List<Word>> sortedTextPositionSequencesPerDir = stripper.getWords()
                        .stream()
                        .collect(Collectors.groupingBy(textPositionSequence -> textPositionSequence.getDir().getDegrees()));

@ -57,7 +57,7 @@ public class PageContentExtractor {
    }


-    public List<TextPositionSequence> sortByDirAccordingToPageRotation(Map<Float, List<TextPositionSequence>> sortedTextPositionSequencesPerDir, int rotation) {
+    public List<Word> sortByDirAccordingToPageRotation(Map<Float, List<Word>> sortedTextPositionSequencesPerDir, int rotation) {

        LinkedList<Float> sortedKeys = new LinkedList<>(sortedTextPositionSequencesPerDir.keySet().stream().sorted().toList());

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/PageInformationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/PageInformationService.java
@ -14,9 +14,9 @@ public class PageInformationService {

    public PageInformation build(PageContents pageContents) {

-        LineInformation lineInformation = LineDetectionService.calculateLineInformation(pageContents.getSortedTextPositionSequences());
+        LineInformation lineInformation = LineDetectionService.calculateLineInformation(pageContents.getSortedWords()); // input for the main body text frame calculation below
        Rectangle2D mainBodyTextFrame = MainBodyTextFrameExtractionService.calculateMainBodyTextFrame(lineInformation);
-        GapInformation gapInformation = GapDetectionService.findGapsInLines(pageContents.getSortedTextPositionSequences(), mainBodyTextFrame);
+        GapInformation gapInformation = GapDetectionService.findGapsInLines(pageContents.getSortedWords(), mainBodyTextFrame); // needs the frame, so it has to run after the frame calculation

        return new PageInformation(pageContents, lineInformation, mainBodyTextFrame, gapInformation);
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SimplifiedSectionTextService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SimplifiedSectionTextService.java
@ -6,10 +6,11 @@ import java.util.stream.Stream;

 import org.springframework.stereotype.Service;

+import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
 import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
-import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;

 import lombok.extern.slf4j.Slf4j;

@ -23,18 +24,29 @@ public class SimplifiedSectionTextService {
                .stream()
                .map(this::toSimplifiedSectionText)
                .toList();
-        List<SimplifiedSectionText> simplifiedHeadersList = document.getHeaders()
-                .stream()
+        List<SimplifiedSectionText> simplifiedHeadersList = document.streamAllSubNodesOfType(NodeType.HEADER)
                .map(this::toSimplifiedSectionText)
                .toList();
-        List<SimplifiedSectionText> simplifiedFootersList = document.getFooters()
-                .stream()
+        List<SimplifiedSectionText> simplifiedFootersList = document.streamAllSubNodesOfType(NodeType.FOOTER)
                .map(this::toSimplifiedSectionText)
                .toList();
        List<SimplifiedSectionText> simplifiedText = Stream.of(simplifiedMainSectionsList, simplifiedHeadersList, simplifiedFootersList)
                .flatMap(List::stream)
                .collect(Collectors.toList());
-        return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedText).build();
+        return SimplifiedText.builder()
+                .numberOfPages(document.getNumberOfPages())
+                .sectionTexts(simplifiedText)
+                .mainSectionNumbers(document.getAllSections()
+                                            .stream()
+                                            .map(this::getSectionNumber)
+                                            .toList())
+                .headerSectionNumbers(document.streamAllSubNodesOfType(NodeType.HEADER)
+                                              .map(this::getSectionNumber)
+                                              .toList())
+                .footerSectionNumbers(document.streamAllSubNodesOfType(NodeType.FOOTER)
+                                              .map(this::getSectionNumber)
+                                              .toList())
+                .build();
    }


@ -49,4 +61,13 @@ public class SimplifiedSectionTextService {
                .build();
    }

+
+    /**
+     * Renders the tree id of a node as a dot-separated section number.
+     *
+     * @param semanticNode node whose tree id is rendered
+     * @return the tree id entries, converted with {@link String#valueOf(Object)} and joined with {@code "."} in their original order
+     */
+    private String getSectionNumber(SemanticNode semanticNode) {
+
+        List<String> treeIdParts = semanticNode.getTreeId()
+                .stream()
+                .map(String::valueOf)
+                .toList();
+        return String.join(".", treeIdParts);
+    }
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java
@ -5,7 +5,7 @@ import java.util.List;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

 import lombok.experimental.UtilityClass;

@ -17,9 +17,9 @@ public class TextRulingsClassifier {
    private final static double TEXT_BBOX_THRESHOLD_FACTOR = 0.15; // multiplied with text width then subtracted from word width. If ruling covers this width, it is considered as strikethrough/underline.


-    public static void classifyUnderlinedAndStrikethroughText(List<TextPositionSequence> words, CleanRulings cleanRulings) {
+    public static void classifyUnderlinedAndStrikethroughText(List<Word> words, CleanRulings cleanRulings) {

-        for (TextPositionSequence word : words) {
+        for (Word word : words) {
            if (word.getDir().equals(TextDirection.ZERO) || word.getDir().equals(TextDirection.HALF_CIRCLE)) {
                handleHorizontalText(cleanRulings, word);
            } else {
@ -29,7 +29,7 @@ public class TextRulingsClassifier {
    }


-    private static void handleVerticalText(CleanRulings cleanRulings, TextPositionSequence word) {
+    private static void handleVerticalText(CleanRulings cleanRulings, Word word) {

        float lowerY = (float) (word.getBBoxPdf().getMinY() + TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth());
        float upperY = (float) (word.getBBoxPdf().getMaxY() - TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth());
@ -63,7 +63,7 @@ public class TextRulingsClassifier {
    }


-    private static void handleHorizontalText(CleanRulings cleanRulings, TextPositionSequence word) {
+    private static void handleHorizontalText(CleanRulings cleanRulings, Word word) {

        float leftX = (float) (word.getBBoxPdf().getMinX() + TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth());
        float rightX = (float) (word.getBBoxPdf().getMaxX() - TEXT_BBOX_THRESHOLD_FACTOR * word.getWidth());
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java
@ -7,23 +7,44 @@ import java.util.List;
 import java.util.ListIterator;
 import java.util.Locale;

-import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.similarity.LevenshteinDistance;
 import org.springframework.stereotype.Service;

-import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.model.SectionIdentifier;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
+import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation;
+import com.knecon.fforesight.service.layoutparser.processor.utils.TextNormalizationUtilities;

 import lombok.Data;

@Service
 public class BlockificationPostprocessingService {

-    private static final float BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD = 5.0f;
+    private static final float STRING_SIMILARITY_THRESHOLD = 0.1f;
+
+
+    /**
+     * Matches the PDF outline entries of a single page against that page's text blocks and records them in the debug layer.
+     * <p>
+     * The outline objects registered for {@code pageNumber} are attached to the page and handed to
+     * {@link #sanitizeOutlineBlocks(ClassificationPage, OutlineObject)}; nothing is attached or matched when the page has no outline entries.
+     * The debug layer receives the (possibly empty) outline object list in every case.
+     * <p>
+     * NOTE(review): this method keeps no state between invocations, so an outline object that could not be found on an earlier page is never
+     * passed on as {@code notFoundOutlineObject}. If that carry-over is required, the object returned by {@code sanitizeOutlineBlocks} has to be
+     * stored by the caller (or on the document) and supplied again for the next page.
+     *
+     * @param classificationDocument document providing the outline object tree and the layout debug layer
+     * @param pageNumber             key used to look up the outline objects of the page
+     * @param classificationPage     page whose text blocks are matched against the outline objects
+     * @param pageInformation        page information forwarded to the debug layer
+     */
+    public void findHeadlinesFromOutline(ClassificationDocument classificationDocument, int pageNumber, ClassificationPage classificationPage, PageInformation pageInformation) {
+
+        List<OutlineObject> outlineObjects = classificationDocument.getOutlineObjectTree().getOutlineObjectsPerPage().getOrDefault(pageNumber, new ArrayList<>());
+
+        if (!outlineObjects.isEmpty()) {
+            classificationPage.setOutlineObjects(outlineObjects);
+            // No unmatched outline object from a previous page is available here (see note above), hence null.
+            sanitizeOutlineBlocks(classificationPage, null);
+        }
+        classificationDocument.getLayoutDebugLayer().addOutlineObjects(outlineObjects, pageInformation);
+    }


    public OutlineObject sanitizeOutlineBlocks(ClassificationPage classificationPage, OutlineObject notFoundOutlineObject) {
@ -34,38 +55,36 @@ public class BlockificationPostprocessingService {
            return null;
        }

-        float pageHeight = classificationPage.getPageHeight();
-
        ListIterator<OutlineObject> outlineObjectListIterator = outlineObjects.listIterator();

        if (notFoundOutlineObject != null) {
            OutlineProcessionContext notFoundOutlineObjectProcessionContext = new OutlineProcessionContext(notFoundOutlineObject);
-            processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, notFoundOutlineObjectProcessionContext);
+            processTextBlocks(getTextPageBlocks(classificationPage), notFoundOutlineObjectProcessionContext);

            OutlineObject firstOutlineObject = null;
            OutlineProcessionContext firstOutlineObjectProcessionContext = null;
            if (outlineObjectListIterator.hasNext()) {
                firstOutlineObject = outlineObjectListIterator.next();
                firstOutlineObjectProcessionContext = new OutlineProcessionContext(firstOutlineObject);
-                processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, firstOutlineObjectProcessionContext);
+                processTextBlocks(getTextPageBlocks(classificationPage), firstOutlineObjectProcessionContext);
            }

            if (!contextsOverlap(notFoundOutlineObjectProcessionContext, firstOutlineObjectProcessionContext)) {
-                notFoundOutlineObject.setFound(selectMatch(classificationPage, notFoundOutlineObjectProcessionContext, pageHeight));
+                notFoundOutlineObject.setFound(selectMatch(classificationPage, notFoundOutlineObjectProcessionContext));
            }
            if (firstOutlineObject != null) {
                // re-create the context for the updated blocks
                firstOutlineObjectProcessionContext = new OutlineProcessionContext(firstOutlineObject);
-                processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, firstOutlineObjectProcessionContext);
-                firstOutlineObject.setFound(selectMatch(classificationPage, firstOutlineObjectProcessionContext, pageHeight));
+                processTextBlocks(getTextPageBlocks(classificationPage), firstOutlineObjectProcessionContext);
+                firstOutlineObject.setFound(selectMatch(classificationPage, firstOutlineObjectProcessionContext));
            }

        }

        outlineObjectListIterator.forEachRemaining(outlineObject -> {
            OutlineProcessionContext outlineObjectProcessionContext = new OutlineProcessionContext(outlineObject);
-            processTextBlocks(getTextPageBlocks(classificationPage), pageHeight, outlineObjectProcessionContext);
-            outlineObject.setFound(selectMatch(classificationPage, outlineObjectProcessionContext, pageHeight));
+            processTextBlocks(getTextPageBlocks(classificationPage), outlineObjectProcessionContext);
+            outlineObject.setFound(selectMatch(classificationPage, outlineObjectProcessionContext));
        });

        if (!outlineObjects.isEmpty()) {
@ -104,8 +123,7 @@ public class BlockificationPostprocessingService {

        double maxYFirst = blocksOfFirstOutline.stream()
                .mapToDouble(TextPageBlock::getPdfMaxY)
-                .max()
-                .orElse(Double.NEGATIVE_INFINITY);
+                .max().orElse(Double.NEGATIVE_INFINITY);

        return blocksOfNotFoundOutline.stream()
                .mapToDouble(TextPageBlock::getPdfMaxY)
@ -127,13 +145,13 @@ public class BlockificationPostprocessingService {
    }


-    private void processTextBlocks(List<TextPageBlock> textBlocks, float pageHeight, OutlineProcessionContext context) {
+    private void processTextBlocks(List<TextPageBlock> textBlocks, OutlineProcessionContext context) {

        OutlineObject outlineObject = context.getOutlineObject();
        ListIterator<TextPageBlock> iterator = textBlocks.listIterator();
        while (iterator.hasNext()) {
            TextPageBlock pageBlock = iterator.next();
-            if (pageHeight - outlineObject.getPoint().getY() - BLOCK_TO_OUTLINE_DISTANCE_THRESHOLD <= pageBlock.getMaxY()) {
+            if (outlineObject.isAbove(pageBlock)) {
                break;
            }
        }
@ -148,7 +166,7 @@ public class BlockificationPostprocessingService {
    }


-    private boolean selectMatch(ClassificationPage classificationPage, OutlineProcessionContext context, float pageHeight) {
+    private boolean selectMatch(ClassificationPage classificationPage, OutlineProcessionContext context) {

        OutlineObject outlineObject = context.outlineObject;
        TextPageBlock directMatch = context.directMatch;
@ -156,8 +174,8 @@ public class BlockificationPostprocessingService {
        TextPageBlock splitCandidate = context.splitCandidate;
        PageBlockType headlineType = PageBlockType.getHeadlineType(outlineObject.getTreeDepth());

-        double distanceToDirectMatch = directMatch != null ? calculateDistance(outlineObject, directMatch, pageHeight) : Double.MAX_VALUE;
-        double distanceToSplitCandidate = splitCandidate != null ? calculateDistance(outlineObject, splitCandidate, pageHeight) : Double.MAX_VALUE;
+        double distanceToDirectMatch = directMatch != null ? calculateDistance(outlineObject, directMatch) : Double.MAX_VALUE;
+        double distanceToSplitCandidate = splitCandidate != null ? calculateDistance(outlineObject, splitCandidate) : Double.MAX_VALUE;

        double distanceToBestMergeCandidates = Double.MAX_VALUE;
        List<TextPageBlock> bestMergeCandidateCombination = new ArrayList<>();
@ -177,9 +195,8 @@ public class BlockificationPostprocessingService {

            for (List<TextPageBlock> combination : combinations) {
                double averageDistance = combination.stream()
-                        .map(block -> calculateDistance(outlineObject, block, pageHeight))
-                        .mapToDouble(Double::doubleValue).average()
-                        .orElse(Double.MAX_VALUE);
+                        .map(block -> calculateDistance(outlineObject, block))
+                        .mapToDouble(Double::doubleValue).average().orElse(Double.MAX_VALUE);
                if (distanceToBestMergeCandidates > averageDistance) {
                    distanceToBestMergeCandidates = averageDistance;
                    bestMergeCandidateCombination = combination;
@ -225,14 +242,14 @@ public class BlockificationPostprocessingService {
            headline = sectionIdentifier + headline;
        }

-        WordSequenceResult wordSequenceResult = findWordSequence(blockToSplit.getSequences(), headline);
+        WordSequenceResult wordSequenceResult = findWordSequence(blockToSplit.getWords(), headline);
        if (wordSequenceResult.inSequence.isEmpty() && !headline.equals(title)) {
-            wordSequenceResult = findWordSequence(blockToSplit.getSequences(), title);
+            wordSequenceResult = findWordSequence(blockToSplit.getWords(), title);
        }

        boolean modifiedBlockToSplit = false;
        if (!wordSequenceResult.inSequence.isEmpty()) {
-            blockToSplit.setSequences(wordSequenceResult.inSequence);
+            blockToSplit.setWords(wordSequenceResult.inSequence);
            blockToSplit.recalculateBBox();
            modifiedBlockToSplit = true;
        }
@ -253,19 +270,19 @@ public class BlockificationPostprocessingService {
    }


-    private static WordSequenceResult findWordSequence(List<TextPositionSequence> textPositionSequences, String text) {
+    private static WordSequenceResult findWordSequence(List<Word> words, String text) {

        String target = sanitizeString(text);
-        List<TextPositionSequence> inSequence = new ArrayList<>();
-        List<TextPositionSequence> preSequence = new ArrayList<>();
-        List<TextPositionSequence> postSequence = new ArrayList<>();
+        List<Word> inSequence = new ArrayList<>();
+        List<Word> preSequence = new ArrayList<>();
+        List<Word> postSequence = new ArrayList<>();
        StringBuilder currentSequence = new StringBuilder();

        if (target.isBlank()) {
            return new WordSequenceResult();
        }

-        for (TextPositionSequence sequence : textPositionSequences) {
+        for (Word sequence : words) {

            currentSequence.append(sanitizeString(sequence.toString()));
            inSequence.add(sequence);
@ -277,10 +294,10 @@ public class BlockificationPostprocessingService {
                    int index = 0;
                    String toRemove = currentSequence.substring(0, currentSequence.length() - target.length());

-                    TextPositionSequence next = inSequence.get(index);
+                    Word next = inSequence.get(index);
                    while (currentSequence.length() - next.length() >= target.length()) {

-                        TextPositionSequence removed = inSequence.remove(index);
+                        Word removed = inSequence.remove(index);
                        currentSequence.delete(0, removed.toString().length());
                        preSequence.add(removed);

@ -309,7 +326,7 @@ public class BlockificationPostprocessingService {
                }

                if (currentSequence.toString().equals(target)) {
-                    postSequence.addAll(textPositionSequences.subList(textPositionSequences.indexOf(sequence) + 1, textPositionSequences.size()));
+                    postSequence.addAll(words.subList(words.indexOf(sequence) + 1, words.size()));
                    return new WordSequenceResult(inSequence, preSequence, postSequence);
                }
            }
@ -319,10 +336,10 @@ public class BlockificationPostprocessingService {
    }


-    private static SplitSequenceResult splitSequence(TextPositionSequence sequence, String toRemove) {
+    private static SplitSequenceResult splitSequence(Word sequence, String toRemove) {

-        TextPositionSequence in = null;
-        TextPositionSequence out;
+        Word in = null;
+        Word out;

        String currentSequence = sequence.toString().toLowerCase(Locale.ROOT);
        int index = currentSequence.indexOf(toRemove);
@ -332,17 +349,17 @@ public class BlockificationPostprocessingService {

        if (index > 0) {
            in = createSubSequence(sequence, 0, index);
-        } else if (endIndex < sequence.getTextPositions().size()) {
-            in = createSubSequence(sequence, endIndex, sequence.getTextPositions().size());
+        } else if (endIndex < sequence.length()) {
+            in = createSubSequence(sequence, endIndex, sequence.length());
        }

        return new SplitSequenceResult(in, out);
    }


-    private static TextPositionSequence createSubSequence(TextPositionSequence sequence, int start, int end) {
+    private static Word createSubSequence(Word sequence, int start, int end) {

-        TextPositionSequence newSeq = new TextPositionSequence(new ArrayList<>(sequence.getTextPositions().subList(start, end)), sequence.getPage());
+        Word newSeq = new Word(new ArrayList<>(sequence.getCharacters().subList(start, end)), sequence.getPage()); // start inclusive, end exclusive; copied into a new list because subList only returns a view
        newSeq.setParagraphStart(sequence.isParagraphStart()); // keep the paragraph-start flag of the source word
        return newSeq;
    }
@ -357,10 +374,10 @@ public class BlockificationPostprocessingService {
            List<TextPageBlock> mergedBlocks = new ArrayList<>();
            for (TextPageBlock textPageBlock : blocksToMerge.subList(1, blocksToMerge.size())) {

-                if (firstBlock != null && !firstBlock.getSequences().isEmpty()) {
+                if (firstBlock != null && !firstBlock.getWords().isEmpty()) {

                    if (textPageBlock.getDir() == firstBlock.getDir()) {
-                        firstBlock.getSequences().addAll(textPageBlock.getSequences());
+                        firstBlock.addAll(textPageBlock.getWords());
                        mergedBlocks.add(textPageBlock);
                    }
                }
@ -406,11 +423,9 @@ public class BlockificationPostprocessingService {
    }


-    private double calculateDistance(OutlineObject outlineObject, TextPageBlock pageBlock, float pageHeight) {
+    private double calculateDistance(OutlineObject outlineObject, TextPageBlock pageBlock) {

-        double deltaX = outlineObject.getPoint().getX() - pageBlock.getMinX();
-        double deltaY = pageHeight - outlineObject.getPoint().getY() - pageBlock.getMinY();
-        return Math.sqrt(deltaX * deltaX + deltaY * deltaY);
+        return outlineObject.distance(pageBlock); // the distance calculation is delegated to OutlineObject; the page height is no longer passed in
    }


@ -427,6 +442,13 @@ public class BlockificationPostprocessingService {
        String blockText = sanitizeString(pageBlock.getText());
        String outlineTitle = sanitizeString(outlineObject.getTitle());

+        int threshold = (int) (Math.min(blockText.length(), outlineTitle.length()) * STRING_SIMILARITY_THRESHOLD) + 1;
+        int distance = new LevenshteinDistance(threshold).apply(blockText, outlineTitle);
+        if (distance >= 0 && distance < threshold) {
+            context.directMatch = pageBlock;
+            return true;
+        }
+
        boolean blockTextContainsOutlineTitle = blockText.contains(outlineTitle);
        boolean outlineTitleContainsBlockText = outlineTitle.contains(blockText);

@ -465,7 +487,9 @@ public class BlockificationPostprocessingService {

    private static String sanitizeString(String text) {

-        return StringUtils.deleteWhitespace(text).toLowerCase(Locale.ROOT);
+        return TextNormalizationUtilities.removeAllWhitespaces(text)//
+                .trim() // strips leading/trailing characters <= U+0020, e.g. trailing NUL bytes that sometimes remain at the end of the string
+                .toLowerCase(Locale.ENGLISH); // fixed locale, so the result does not depend on the JVM default locale
    }


@ -492,12 +516,12 @@ public class BlockificationPostprocessingService {

    public static class WordSequenceResult {

-        public List<TextPositionSequence> inSequence;
-        public List<TextPositionSequence> preSequence;
-        public List<TextPositionSequence> postSequence;
+        public List<Word> inSequence;
+        public List<Word> preSequence;
+        public List<Word> postSequence;


-        public WordSequenceResult(List<TextPositionSequence> inSequence, List<TextPositionSequence> preSequence, List<TextPositionSequence> postSequence) {
+        public WordSequenceResult(List<Word> inSequence, List<Word> preSequence, List<Word> postSequence) {

            this.inSequence = inSequence;
            this.preSequence = preSequence;
@ -518,7 +542,7 @@ public class BlockificationPostprocessingService {

    }

-    public record SplitSequenceResult(TextPositionSequence in, TextPositionSequence out) {
+    public record SplitSequenceResult(Word in, Word out) {

    }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java
@ -14,7 +14,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;

 import lombok.RequiredArgsConstructor;
@ -30,7 +30,7 @@ public class DocstrumBlockificationService {
    static final float THRESHOLD = 1f;


-    public ClassificationPage blockify(List<TextPositionSequence> textPositions,
+    public ClassificationPage blockify(List<Word> textPositions,
                                       CleanRulings rulings,
                                       boolean xyOrder,
                                       LayoutDebugLayer visualizations,
@ -72,16 +72,16 @@ public class DocstrumBlockificationService {
        List<AbstractPageBlock> abstractPageBlocks = new ArrayList<>();
        zones.forEach(zone -> {

-            List<TextPositionSequence> textPositionSequences = new ArrayList<>();
+            List<Word> words = new ArrayList<>();
            zone.getLines()
                    .forEach(line -> {
                        line.getWords()
                                .forEach(word -> {
-                                    textPositionSequences.add(new TextPositionSequence(word.getTextPositions(), word.getPage()));
+                                    words.add(new Word(word.getCharacters(), word.getPage()));
                                });
                    });

-            abstractPageBlocks.add(buildTextBlock(textPositionSequences, 0));
+            abstractPageBlocks.add(buildTextBlock(words, 0));
        });

        return abstractPageBlocks;
@ -102,7 +102,7 @@ public class DocstrumBlockificationService {
            }
            TextPageBlock current = (TextPageBlock) block;

-            if (previous != null && !previous.getSequences().isEmpty()) {
+            if (previous != null && !previous.getWords().isEmpty()) {

                if (current.getDir() != previous.getDir() || usedRulings.lineBetween(current, previous)) {
                    previous = current;
@ -182,8 +182,8 @@ public class DocstrumBlockificationService {

    private TextPageBlock combineBlocksAndResetIterator(TextPageBlock previous, TextPageBlock current, ListIterator<AbstractPageBlock> itty, boolean toDuplicate) {

-        previous.getSequences().addAll(current.getSequences());
-        previous = buildTextBlock(previous.getSequences(), 0);
+        previous.addAll(current.getWords());
+        previous = buildTextBlock(previous.getWords(), 0);
        previous.setToDuplicate(toDuplicate);
        if (current.getClassification() != null && previous.getClassification() == null) {
            previous.setClassification(current.getClassification());
@ -283,8 +283,8 @@ public class DocstrumBlockificationService {
                if (current.getDir() == inner.getDir() && current.intersects(inner, yThreshold, xThreshold)) {

                    boolean toDuplicate = current.isToDuplicate() || inner.isToDuplicate();
-                    current.getSequences().addAll(inner.getSequences());
-                    current = buildTextBlock(current.getSequences(), 0);
+                    current.addAll(inner.getWords());
+                    current = buildTextBlock(current.getWords(), 0);

                    current.setToDuplicate(toDuplicate);
                    blocks.set(i, null);
@ -301,7 +301,7 @@ public class DocstrumBlockificationService {
    }


-    public static TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
+    public static TextPageBlock buildTextBlock(List<Word> wordBlockList, int indexOnPage) {

        return new TextPageBlock(wordBlockList); // NOTE(review): indexOnPage is not used; the block is built from the words only
    }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java
@ -2,23 +2,28 @@ package com.knecon.fforesight.service.layoutparser.processor.services.blockifica

 import java.util.ArrayList;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;

 import org.springframework.stereotype.Service;

+import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine;
 import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.Orientation;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;

+@SuppressWarnings("all")
@Service
 public class DocuMineBlockificationService {

    static final float THRESHOLD = 1f;
+    public static final double FONT_SIZE_CHANGE_RATIO = 0.15;

    Pattern pattern = Pattern.compile("^(\\d{1,2}\\.){1,3}\\d{1,2}\\.?\\s[0-9A-Za-z ()-]{2,50}", Pattern.CASE_INSENSITIVE);

@ -32,9 +37,9 @@ public class DocuMineBlockificationService {
     * @param cleanRulings  All rulings on a page
     * @return Page object that contains the Textblock and text statistics.
     */
-    public ClassificationPage blockify(List<TextPositionSequence> textPositions, CleanRulings cleanRulings) {
+    public ClassificationPage blockify(List<Word> textPositions, CleanRulings cleanRulings) {

-        List<TextPositionSequence> chunkWords = new ArrayList<>();
+        List<Word> chunkWords = new ArrayList<>();
        List<AbstractPageBlock> textPageBlocks = new ArrayList<>();

        CleanRulings usedRulings = cleanRulings.withoutTextRulings();
@ -43,11 +48,11 @@ public class DocuMineBlockificationService {
        double maxX = 0;
        double minY = 1000;
        double maxY = 0;
-        TextPositionSequence prev = null;
+        Word prev = null;

        boolean wasSplitted = false;
        Double splitX1 = null;
-        for (TextPositionSequence word : textPositions) {
+        for (Word word : textPositions) {

            boolean lineSeparation = prev != null && word.getYDirAdj() - prev.getMaxYDirAdj() > Math.min(word.getHeight(), prev.getHeight()) * 1.1;
            boolean startFromTop = prev != null && word.getYDirAdj() < prev.getYDirAdj() - prev.getTextHeight();
@ -56,9 +61,7 @@ public class DocuMineBlockificationService {
            boolean newLineAfterSplit = prev != null && word.getYDirAdj() != prev.getYDirAdj() && wasSplitted && splitX1 != word.getXDirAdj();
            boolean isSplitByRuling = prev != null && usedRulings.lineBetween(prev, word);
            boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
-            boolean splitByOtherFontAndOtherY = prev != null && Math.abs(prev.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * 0.2 //
-                                                && (word.getFontStyle().contains("bold") && !prev.getFontStyle().contains("bold") //
-                                                    || prev.getFontStyle().contains("bold") && !word.getFontStyle().contains("bold"));
+            boolean splitByOtherFontAndOtherY = prev != null && Math.abs(prev.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * 0.2 && isFontChange(word, prev);

            Matcher matcher = pattern.matcher(chunkWords.stream()
                                                      .collect(Collectors.joining(" ")).toString());
@ -120,5 +123,86 @@ public class DocuMineBlockificationService {
        return new ClassificationPage(textPageBlocks);
    }

+
+    /**
+     * Decides whether two words differ enough typographically to count as a font change.
+     * <p>
+     * A change is reported when exactly one of the two words has a font style containing "bold", or when
+     * the font sizes or the text heights differ by at least {@link #FONT_SIZE_CHANGE_RATIO} times the
+     * smaller of the two compared values.
+     * <p>
+     * NOTE(review): two words whose font sizes (or text heights) are both 0 also satisfy the {@code >=}
+     * comparison and are therefore reported as a font change - confirm this is intended.
+     *
+     * @param word the word currently being inspected
+     * @param prev the word it is compared against
+     * @return {@code true} if any of the criteria above holds
+     */
+    private static boolean isFontChange(Word word, Word prev) {
+
+        // Exactly one of the two words is bold.
+        if (word.getFontStyle().contains("bold") != prev.getFontStyle().contains("bold")) {
+            return true;
+        }
+
+        double fontSizeDelta = Math.abs(prev.getFontSize() - word.getFontSize());
+        double fontSizeTolerance = FONT_SIZE_CHANGE_RATIO * Math.min(prev.getFontSize(), word.getFontSize());
+        if (fontSizeDelta >= fontSizeTolerance) {
+            return true;
+        }
+
+        double textHeightDelta = Math.abs(word.getTextHeight() - prev.getTextHeight());
+        double textHeightTolerance = FONT_SIZE_CHANGE_RATIO * Math.min(prev.getTextHeight(), word.getTextHeight());
+        return textHeightDelta >= textHeightTolerance;
+    }
+
+
+    /**
+     * Merges text blocks of a page into each other, modifying the page's block list in place.
+     * <p>
+     * Every text block is compared against every other entry of the list. Two blocks are merged when they
+     * have the same direction, intersect within the given thresholds, are not separated by a ruling and
+     * the outer block either has no classification or the same classification as the other block.
+     * Table blocks, {@code null} entries and blocks recognised as headlines from the outline are never
+     * merged. The merged block is rebuilt via {@link #buildTextBlock(List, int)}, takes over the
+     * classification of the absorbed block and is flagged as duplicate if either source block was.
+     * Absorbed entries are first replaced by {@code null} and removed once all blocks were processed.
+     *
+     * @param page        the page whose text blocks are merged
+     * @param usedRulings rulings used to veto merges across a line
+     * @param xThreshold  passed to {@code TextPageBlock#intersects} as its third argument
+     * @param yThreshold  passed to {@code TextPageBlock#intersects} as its second argument
+     */
+    public void mergeblocks(ClassificationPage page, CleanRulings usedRulings, float xThreshold, float yThreshold) {
+
+        var blocks = page.getTextBlocks();
+        ListIterator<AbstractPageBlock> outer = blocks.listIterator();
+        while (outer.hasNext()) {
+            AbstractPageBlock candidate = outer.next();
+            if (candidate == null || candidate instanceof TablePageBlock) {
+                continue;
+            }
+
+            TextPageBlock current = (TextPageBlock) candidate;
+
+            for (int i = 0; i < blocks.size(); i++) {
+
+                AbstractPageBlock other = blocks.get(i);
+                if (other == null || other == current || other instanceof TablePageBlock) {
+                    continue;
+                }
+
+                // current may have been replaced by a merged block, so it is re-checked on every pass
+                if (isHeadlineFromOutline(current) || isHeadlineFromOutline(other)) {
+                    continue;
+                }
+
+                TextPageBlock inner = (TextPageBlock) other;
+
+                if (usedRulings.lineBetween(current, other)) {
+                    continue;
+                }
+                if (current.getDir() != inner.getDir()) {
+                    continue;
+                }
+                if (!current.intersects(inner, yThreshold, xThreshold)) {
+                    continue;
+                }
+                if (current.getClassification() != null && !current.getClassification().equals(inner.getClassification())) {
+                    continue;
+                }
+
+                boolean toDuplicate = current.isToDuplicate() || inner.isToDuplicate();
+                current.addAll(inner.getWords());
+                current = buildTextBlock(current.getWords(), 0);
+                current.setClassification(inner.getClassification());
+                current.setToDuplicate(toDuplicate);
+
+                // mark the absorbed block for removal and publish the merged block at the outer position
+                blocks.set(i, null);
+                outer.set(current);
+            }
+        }
+
+        // drop the placeholders left behind by absorbed blocks
+        for (var cleanup = blocks.iterator(); cleanup.hasNext(); ) {
+            if (cleanup.next() == null) {
+                cleanup.remove();
+            }
+        }
+    }
+
+
+    /**
+     * Checks whether a block was produced by the {@link LayoutEngine#OUTLINE} engine and carries a headline classification.
+     *
+     * @param abstractPageBlock the block to inspect
+     * @return {@code true} if the block's engines contain {@code OUTLINE} and its classification is a non-null headline type
+     */
+    private boolean isHeadlineFromOutline(AbstractPageBlock abstractPageBlock) {
+
+        if (!abstractPageBlock.getEngines().contains(LayoutEngine.OUTLINE)) {
+            return false;
+        }
+        var classification = abstractPageBlock.getClassification();
+        return classification != null && classification.isHeadline();
+    }
+
+
+    /**
+     * Creates a {@link TextPageBlock} from the given words.
+     *
+     * @param wordBlockList the words the block is constructed from
+     * @param indexOnPage   not used by this implementation
+     * @return the newly constructed text block
+     */
+    public static TextPageBlock buildTextBlock(List<Word> wordBlockList, int indexOnPage) {
+
+        TextPageBlock textBlock = new TextPageBlock(wordBlockList);
+        return textBlock;
+    }
+
 }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/RedactManagerBlockificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/RedactManagerBlockificationService.java
@ -11,7 +11,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.Orientation;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
 import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutDebugLayer;

@SuppressWarnings("all")
@ -30,20 +30,20 @@ public class RedactManagerBlockificationService {
     * @param visualizations
     * @return Page object that contains the Textblock and text statistics.
     */
-    public ClassificationPage blockify(List<TextPositionSequence> textPositions, CleanRulings cleanRulings, LayoutDebugLayer visualizations) {
+    public ClassificationPage blockify(List<Word> textPositions, CleanRulings cleanRulings, LayoutDebugLayer visualizations) {

        CleanRulings usedRulings = cleanRulings.withoutTextRulings();

        int indexOnPage = 0;
-        List<TextPositionSequence> chunkWords = new ArrayList<>();
+        List<Word> chunkWords = new ArrayList<>();
        List<AbstractPageBlock> chunkBlockList = new ArrayList<>();

        double minX = 1000, maxX = 0, minY = 1000, maxY = 0;
-        TextPositionSequence prev = null;
+        Word prev = null;

        boolean wasSplitted = false;
        Double splitX1 = null;
-        for (TextPositionSequence word : textPositions) {
+        for (Word word : textPositions) {

            boolean lineSeparation = word.getYDirAdj() - maxY > word.getHeight() * 1.25;
            boolean startFromTop = prev != null && word.getYDirAdj() < prev.getYDirAdj() - prev.getTextHeight();
@ -161,7 +161,6 @@ public class RedactManagerBlockificationService {
        }
        if (!textPositions.isEmpty()) {
            visualizations.addTextBlockVisualizations(chunkBlockList.stream()
-                                                              .map(tb -> (TextPageBlock) tb)
                                                              .toList(), textPositions.get(0).getPage());
        }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClarifyndClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClarifyndClassificationService.java
@ -10,7 +10,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
 import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;

 import lombok.RequiredArgsConstructor;
@ -23,7 +22,7 @@ public class ClarifyndClassificationService {

    public void classifyDocument(ClassificationDocument document) {

-        List<Double> headlineFontSizes = document.getFontSizeCounter().getHigherThanMostPopular();
+        List<Double> headlineFontSizes = document.getFontSizeCounter().getValuesInReverseOrder();

        log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());

@ -35,7 +34,10 @@ public class ClarifyndClassificationService {
    }


-    private void classifyPage(HeadlineClassificationService headlineClassificationService, ClassificationPage page, ClassificationDocument document, List<Double> headlineFontSizes) {
+    private void classifyPage(HeadlineClassificationService headlineClassificationService,
+                              ClassificationPage page,
+                              ClassificationDocument document,
+                              List<Double> headlineFontSizes) {

        for (AbstractPageBlock textBlock : page.getTextBlocks()) {
            if (textBlock instanceof TextPageBlock) {
@ -45,7 +47,11 @@ public class ClarifyndClassificationService {
    }


-    private void classifyBlock(HeadlineClassificationService headlineClassificationService, TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Double> headlineFontSizes) {
+    private void classifyBlock(HeadlineClassificationService headlineClassificationService,
+                               TextPageBlock textBlock,
+                               ClassificationPage page,
+                               ClassificationDocument document,
+                               List<Double> headlineFontSizes) {

        var bodyTextFrame = page.getBodyTextFrame();

@ -53,63 +59,58 @@ public class ClarifyndClassificationService {
            headlineClassificationService.setLastHeadlineFromOutline(textBlock);
            return;
        }
+        if (textBlock.getClassification() != null && (textBlock.getClassification().equals(PageBlockType.HEADER)
+                                                      || textBlock.getClassification().equals(PageBlockType.FOOTER)
+                                                      || textBlock.getClassification().equals(PageBlockType.TABLE_OF_CONTENTS_ITEM))) {
+            return;
+        }
        if (document.getFontSizeCounter().getMostPopular() == null) {
            textBlock.setClassification(PageBlockType.PARAGRAPH);
            return;
        }
-        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER) || PositionUtils.isOverBodyTextFrame(bodyTextFrame,
-                textBlock,
-                page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
-                .getMostPopular())) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH);
-
-        } else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER) || PositionUtils.isUnderBodyTextFrame(bodyTextFrame,
-                textBlock,
-                page.getRotation()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
-                .getMostPopular())) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH);
-        } else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
-                document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
-                .size() == 1)) {
+        if (page.getPageNumber() == 1 //
+            && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter().getMostPopular()) > 2.5
+                && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks().size() == 1)) {
            if (!Pattern.matches("[0-9]+", textBlock.toString())) {
-                textBlock.setClassification(PageBlockType.TITLE);
+                PageBlockType headlineType = HeadlineClassificationService.headlineClassByFontSize(textBlock, headlineFontSizes);
+                headlineClassificationService.classifyHeadline(textBlock, headlineType);
+                document.setHeadlines(true);
            }
-        } else if (textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
-                .getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle().equals("bold") || !document.getFontStyleCounter()
-                .getCountPerValue()
-                .containsKey("bold") && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1) && textBlock.getSequences()
-                .get(0)
-                .getTextPositions()
-                .get(0)
-                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
+        } else if (textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular()
+                   && PositionUtils.getApproxLineCount(textBlock) < 4.9
+                   && (textBlock.getMostPopularWordStyle().equals("bold")
+                       || !document.getFontStyleCounter().getCountPerValue().containsKey("bold")
+                          && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1)
+                   && textBlock.getWords().get(0).getFontSize()>= textBlock.getMostPopularWordFontSize()) {

-            for (int i = 1; i <= headlineFontSizes.size(); i++) {
-                if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
-                    PageBlockType headlineType = PageBlockType.getHeadlineType(i);
-                    headlineClassificationService.classifyHeadline(textBlock, headlineType);
-                    document.setHeadlines(true);
-                }
-            }
-        } else if (!textBlock.getText().startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordStyle()
-                .equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences()
-                .get(0)
-                .getTextPositions()
-                .get(0)
-                .getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
-            PageBlockType headlineType = PageBlockType.getHeadlineType(headlineFontSizes.size() + 1);
+            PageBlockType headlineType = HeadlineClassificationService.headlineClassByFontSize(textBlock, headlineFontSizes);
            headlineClassificationService.classifyHeadline(textBlock, headlineType);
            document.setHeadlines(true);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
-                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
+        } else if (!textBlock.getText().startsWith("Figure ")
+                   && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
+                   && textBlock.getMostPopularWordStyle().equals("bold")
+                   && !document.getFontStyleCounter().getMostPopular().equals("bold")
+                   && PositionUtils.getApproxLineCount(textBlock) < 2.9
+                   && textBlock.getWords().get(0).getFontSize() >= textBlock.getMostPopularWordFontSize()) {
+
+            PageBlockType headlineType = HeadlineClassificationService.headlineClassByFontSize(textBlock, headlineFontSizes);
+            headlineClassificationService.classifyHeadline(textBlock, headlineType);
+            document.setHeadlines(true);
+        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
+                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
+                   && textBlock.getMostPopularWordStyle().equals("bold")
+                   && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
            textBlock.setClassification(PageBlockType.PARAGRAPH_BOLD);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFont()
-                .equals(document.getFontCounter().getMostPopular()) && textBlock.getMostPopularWordStyle()
-                .equals(document.getFontStyleCounter().getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
+        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
+                   && textBlock.getMostPopularWordFont().equals(document.getFontCounter().getMostPopular())
+                   && textBlock.getMostPopularWordStyle().equals(document.getFontStyleCounter().getMostPopular())
+                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
            textBlock.setClassification(PageBlockType.PARAGRAPH);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
-                .getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter()
-                .getMostPopular()
-                .equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
+        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
+                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
+                   && textBlock.getMostPopularWordStyle().equals("italic")
+                   && !document.getFontStyleCounter().getMostPopular().equals("italic")
+                   && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
            textBlock.setClassification(PageBlockType.PARAGRAPH_ITALIC);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)) {
            textBlock.setClassification(PageBlockType.PARAGRAPH_UNKNOWN);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClassificationPatterns.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClassificationPatterns.java
@ -0,0 +1,33 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.classification;
+
+import java.util.regex.Pattern;
+
+/**
+ * Precompiled regular expressions used for block classification.
+ * <p>
+ * This is a pure constant holder and cannot be instantiated.
+ */
+public final class ClassificationPatterns {
+
+    /**
+     * Numbered headline with at least two numeric identifier parts at the start of the text
+     * (e.g. "1.2 Title"), optionally followed by "14C" before the title text.
+     */
+    public static final Pattern HEADLINE_WITH_2_IDENTIFER_PATTERN = Pattern.compile("^([1-9]\\d?\\.){1,3}\\d{1,2}\\.?\\s(?:14C)?\\s*[a-z][0-9a-z \\[\\]]{2,50}", Pattern.CASE_INSENSITIVE);
+
+    /** Headline starting with a single digit followed by a dot (e.g. "1. Title"). */
+    public static final Pattern HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN = Pattern.compile("^([0-9]\\.)\\s[a-z][0-9a-z \\[\\]]{2,50}", Pattern.CASE_INSENSITIVE);
+
+    /** Three or more consecutive letters. */
+    public static final Pattern AT_LEAST_3_CHARS_PATTERN = Pattern.compile("\\p{L}{3,}", Pattern.CASE_INSENSITIVE);
+
+    /** Numbered headline whose text starts with a slash-separated abbreviation (e.g. "1.2 a/b ..."); case-sensitive. */
+    public static final Pattern HEADLINE_PATTERN_WITH_SLASHES = Pattern.compile("^(\\d{1,1}\\.){1,3}\\d{1,2}\\.?\\s[a-z]{1,2}\\/[a-z]{1,2}.*");
+
+    /**
+     * Leading number followed by a unit (e.g. "2.5 mg", "10 %").
+     * <p>
+     * The word boundary only applies to the alphanumeric units: {@code %} is not a word character, so a
+     * {@code \b} directly after it would only match when a word character follows, and plain "10 %" would
+     * never be recognised.
+     */
+    public static final Pattern AMOUNT_PATTERN = Pattern.compile(
+            "^\\s*\\d+(?:\\.\\d+)?\\s*(?:(?:ml|ul|μl|l|ug|μg|g|kg|mg|cm|cm2|cm3|mm|mm2|mm3|km|km2|m|m2|m3|lb|oz|ppm|dpm|days|weeks|months|f|ppb)\\b|%)",
+            Pattern.CASE_INSENSITIVE);
+
+    /** Text starting with a table, figure or appendix reference (e.g. "Table 1", "Figure 2.3", "Appendix IV"). */
+    public static final Pattern TABLE_OR_FIGURE_HEADLINE_PATTERN = Pattern.compile(
+            "^\\s*(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
+            Pattern.CASE_INSENSITIVE);
+
+    /** Same reference as {@link #TABLE_OR_FIGURE_HEADLINE_PATTERN}, but not anchored to the start of the text. */
+    public static final Pattern TABLE_MID_SENTENCE_PATTERN = Pattern.compile(
+            "(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
+            Pattern.CASE_INSENSITIVE);
+
+    /** A single ASCII letter or digit. */
+    public static final Pattern ALPHANUMERIC = Pattern.compile("[a-zA-Z0-9]");
+
+    /** One or more ASCII digits. */
+    public static final Pattern NUMERIC = Pattern.compile("[0-9]+");
+
+
+    private ClassificationPatterns() {
+        // constant holder, not meant to be instantiated
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ClassificationService.java
@ -0,0 +1,62 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.classification;
+
+import java.util.Map;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
+import com.knecon.fforesight.service.layoutparser.processor.services.blockification.DocuMineBlockificationService;
+
+import lombok.AccessLevel;
+import lombok.RequiredArgsConstructor;
+import lombok.experimental.FieldDefaults;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Runs the classification steps for a document: body text frames are calculated, headers/footers and
+ * the table of contents are classified, and the remaining text blocks are classified by the service
+ * matching the {@link LayoutParsingType}. For {@code DOCUMINE_OLD} the text blocks of every page are
+ * merged afterwards.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class ClassificationService {
+
+    private static final float MERGE_X_THRESHOLD = 0;
+    private static final float MERGE_Y_THRESHOLD = 10;
+
+    DocuMineBlockificationService docuMineBlockificationService;
+    BodyTextFrameService bodyTextFrameService;
+    TableOfContentsClassificationService tableOfContentsClassificationService;
+    RedactManagerClassificationService redactManagerClassificationService;
+    ClarifyndClassificationService clarifyndClassificationService;
+    DocuMineClassificationService docuMineClassificationService;
+    HeaderFooterClassificationService headerFooterClassificationService;
+
+
+    /**
+     * Classifies all text blocks of the document.
+     *
+     * @param document          the document to classify, modified in place
+     * @param layoutParsingType selects the classification service and whether blocks are merged afterwards
+     * @param identifier        only used in log messages
+     */
+    public void classify(ClassificationDocument document, LayoutParsingType layoutParsingType, Map<String, String> identifier) {
+
+        log.info("Calculating BodyTextFrame for {}", identifier);
+        bodyTextFrameService.setBodyTextFrames(document, layoutParsingType);
+        addCleanRulingVisualizations(document);
+
+        log.info("Classify TextBlocks for {}", identifier);
+        headerFooterClassificationService.classifyHeadersAndFooters(document);
+        tableOfContentsClassificationService.classifyTableOfContents(document);
+        classifyByParsingType(document, layoutParsingType);
+        mergeBlocksOfOldDocuMine(document, layoutParsingType);
+    }
+
+
+    /** Adds the clean rulings of every page to the layout debug layer. */
+    private void addCleanRulingVisualizations(ClassificationDocument document) {
+
+        for (ClassificationPage page : document.getPages()) {
+            document.getLayoutDebugLayer().addCleanRulingVisualization(page.getCleanRulings(), page.getPageNumber());
+        }
+    }
+
+
+    /** Delegates to the classification service matching the parsing type; other types are not classified here. */
+    private void classifyByParsingType(ClassificationDocument document, LayoutParsingType layoutParsingType) {
+
+        switch (layoutParsingType) {
+            case REDACT_MANAGER, REDACT_MANAGER_PARAGRAPH_DEBUG, REDACT_MANAGER_OLD, CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH ->
+                    redactManagerClassificationService.classifyDocument(document);
+            case DOCUMINE_OLD, DOCUMINE -> docuMineClassificationService.classifyDocument(document);
+            case CLARIFYND -> clarifyndClassificationService.classifyDocument(document);
+        }
+    }
+
+
+    /** Merges the text blocks of every page, but only for the {@code DOCUMINE_OLD} parsing type. */
+    private void mergeBlocksOfOldDocuMine(ClassificationDocument document, LayoutParsingType layoutParsingType) {
+
+        if (layoutParsingType != LayoutParsingType.DOCUMINE_OLD) {
+            return;
+        }
+        for (ClassificationPage page : document.getPages()) {
+            docuMineBlockificationService.mergeblocks(page, page.getCleanRulings().withoutTextRulings(), MERGE_X_THRESHOLD, MERGE_Y_THRESHOLD);
+        }
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/DocuMineClassificationService.java
@ -1,9 +1,21 @@
 package com.knecon.fforesight.service.layoutparser.processor.services.classification;

+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.ALPHANUMERIC;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.AMOUNT_PATTERN;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.AT_LEAST_3_CHARS_PATTERN;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_PATTERN_WITH_SLASHES;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_2_IDENTIFER_PATTERN;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_MID_SENTENCE_PATTERN;
+import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_OR_FIGURE_HEADLINE_PATTERN;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.regex.Matcher;
-import java.util.regex.Pattern;

 import org.springframework.stereotype.Service;

@ -11,142 +23,328 @@ import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBl
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
 import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.AbstractBlockOnPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.ListIdentifier;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
-import com.knecon.fforesight.service.layoutparser.processor.utils.HeaderFooterDetection;
-import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
 import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;

+import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
+import lombok.experimental.FieldDefaults;
 import lombok.extern.slf4j.Slf4j;

@Slf4j
@Service
@RequiredArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class DocuMineClassificationService {

-    private static final Pattern HEADLINE_WITH_IDENTIFER_PATTERN = Pattern.compile("^([1-9]\\d?\\.){1,3}\\d{1,2}\\.?\\s[0-9A-Za-z \\[\\]]{2,50}", Pattern.CASE_INSENSITIVE);
-    private static final Pattern AT_LEAST_3_PATTERN = Pattern.compile("\\p{L}{3,}", Pattern.CASE_INSENSITIVE);
-    private static final Pattern HEADLINE_PATTTERN_WITH_SLASHES = Pattern.compile("^(\\d{1,1}\\.){1,3}\\d{1,2}\\.?\\s[a-z]{1,2}\\/[a-z]{1,2}.*");
+    public static final int SEPARATION_THRESHOLD = 10; // a textblock counts as separated if its distance to every surrounding block exceeds this threshold; for separated blocks the regexes can be more lenient.
+    public static final int SURROUNDING_BLOCKS_RADIUS = 3; // number of surrounding blocks before and after the current textblock to be tested
+
+    ListItemClassificationService listItemClassificationService;


+    /**
+     * Classifies the text blocks of the given document.
+     * The headline font sizes and the list of blocks to inspect are computed once for the whole document,
+     * then each block is classified in list order with a single, document-wide {@link HeadlineClassificationService}.
+     *
+     * @param document the document whose text blocks get their classification set
+     */
    public void classifyDocument(ClassificationDocument document) {

-        List<Double> headlineFontSizes = document.getFontSizeCounter().getHigherThanMostPopular();
-
-        log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
+        List<Double> headlineFontSizes = buildHeadlineFontSizes(document);
+        List<AbstractBlockOnPage> blocks = buildBlocksPerPage(document);
+        log.debug("Headline FontSizes are: {}", headlineFontSizes);

        HeadlineClassificationService headlineClassificationService = new HeadlineClassificationService();

-        for (ClassificationPage page : document.getPages()) {
-            classifyPage(headlineClassificationService, page, document, headlineFontSizes);
+        for (int i = 0; i < blocks.size(); i++) {
+            AbstractBlockOnPage block = blocks.get(i);
+            // NOTE(review): this passes all text blocks of the page and runs once per block, so a page's
+            // visualizations are presumably added repeatedly - confirm the debug layer tolerates that.
+            document.getLayoutDebugLayer().addTextBlockVisualizations(block.page().getTextBlocks(), block.page().getPageNumber());
+            classifyBlock(headlineClassificationService, i, blocks, document, headlineFontSizes);
        }
-    }

-
-    private void classifyPage(HeadlineClassificationService headlineClassificationService,
-                              ClassificationPage page,
-                              ClassificationDocument document,
-                              List<Double> headlineFontSizes) {
-
-        for (AbstractPageBlock textBlock : page.getTextBlocks()) {
-            if (textBlock instanceof TextPageBlock) {
-                classifyBlock(headlineClassificationService, (TextPageBlock) textBlock, page, document, headlineFontSizes);
-            }
-        }
    }


+    /**
+     * Classifies the block at {@code currentIndex} of {@code allBlocks} and stores the result on the block itself.
+     * Entries that are not a {@link TextPageBlock} are ignored. A block that is already classified as a headline is
+     * only registered as the last headline from the outline; a block already classified as HEADER, FOOTER or
+     * TABLE_OF_CONTENTS_ITEM is left untouched. Otherwise the headline rules are tried first, then the list item
+     * check, then the bold / regular / italic paragraph checks; everything else falls back to PARAGRAPH.
+     *
+     * @param headlineClassificationService receives every block that is classified as a headline
+     * @param currentIndex                  index of the block to classify within {@code allBlocks}
+     * @param allBlocks                     all blocks under inspection; also used to find the neighbours of the current block
+     * @param document                      the document, used for its font counters and its layout debug layer
+     * @param headlineFontSizes             font sizes handed on to the headline level selection
+     */
    private void classifyBlock(HeadlineClassificationService headlineClassificationService,
-                               TextPageBlock textBlock,
-                               ClassificationPage page,
+                               int currentIndex,
+                               List<AbstractBlockOnPage> allBlocks,
                               ClassificationDocument document,
                               List<Double> headlineFontSizes) {

+        TextPageBlock textBlock;
+        if (allBlocks.get(currentIndex).block() instanceof TextPageBlock block) {
+            textBlock = block;
+        } else {
+            return;
+        }
+        ClassificationPage page = allBlocks.get(currentIndex).page();
+        List<AbstractPageBlock> surroundingBlocks = getSurroundingBlocksOnPage(currentIndex, allBlocks);
+
        log.debug("headlineFontSizes: {}", headlineFontSizes);
        var bodyTextFrame = page.getBodyTextFrame();

-        Matcher headlineWithIdentifierMatcher = HEADLINE_WITH_IDENTIFER_PATTERN.matcher(textBlock.toString());
-        Matcher atLeast3Matcher = AT_LEAST_3_PATTERN.matcher(textBlock.toString());
-        Matcher headlineWithSlashesMatcher = HEADLINE_PATTTERN_WITH_SLASHES.matcher(textBlock.toString());
+        Matcher headlineWith2IdentifierMatcher = HEADLINE_WITH_2_IDENTIFER_PATTERN.matcher(textBlock.toString());
+        Matcher atLeast3Matcher = AT_LEAST_3_CHARS_PATTERN.matcher(textBlock.toString());
+        Matcher headlineWithSlashesMatcher = HEADLINE_PATTERN_WITH_SLASHES.matcher(textBlock.toString());
+        Matcher amountMatcher = AMOUNT_PATTERN.matcher(textBlock.toString());
+        Matcher tableOrFigureMatcher = TABLE_OR_FIGURE_HEADLINE_PATTERN.matcher(textBlock.toString());
+        Matcher tableMidSentenceMatcher = TABLE_MID_SENTENCE_PATTERN.matcher(textBlock.toString());
+        Matcher headlineWithSingleIdentifierMatcher = HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN.matcher(textBlock.toString());
+        boolean isAtLeast3Characters = atLeast3Matcher.reset().find();
+        boolean headlineWithSlashesMatches = headlineWithSlashesMatcher.reset().matches();
+        boolean isAmount = amountMatcher.reset().find();
+        int charCount = countChars(textBlock);
+
+        // more than half of the characters of the text have to be alphabetic
+        boolean enoughChars = charCount > textBlock.getText().length() * 0.5;
+
+        // the list identifiers are looked up and added to the debug layer before any of the early returns below
+        List<ListIdentifier> listIdentifiers = listItemClassificationService.findConfirmedListIdentifiers(currentIndex, allBlocks);
+        document.getLayoutDebugLayer().addListIdentifiers(listIdentifiers);

        if (textBlock.getClassification() != null && textBlock.getClassification().isHeadline()) {
            headlineClassificationService.setLastHeadlineFromOutline(textBlock);
            return;
        }
-        if (document.getFontSizeCounter().getMostPopular() == null) {
-            textBlock.setClassification(PageBlockType.OTHER);
+        if (textBlock.getClassification() != null && (textBlock.getClassification().equals(PageBlockType.HEADER)
+                                                      || textBlock.getClassification().equals(PageBlockType.FOOTER)
+                                                      || textBlock.getClassification().equals(PageBlockType.TABLE_OF_CONTENTS_ITEM))) {
            return;
        }
-        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER) //
-            || (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.getRotation()) //
-                && (document.getFontSizeCounter().getMostPopular() == null //
-                    || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular()))) {
-            textBlock.setClassification(PageBlockType.HEADER);
+        if (document.getFontSizeCounter().getMostPopular() == null) {
+            textBlock.setClassification(PageBlockType.PARAGRAPH);
+            return;
+        }
+        if (textBlock.getText().length() > 5
+            && greaterOrEqualFontThanDocumentAverage(textBlock, document)
+            && PositionUtils.getApproxLineCount(textBlock) < 5.9
+            && ((textBlock.getMostPopularWordStyle().contains("bold") || textBlock.isUnderlined())//
+                && Character.isDigit(textBlock.toString().charAt(0)) //
+                && isAtLeast3Characters //
+                && !textBlock.toString().contains(":") //
+                || textBlock.toString().startsWith("APPENDIX") //
+                || textBlock.toString().startsWith("FIGURE") //
+                || textBlock.toString().startsWith("Continued TABLE") //
+                || textBlock.toString().startsWith("TABLE"))
+            && !textBlock.toString().endsWith(":")
+            && isAtLeast3Characters
+            && !isAmount
+            && enoughChars) {

-        } else if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER)
-                   || (PositionUtils.isUnderBodyTextFrame(bodyTextFrame,
-                                                          textBlock,
-                                                          page.getRotation())
-                       && (document.getFontSizeCounter().getMostPopular()
-                           == null
-                           || textBlock.getHighestFontSize()
-                              <= document.getFontSizeCounter()
-                                      .getMostPopular()))
-                   || HeaderFooterDetection.isLikelyFooter(textBlock, document, page)) {
-            textBlock.setClassification(PageBlockType.FOOTER);
-        } else if (page.getPageNumber() == 1 && (PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter().getMostPopular()) > 2.5
-                                                 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks().size() == 1)) {
-            if (!Pattern.matches("[0-9]+", textBlock.toString())) {
-                textBlock.setClassification(PageBlockType.TITLE);
-            }
-        } else if (textBlock.getText().length() > 5
-                   && (textBlock.getMostPopularWordHeight() > document.getTextHeightCounter().getMostPopular()
-                       || textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular())
-                   && PositionUtils.getApproxLineCount(textBlock) < 5.9
+            setAsHeadline(headlineClassificationService, textBlock, document, headlineFontSizes);
+        } else if (isAllCaps(textBlock)
+                   // NOTE(review): charAt(0) is evaluated before the length check two lines below and isAllCaps
+                   // holds for an empty text, so an empty block would presumably throw here - confirm that
+                   // blocks with empty text cannot reach this point.
+                   && ALPHANUMERIC.matcher(Character.toString(textBlock.getText().charAt(0))).matches()
+                   && hasSeparation(textBlock, surroundingBlocks)
+                   && textBlock.getText().length() > 5
+                   && isAtLeast3Characters
+                   && !isAmount
+                   && enoughChars
+                   && !textBlock.toString().contains(":")
+                   && !textBlock.toString().endsWith(".")
+                   && PositionUtils.getApproxLineCount(textBlock) < 2.9) {

-                   && (textBlock.getMostPopularWordStyle().contains("bold")
-                       && Character.isDigit(textBlock.toString().charAt(0))
-                       && atLeast3Matcher.reset().find()
-                       && !textBlock.toString().contains(":") //
-                       || textBlock.toString().equals(textBlock.toString().toUpperCase(Locale.ROOT)) && atLeast3Matcher.reset().find() && !textBlock.toString().contains(":") //
-                       || textBlock.toString().startsWith("APPENDIX") //
-                       || textBlock.toString().startsWith("FIGURE") //
-                       || textBlock.toString().startsWith("Continued TABLE") //
-                       || textBlock.toString().startsWith("TABLE"))
-                   && !textBlock.toString().endsWith(":")
-                   && atLeast3Matcher.reset().find()) {
-            PageBlockType headlineType = PageBlockType.getHeadlineType(1);
-            headlineClassificationService.classifyHeadline(textBlock, headlineType);
-            document.setHeadlines(true);
-
-        } else if (headlineWithIdentifierMatcher.reset().find()
+            setAsHeadline(headlineClassificationService, textBlock, document, headlineFontSizes);
+        } else if (headlineWith2IdentifierMatcher.reset().find()
                   && PositionUtils.getApproxLineCount(textBlock) < 2.9
-                   && atLeast3Matcher.reset().find()
-                   && !headlineWithSlashesMatcher.reset().matches()) {
-            PageBlockType headlineType = PageBlockType.getHeadlineType(2);
-            headlineClassificationService.classifyHeadline(textBlock, headlineType);
-            document.setHeadlines(true);
+                   && isAtLeast3Characters
+                   && !headlineWithSlashesMatches
+                   && !isAmount) {
+
+            setAsHeadline(headlineClassificationService, textBlock, document, headlineFontSizes);
+        } else if (hasSeparation(textBlock, surroundingBlocks)//
+                   && greaterOrEqualFontThanPageAverage(textBlock, page)//
+                   && PositionUtils.getApproxLineCount(textBlock) < 2.9//
+                   && (tableOrFigureMatcher.reset().find() || (headlineWithSingleIdentifierMatcher.reset().find() && listIdentifiers.isEmpty())) //
+                   && tableMidSentenceMatcher.reset().results()
+                              .count() <= 1 //
+                   && !isAmount//
+                   && !headlineWithSlashesMatches) {
+
+            setAsHeadline(headlineClassificationService, textBlock, document, headlineFontSizes);
+//        } else if (textBlock.getMostPopularWordFont().contains("bold")
+//                   && greaterOrEqualFontThanPageAverage(textBlock, page)
+//                   && textBlock.getWords().size() <= 6
+//                   && PositionUtils.getApproxLineCount(textBlock) < 2.9
+//                   && isAtLeast3Characters
+//                   && charCount > textBlock.getText().length() * 0.75
+//                   && !textBlock.getText().contains(":")
+//                   && textBlock.getWidth() < page.getBodyTextFrame().getWidth() * 0.7) {
+//
+//            setAsHeadline(headlineClassificationService, textBlock, document, headlineFontSizes);
+        } else if (!listIdentifiers.isEmpty()) {
+
+            textBlock.setClassification(PageBlockType.LIST_ITEM);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
                   && textBlock.getMostPopularWordStyle().equals("bold")
                   && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
+
            textBlock.setClassification(PageBlockType.PARAGRAPH_BOLD);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                   && textBlock.getMostPopularWordFont().equals(document.getFontCounter().getMostPopular())
                   && textBlock.getMostPopularWordStyle().equals(document.getFontStyleCounter().getMostPopular())
                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
+
            textBlock.setClassification(PageBlockType.PARAGRAPH);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                   && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()
                   && textBlock.getMostPopularWordStyle().equals("italic")
                   && !document.getFontStyleCounter().getMostPopular().equals("italic")
                   && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
+
            textBlock.setClassification(PageBlockType.PARAGRAPH_ITALIC);
-        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)) {
-            textBlock.setClassification(PageBlockType.PARAGRAPH_UNKNOWN);
        } else {
-            textBlock.setClassification(PageBlockType.OTHER);
+            textBlock.setClassification(PageBlockType.PARAGRAPH);
        }
    }

-}
+
+    /**
+     * Counts the alphabetic characters in the text of the given block.
+     *
+     * @param textBlock the block whose text is inspected
+     * @return the number of chars of the text for which {@link Character#isAlphabetic(int)} holds
+     */
+    private int countChars(TextPageBlock textBlock) {
+
+        CharSequence text = textBlock.getText();
+        return (int) text.chars()
+                .filter(Character::isAlphabetic)
+                .count();
+    }
+
+
+    /**
+     * Checks whether the block is at least as large as the most common text of its page.
+     *
+     * @param textBlock the block to test
+     * @param page      the page providing the most popular text height and font size
+     * @return true if the most popular word height or the most popular word font size of the block is
+     *         greater than or equal to the page's most popular value
+     */
+    private static boolean greaterOrEqualFontThanPageAverage(TextPageBlock textBlock, ClassificationPage page) {
+
+        if (textBlock.getMostPopularWordHeight() >= page.getTextHeightCounter().getMostPopular()) {
+            return true;
+        }
+        return textBlock.getMostPopularWordFontSize() >= page.getFontSizeCounter().getMostPopular();
+    }
+
+
+    /**
+     * Checks whether the block is larger than the most common text of the document.
+     * Despite the method name, both comparisons are strict: equal values do not count.
+     *
+     * @param textBlock the block to test
+     * @param document  the document providing the most popular text height and font size
+     * @return true if the most popular word height or the most popular word font size of the block is
+     *         strictly greater than the document's most popular value
+     */
+    private static boolean greaterOrEqualFontThanDocumentAverage(TextPageBlock textBlock, ClassificationDocument document) {
+
+        if (textBlock.getMostPopularWordHeight() > document.getTextHeightCounter().getMostPopular()) {
+            return true;
+        }
+        return textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular();
+    }
+
+
+    /**
+     * Checks whether upper-casing the block's string form (with {@link Locale#ROOT}) leaves it unchanged.
+     * Consequently a text without any letters also counts as all caps.
+     *
+     * @param textBlock the block to test
+     * @return true if the string form of the block equals its upper-cased version
+     */
+    private static boolean isAllCaps(TextPageBlock textBlock) {
+
+        String text = textBlock.toString();
+        String upperCased = text.toUpperCase(Locale.ROOT);
+        return text.equals(upperCased);
+    }
+
+
+    /**
+     * Checks whether the block keeps a distance of more than {@link #SEPARATION_THRESHOLD} to every surrounding block.
+     * The comparison is done on squared distances. An empty list of surrounding blocks counts as separated.
+     *
+     * @param textBlock         the block to test
+     * @param surroundingBlocks the neighbouring blocks to measure against
+     * @return true if every surrounding block is farther away than the threshold
+     */
+    private boolean hasSeparation(TextPageBlock textBlock, List<AbstractPageBlock> surroundingBlocks) {
+
+        double squaredThreshold = Math.pow(SEPARATION_THRESHOLD, 2);
+        for (AbstractPageBlock neighbour : surroundingBlocks) {
+            if (!(calculateSeparation(textBlock, neighbour) > squaredThreshold)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+
+    /**
+     * Determines the smallest squared distance between the block and any of the surrounding blocks.
+     *
+     * @param textBlock         the block to measure from
+     * @param surroundingBlocks the neighbouring blocks to measure against
+     * @return the minimum of {@code calculateSeparation} over all surrounding blocks,
+     *         or {@link Double#MAX_VALUE} if there are none
+     */
+    private double calculateMinSeparation(TextPageBlock textBlock, List<AbstractPageBlock> surroundingBlocks) {
+
+        if (surroundingBlocks.isEmpty()) {
+            return Double.MAX_VALUE;
+        }
+        double smallest = Double.POSITIVE_INFINITY;
+        for (AbstractPageBlock neighbour : surroundingBlocks) {
+            smallest = Math.min(smallest, calculateSeparation(textBlock, neighbour));
+        }
+        return smallest;
+    }
+
+
+    /**
+     * Computes the squared distance between two blocks from their horizontal and vertical distance.
+     *
+     * @param textBlock        the block to measure from
+     * @param surroundingBlock the block to measure to
+     * @return horizontal distance squared plus vertical distance squared
+     */
+    private static double calculateSeparation(TextPageBlock textBlock, AbstractPageBlock surroundingBlock) {
+
+        double horizontalGap = surroundingBlock.horizontalDistance(textBlock);
+        double verticalGap = surroundingBlock.verticalDistance(textBlock);
+        return Math.pow(horizontalGap, 2) + Math.pow(verticalGap, 2);
+    }
+
+
+    /**
+     * Classifies the block as a headline and flags the document as containing headlines.
+     * The initial headline type is derived from the block's font size, the final type is decided by the
+     * given {@link HeadlineClassificationService}.
+     *
+     * @param headlineClassificationService the service that sets the final headline classification on the block
+     * @param textBlock                     the block to classify as headline
+     * @param document                      the document to flag
+     * @param headlineFontSizes             font sizes used to pick the initial headline type
+     */
+    private static void setAsHeadline(HeadlineClassificationService headlineClassificationService,
+                                      TextPageBlock textBlock,
+                                      ClassificationDocument document,
+                                      List<Double> headlineFontSizes) {
+
+        headlineClassificationService.classifyHeadline(textBlock, HeadlineClassificationService.headlineClassByFontSize(textBlock, headlineFontSizes));
+        document.setHeadlines(true);
+    }
+
+
+    /**
+     * Collects the text blocks of all pages into one list, each paired with its page.
+     * Blocks that are not a {@link TextPageBlock} and blocks already classified as HEADER or FOOTER are left out.
+     *
+     * @param document the document to collect the blocks from
+     * @return the remaining blocks in page order, then in the order of the page's text blocks
+     */
+    private List<AbstractBlockOnPage> buildBlocksPerPage(ClassificationDocument document) {
+
+        List<AbstractBlockOnPage> collected = new ArrayList<>();
+        for (ClassificationPage page : document.getPages()) {
+            for (AbstractPageBlock candidate : page.getTextBlocks()) {
+                if (!(candidate instanceof TextPageBlock textBlock)) {
+                    continue;
+                }
+                PageBlockType classification = textBlock.getClassification();
+                boolean headerOrFooter = classification != null //
+                                         && (classification.equals(PageBlockType.HEADER) || classification.equals(PageBlockType.FOOTER));
+                if (!headerOrFooter) {
+                    collected.add(new AbstractBlockOnPage(textBlock, page));
+                }
+            }
+        }
+        return collected;
+    }
+
+
+    /**
+     * Derives the font sizes used to pick a headline level, largest first.
+     * With at most six distinct font sizes the counter's values in reverse order are returned as they are.
+     * Otherwise the smallest font sizes are dropped until the dropped ones would cover more than 30% of all
+     * counted occurrences, the rest is split into six clusters of neighbouring sizes and the average of each
+     * cluster is returned. If fewer than six sizes remain after dropping, the counter's values in reverse
+     * order are returned instead.
+     *
+     * @param document the document providing the font size counter
+     * @return the headline font sizes in descending order
+     */
+    private static List<Double> buildHeadlineFontSizes(ClassificationDocument document) {
+
+        final int clusterCount = 6;
+
+        if (document.getFontSizeCounter().getCountPerValue().size() <= clusterCount) {
+            return document.getFontSizeCounter().getValuesInReverseOrder();
+        }
+
+        List<Map.Entry<Double, Integer>> ascendingBySize = new ArrayList<>(document.getFontSizeCounter().getCountPerValue().entrySet());
+        ascendingBySize.sort(Map.Entry.comparingByKey());
+
+        int totalCount = 0;
+        for (Map.Entry<Double, Integer> entry : ascendingBySize) {
+            totalCount += entry.getValue();
+        }
+
+        // skip the smallest sizes; the first entry that pushes the running count above 30% is the first one kept
+        int firstKept = 0;
+        int cumulativeCount = 0;
+        while (firstKept < ascendingBySize.size()) {
+            cumulativeCount += ascendingBySize.get(firstKept).getValue();
+            if (cumulativeCount > totalCount * 0.3) {
+                break;
+            }
+            firstKept++;
+        }
+        List<Map.Entry<Double, Integer>> kept = ascendingBySize.subList(firstKept, ascendingBySize.size());
+
+        if (kept.size() < clusterCount) {
+            return document.getFontSizeCounter().getValuesInReverseOrder();
+        }
+        int clusterSize = Math.max(1, kept.size() / clusterCount);
+
+        List<List<Double>> clusters = new ArrayList<>();
+        while (clusters.size() < clusterCount) {
+            clusters.add(new ArrayList<>());
+        }
+
+        // consecutive sizes share a cluster; whatever does not divide evenly ends up in the last cluster
+        int position = 0;
+        for (Map.Entry<Double, Integer> entry : kept) {
+            int clusterIndex = Math.min(position / clusterSize, clusterCount - 1);
+            clusters.get(clusterIndex).add(entry.getKey());
+            position++;
+        }
+
+        return clusters.stream()
+                .map(cluster -> cluster.stream()
+                        .mapToDouble(Double::doubleValue).average()
+                        .orElseThrow())
+                .sorted(Comparator.reverseOrder())
+                .toList();
+    }
+
+
+    /**
+     * Collects the neighbours of the block at {@code originalIndex}: up to {@link #SURROUNDING_BLOCKS_RADIUS}
+     * blocks before and up to {@link #SURROUNDING_BLOCKS_RADIUS} blocks after it.
+     * The block itself, blocks whose text has at most one character and blocks on a different page are left out.
+     *
+     * @param originalIndex index of the block whose neighbours are requested
+     * @param textBlocks    all blocks under inspection
+     * @return the neighbouring blocks on the same page, in list order
+     */
+    private List<AbstractPageBlock> getSurroundingBlocksOnPage(int originalIndex, List<AbstractBlockOnPage> textBlocks) {
+
+        ClassificationPage currentPage = textBlocks.get(originalIndex).page();
+        int start = Math.max(originalIndex - SURROUNDING_BLOCKS_RADIUS, 0);
+        // end is exclusive, so +1 is needed to reach the block that is RADIUS positions after the current one;
+        // without it only RADIUS - 1 following blocks were inspected while RADIUS preceding blocks were
+        int end = Math.min(originalIndex + SURROUNDING_BLOCKS_RADIUS + 1, textBlocks.size());
+        List<AbstractPageBlock> surroundingBlocks = new ArrayList<>(2 * SURROUNDING_BLOCKS_RADIUS);
+        for (int i = start; i < end; i++) {
+            if (i == originalIndex) {
+                continue;
+            }
+            AbstractBlockOnPage candidate = textBlocks.get(i);
+            if (candidate.block().getText().length() <= 1) {
+                continue;
+            }
+            if (!candidate.page().equals(currentPage)) {
+                continue;
+            }
+            surroundingBlocks.add(candidate.block());
+        }
+        return surroundingBlocks;
+    }
+
+}
+
+
+
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/HeaderFooterClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/HeaderFooterClassificationService.java
@ -0,0 +1,55 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.classification;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
+import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;
+
+import lombok.AccessLevel;
+import lombok.RequiredArgsConstructor;
+import lombok.experimental.FieldDefaults;
+
+/**
+ * Marks text blocks as HEADER or FOOTER, based on marked content and on their position relative to the
+ * body text frame of the page. Blocks matching neither rule keep their current classification.
+ */
+@Service
+@RequiredArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class HeaderFooterClassificationService {
+
+    /**
+     * Runs the header/footer classification on every {@link TextPageBlock} of every page of the document.
+     *
+     * @param document the document whose blocks are classified
+     */
+    public void classifyHeadersAndFooters(ClassificationDocument document) {
+
+        for (ClassificationPage page : document.getPages()) {
+            for (AbstractPageBlock pageBlock : page.getTextBlocks()) {
+                if (!(pageBlock instanceof TextPageBlock textBlock)) {
+                    continue;
+                }
+                classifyBlock(document, page, textBlock);
+            }
+        }
+    }
+
+
+    /**
+     * Sets HEADER or FOOTER on the block if one of the rules applies; the header rule is checked first.
+     */
+    private static void classifyBlock(ClassificationDocument document, ClassificationPage page, TextPageBlock textBlock) {
+
+        if (isHeader(document, page, textBlock)) {
+            textBlock.setClassification(PageBlockType.HEADER);
+            return;
+        }
+        if (isFooter(document, page, textBlock)) {
+            textBlock.setClassification(PageBlockType.FOOTER);
+        }
+    }
+
+
+    /**
+     * A block is a header if it intersects header marked content, or if it lies over the body text frame
+     * and its font is not larger than the document's most popular one.
+     */
+    private static boolean isHeader(ClassificationDocument document, ClassificationPage page, TextPageBlock textBlock) {
+
+        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.HEADER)) {
+            return true;
+        }
+        return PositionUtils.isOverBodyTextFrame(page.getBodyTextFrame(), textBlock, page.getRotation()) //
+               && smallerFontThanDocAverage(document, textBlock);
+    }
+
+
+    /**
+     * A block is a footer if it intersects footer marked content, or if it lies under the body text frame
+     * and its font is not larger than the document's most popular one.
+     */
+    private static boolean isFooter(ClassificationDocument document, ClassificationPage page, TextPageBlock textBlock) {
+
+        if (MarkedContentUtils.intersects(textBlock, page.getMarkedContentBboxPerType(), MarkedContentUtils.FOOTER)) {
+            return true;
+        }
+        return PositionUtils.isUnderBodyTextFrame(page.getBodyTextFrame(), textBlock, page.getRotation()) //
+               && smallerFontThanDocAverage(document, textBlock);
+    }
+
+
+    /**
+     * Returns true if the document has no most popular font size, or if the highest font size of the block
+     * is less than or equal to it.
+     */
+    private static boolean smallerFontThanDocAverage(ClassificationDocument document, TextPageBlock textBlock) {
+
+        var mostPopularFontSize = document.getFontSizeCounter().getMostPopular();
+        return mostPopularFontSize == null || textBlock.getHighestFontSize() <= mostPopularFontSize;
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/HeadlineClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/HeadlineClassificationService.java
@ -2,7 +2,10 @@ package com.knecon.fforesight.service.layoutparser.processor.services.classifica

 import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;

+import java.util.List;
+
 import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
+import com.knecon.fforesight.service.layoutparser.processor.model.SectionIdentifier;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;

 import lombok.Getter;
@ -16,6 +19,7 @@ public class HeadlineClassificationService {
    PageBlockType originalClassifiedBlockType;
    TextPageBlock lastHeadlineFromOutline;

+
    public void setLastHeadlineFromOutline(TextPageBlock lastHeadlineFromOutline) {

        this.lastHeadlineFromOutline = lastHeadlineFromOutline;
@ -25,28 +29,62 @@ public class HeadlineClassificationService {

+    /**
+     * Sets the headline classification on the given block and remembers it as the last headline.
+     * Without a previous headline the initial type is used as it is; otherwise the final type is
+     * chosen by {@code decideOnClassification}.
+     *
+     * @param textBlock           the block to classify as headline
+     * @param initialHeadlineType the headline type proposed by the caller
+     */
    public void classifyHeadline(TextPageBlock textBlock, PageBlockType initialHeadlineType) {

-        TextPageBlock lastHeadline = getLastHeadline();
-        TextPageBlock lastHeadlineFromOutline = getLastHeadlineFromOutline();
-        PageBlockType originalClassifiedBlockType = getOriginalClassifiedBlockType();
        PageBlockType finalHeadlineType = initialHeadlineType;

        if (lastHeadline != null) {

-            if (lastHeadline.equals(lastHeadlineFromOutline)) {
-
-                finalHeadlineType = PageBlockType.getHeadlineType(getHeadlineNumber(lastHeadline.getClassification()) + 1);
-
-            } else if (originalClassifiedBlockType != null && lastHeadline.getClassification() != originalClassifiedBlockType) {
-
-                PageBlockType lastHeadlineType = lastHeadline.getClassification();
-                int difference = getHeadlineNumber(originalClassifiedBlockType) - getHeadlineNumber(lastHeadlineType);
-                finalHeadlineType = PageBlockType.getHeadlineType(getHeadlineNumber(initialHeadlineType) - difference);
-            }
+            finalHeadlineType = decideOnClassification(textBlock, initialHeadlineType);
        }

-        setOriginalClassifiedBlockType(initialHeadlineType);
+        // remember the block and the type proposed for it (not the final type) for the next headline
+        lastHeadline = textBlock;
+        originalClassifiedBlockType = initialHeadlineType;
        textBlock.setClassification(finalHeadlineType);
-        setLastHeadline(textBlock);
+    }
+
+
+    /**
+     * Chooses the final headline type for a block when a previous headline exists.
+     * A non-empty section identifier in the block's text decides the level directly. Otherwise, if the
+     * previous headline is the last one from the outline and its font is not smaller than the block's, the
+     * block becomes one level deeper than the previous headline. Otherwise, if the previous headline ended
+     * up with a different type than was proposed for it, the proposed type of this block is shifted by the
+     * same amount. In all other cases the proposed type is kept.
+     *
+     * @param textBlock           the block being classified
+     * @param initialHeadlineType the headline type proposed by the caller
+     * @return the headline type to set on the block
+     */
+    private PageBlockType decideOnClassification(TextPageBlock textBlock, PageBlockType initialHeadlineType) {
+
+        SectionIdentifier sectionIdentifier = SectionIdentifier.fromSearchText(textBlock.getText());
+        TextPageBlock outlineHeadline = getLastHeadlineFromOutline();
+        PageBlockType previousInitialType = getOriginalClassifiedBlockType();
+
+        if (!sectionIdentifier.isEmpty()) {
+            return PageBlockType.getHeadlineType(sectionIdentifier.level());
+        }
+
+        boolean followsOutlineHeadline = lastHeadline.equals(outlineHeadline) //
+                                         && lastHeadline.getMostPopularWordFontSize() >= textBlock.getMostPopularWordFontSize();
+        if (followsOutlineHeadline) {
+            return PageBlockType.getHeadlineType(getHeadlineNumber(lastHeadline.getClassification()) + 1);
+        }
+
+        if (previousInitialType != null && lastHeadline.getClassification() != previousInitialType) {
+            return adjustInitialLevelToLastHeadlineLevel(initialHeadlineType);
+        }
+
+        return initialHeadlineType;
+    }
+
+
+    /**
+     * Shifts the proposed headline level by the amount the previous headline was shifted, i.e. by the
+     * difference between the level proposed for the previous headline and the level it finally got.
+     * The resulting level never goes below 1.
+     *
+     * @param initialHeadlineType the headline type proposed for the current block
+     * @return the shifted headline type
+     */
+    private PageBlockType adjustInitialLevelToLastHeadlineLevel(PageBlockType initialHeadlineType) {
+
+        int proposedLevelOfLast = getHeadlineNumber(originalClassifiedBlockType);
+        int finalLevelOfLast = getHeadlineNumber(lastHeadline.getClassification());
+        int shiftedLevel = getHeadlineNumber(initialHeadlineType) - (proposedLevelOfLast - finalLevelOfLast);
+        return PageBlockType.getHeadlineType(Math.max(1, shiftedLevel));
+    }
+
+
+    /**
+     * Picks the headline type for a block from the font size group closest to the block's most popular
+     * word font size. On equal distance the earlier group wins.
+     * <p>
+     * NOTE(review): the zero-based index of the closest group is passed to {@code PageBlockType.getHeadlineType},
+     * and -1 is passed for an empty list, while the other calls in this class pass levels of at least 1 -
+     * confirm that getHeadlineType handles 0 and -1 as intended.
+     *
+     * @param textBlock      the block to find a headline type for
+     * @param fontSizeGroups the candidate font sizes
+     * @return the headline type for the index of the closest font size group
+     */
+    public static PageBlockType headlineClassByFontSize(TextPageBlock textBlock, List<Double> fontSizeGroups) {
+
+        double smallestDistance = Double.MAX_VALUE;
+        int closestGroup = -1;
+        int index = 0;
+        for (Double fontSize : fontSizeGroups) {
+            double distance = Math.abs(fontSize - textBlock.getMostPopularWordFontSize());
+            if (distance < smallestDistance) {
+                smallestDistance = distance;
+                closestGroup = index;
+            }
+            index++;
+        }
+        return PageBlockType.getHeadlineType(closestGroup);
    }

 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ListItemClassificationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/classification/ListItemClassificationService.java
@ -0,0 +1,99 @@
+package com.knecon.fforesight.service.layoutparser.processor.services.classification;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.text.AbstractBlockOnPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.ListIdentifier;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.Word;
+
+/**
+ * Finds list identifiers at the line starts of text blocks and confirms them by checking that
+ * they are in order, either within the block itself or together with the nearest identifier of
+ * a neighbouring block.
+ */
+@Service
+public class ListItemClassificationService {
+
+    /** Number of blocks before and after the current block that are searched for a neighbouring list identifier. */
+    public static final int LIST_IDENTIFIER_SEARCH_RADIUS = 3;
+
+    /** Maximum number of words at the start of a line that are handed to {@link ListIdentifier#parse}. */
+    private static final int MAX_WORDS_PER_IDENTIFIER = 3;
+
+
+    /**
+     * Returns the list identifiers of the block at {@code currentIndex} if they can be confirmed.
+     * <p>
+     * Identifiers are confirmed when the block itself contains more than one identifier and they
+     * are in order, or when they are in order together with the last identifier found in the
+     * preceding blocks or the first identifier found in the following blocks (each within
+     * {@link #LIST_IDENTIFIER_SEARCH_RADIUS} blocks). In the latter case the returned list also
+     * contains that neighbouring identifier, at the front or at the end respectively.
+     *
+     * @param currentIndex index of the block to inspect within {@code allBlocks}
+     * @param allBlocks    all blocks, in the order in which neighbours are searched
+     * @return the confirmed identifiers, or an empty list if none were found or confirmed
+     * @throws IndexOutOfBoundsException if {@code currentIndex} is not a valid index of {@code allBlocks}
+     */
+    public List<ListIdentifier> findConfirmedListIdentifiers(int currentIndex, List<AbstractBlockOnPage> allBlocks) {
+
+        List<ListIdentifier> listIdentifiers = extractListIdentifiers(allBlocks.get(currentIndex));
+        if (listIdentifiers.isEmpty()) {
+            return Collections.emptyList();
+        }
+        if (listIdentifiers.size() > 1 && ListIdentifier.isInOrder(listIdentifiers)) {
+            return listIdentifiers;
+        }
+
+        int start = Math.max(0, currentIndex - LIST_IDENTIFIER_SEARCH_RADIUS);
+        // The upper bound is exclusive, so + 1 is required to really look RADIUS blocks ahead,
+        // mirroring the RADIUS blocks that are searched behind the current block.
+        int end = Math.min(allBlocks.size(), currentIndex + LIST_IDENTIFIER_SEARCH_RADIUS + 1);
+
+        List<ListIdentifier> identifiersBehind = extractListIdentifiersInRange(allBlocks, start, currentIndex);
+        if (!identifiersBehind.isEmpty()) {
+            // try the closest preceding identifier as predecessor of the current ones
+            listIdentifiers.add(0, identifiersBehind.get(identifiersBehind.size() - 1));
+            if (ListIdentifier.isInOrder(listIdentifiers)) {
+                return listIdentifiers;
+            }
+            listIdentifiers.remove(0);
+        }
+
+        List<ListIdentifier> identifiersAhead = extractListIdentifiersInRange(allBlocks, currentIndex + 1, end);
+        if (!identifiersAhead.isEmpty()) {
+            // try the closest following identifier as successor of the current ones
+            listIdentifiers.add(identifiersAhead.get(0));
+            if (ListIdentifier.isInOrder(listIdentifiers)) {
+                return listIdentifiers;
+            }
+        }
+        return Collections.emptyList();
+    }
+
+
+    /**
+     * Collects the list identifiers of all blocks in {@code [fromInclusive, toExclusive)}, in block order.
+     *
+     * @return the identifiers found, or an empty list if the range is empty
+     */
+    private List<ListIdentifier> extractListIdentifiersInRange(List<AbstractBlockOnPage> allBlocks, int fromInclusive, int toExclusive) {
+
+        if (fromInclusive >= toExclusive) {
+            return Collections.emptyList();
+        }
+        return allBlocks.subList(fromInclusive, toExclusive)
+                .stream()
+                .map(this::extractListIdentifiers)
+                .flatMap(Collection::stream)
+                .toList();
+    }
+
+
+    /**
+     * Parses a list identifier from the first words of every line of the given block.
+     * Blocks that are not a {@link TextPageBlock} yield no identifiers.
+     *
+     * @param block the block to inspect
+     * @return a mutable list of the identifiers found, in word order; empty if there are none
+     */
+    private List<ListIdentifier> extractListIdentifiers(AbstractBlockOnPage block) {
+
+        List<ListIdentifier> result = new LinkedList<>();
+        if (!(block.block() instanceof TextPageBlock textBlock)) {
+            return result;
+        }
+
+        List<Word> words = textBlock.getWords();
+        for (int i = 0; i < words.size(); i++) {
+
+            Word word = words.get(i);
+            if (i != 0 && words.get(i - 1).getXDirAdj() < word.getXDirAdj()) {
+                // the previous word lies further left, so this word is not the start of a line
+                continue;
+            }
+
+            List<Word> wordsAtStartOfLine = new ArrayList<>(MAX_WORDS_PER_IDENTIFIER);
+            int end = Math.min(words.size(), i + MAX_WORDS_PER_IDENTIFIER);
+            for (int j = i; j < end; j++) {
+                // stop at the first word that no longer overlaps the line's first word vertically
+                // (second argument is presumably a tolerance - TODO confirm)
+                if (!words.get(j).intersectsYDirAdj(word, 2)) {
+                    break;
+                }
+                wordsAtStartOfLine.add(words.get(j));
+            }
+
+            ListIdentifier.parse(wordsAtStartOfLine, block.page().getPageNumber()).ifPresent(result::add);
+        }
+        return result;
+    }
+
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Dominique Eifländer	ef23ee0ade	Merge branch 'RED-10752-main' into 'main' RED-10752: Enabled prometheus See merge request fforesight/layout-parser!267	2025-01-29 13:34:01 +01:00
Dominique Eifländer	af31f52b47	RED-10752: Enabled prometheus	2025-01-29 11:09:29 +01:00
Kilian Schüttler	b5152112ee	Merge branch 'RM-231' into 'main' RM-231: missing whitespace in name See merge request fforesight/layout-parser!264	2025-01-14 13:04:10 +01:00
Kilian Schuettler	85ea4ef455	RM-231: missing whitespace in name	2025-01-14 12:59:01 +01:00
Kilian Schüttler	01f8c01fff	Merge branch 'RED-10714' into 'main' RED-10714: fix IndexOutOfBoundsException See merge request fforesight/layout-parser!262	2025-01-10 12:33:18 +01:00
Kilian Schuettler	0b6a292c75	RED-10714: fix IndexOutOfBoundsException	2025-01-10 12:12:14 +01:00
Maverick Studer	e24020589c	Merge branch 'feature/RED-9998' into 'main' RED-9998: App version history (for conditional re-analyzing the layout of a file) See merge request fforesight/layout-parser!259	2024-12-12 09:58:46 +01:00
Maverick Studer	c619b845e8	RED-9998: App version history (for conditional re-analyzing the layout of a file)	2024-12-12 09:58:46 +01:00
Kilian Schüttler	ed0371ca11	Merge branch 'RED-10127' into 'main' RED-10127: Paragraphs with multiple table, appendix, figure can't be headlines See merge request fforesight/layout-parser!257	2024-12-06 14:49:48 +01:00
Kilian Schuettler	89b5be8d67	RED-10127: Paragraphs with multiple table, appendix, figure can't be headlines	2024-12-06 13:41:44 +01:00
Kilian Schuettler	077ce60c9d	RED-9139: update document version	2024-11-15 16:48:56 +01:00
Kilian Schüttler	ab171be6e2	Merge branch 'feature/RED-9139' into 'main' RED-9139: more robust TOC detection See merge request fforesight/layout-parser!253	2024-11-14 16:50:52 +01:00
Kilian Schuettler	664b47b4c3	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:49 +01:00
Kilian Schuettler	8005c1f25f	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	42185a95a0	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	51b42efaf6	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	6a50d45947	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	073ac12cf7	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	84b054a4cc	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	905b65a5fa	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	7617c1f308	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	2b3936c09b	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	6e5b1f1978	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	cf846d18bc	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	25c46f16ac	RED-9139: move document to module in redaction-service * add feature version	2024-11-14 16:39:48 +01:00
Kilian Schuettler	96acefed78	RED-9139: move document to module in redaction-service * add TableOfContents node	2024-11-14 16:39:48 +01:00
Kilian Schuettler	366241e6c6	RED-9139: move document to module in redaction-service * add TableOfContents node	2024-11-14 16:39:48 +01:00
Kilian Schuettler	7f472ccc52	RED-9139: move document to module in redaction-service * add TableOfContents node	2024-11-14 16:39:48 +01:00
Kilian Schuettler	6f807c7d94	RED-9139: add new TableOfContents Node * rename previous TableOfContent to SectionTree * added protobuf compile script	2024-11-14 16:39:48 +01:00
Kilian Schuettler	6e04c15f3d	RED-9139: add new TableOfContents Node * rename previous TableOfContent to SectionTree * added protobuf compile script	2024-11-14 16:39:48 +01:00
Kilian Schuettler	1384584e2f	RED-9139: more robust TOC detection * detect numbers in words, and not just whole words that are numbers	2024-11-14 16:39:46 +01:00
Kilian Schuettler	e58011e111	RED-9139: more robust TOC detection * detect numbers in words, and not just whole words that are numbers	2024-11-14 16:39:21 +01:00
Kilian Schüttler	a821570065	Merge branch 'RED-9139-bp' into 'main' RED-9139: more robust TOC detection See merge request fforesight/layout-parser!254	2024-11-13 10:54:39 +01:00
Kilian Schüttler	7ee1f9e360	RED-9139: more robust TOC detection	2024-11-13 10:54:39 +01:00
Kilian Schüttler	f9b25c8157	Merge branch 'RED-10249' into 'main' RED-10249: regex found incorrectly due to wrong text sorting See merge request fforesight/layout-parser!252	2024-11-04 12:51:38 +01:00
Kilian Schüttler	c90874da7a	RED-10249: regex found incorrectly due to wrong text sorting	2024-11-04 12:51:37 +01:00
Kilian Schüttler	4683c696a5	Merge branch 'RED-10247' into 'main' RED-10247: dictionary entry not found in footer due to wrong text sorting See merge request fforesight/layout-parser!251	2024-10-25 18:30:35 +02:00
Kilian Schuettler	95c02ce3cf	RED-10247: dictionary entry not found in footer due to wrong text sorting	2024-10-25 17:18:14 +02:00
Kilian Schüttler	b2d62e32fe	Merge branch 'RED-10270-fp' into 'main' RED-10270: fix NumberFormatException See merge request fforesight/layout-parser!248	2024-10-24 17:14:47 +02:00
Kilian Schuettler	65c1f03ea3	RED-10270: fix NumberFormatException	2024-10-24 10:59:05 +02:00
Kilian Schüttler	2219519a2b	Merge branch 'RED-10127' into 'main' RED-10127: rename TextPositionSequence to Word See merge request fforesight/layout-parser!244	2024-10-18 12:20:15 +02:00
Kilian Schüttler	af05218e37	RED-10127: rename TextPositionSequence to Word	2024-10-18 12:20:15 +02:00
Kilian Schüttler	736f531df3	Merge branch 'hotfix' into 'main' Hotfix See merge request fforesight/layout-parser!243	2024-10-18 12:12:15 +02:00
Kilian Schüttler	c64445d54b	Hotfix	2024-10-18 12:12:15 +02:00
Kilian Schüttler	af29233b10	Merge branch 'feature/RED-10127' into 'main' RED-10127: add more units See merge request fforesight/layout-parser!242	2024-10-15 09:57:21 +02:00
Kilian Schuettler	5f04b45554	RED-10127: add more units	2024-10-15 09:47:39 +02:00
Kilian Schüttler	6c41533f0b	Merge branch 'feature/RED-10127' into 'main' RED-10127: improve list classification See merge request fforesight/layout-parser!240	2024-10-14 17:34:33 +02:00
Kilian Schuettler	9d2596e5ef	RED-10127: improve list classification * add one more format to list identification * add 'ppb' to known units * special case for headlines continuing with 14C after the identifier (quite often in some specific files)	2024-10-14 17:21:44 +02:00
Kilian Schüttler	e7b01161ac	Merge branch 'feature/RED-10127' into 'main' RED-10127: add list classification See merge request fforesight/layout-parser!237	2024-10-10 10:50:10 +02:00
Kilian Schüttler	7b073eb4f3	RED-10127: add list classification	2024-10-10 10:50:10 +02:00
Dominique Eifländer	4b0c041d84	Merge branch 'feature/RED-10127' into 'main' RED-10127: improve headline detection See merge request fforesight/layout-parser!235	2024-10-09 08:48:48 +02:00
Kilian Schüttler	6c7442ac6d	RED-10127: improve headline detection	2024-10-09 08:48:48 +02:00
Maverick Studer	23e23328ee	Merge branch 'RED-10126' into 'main' RM-187: Footers are recognized in the middle of the page See merge request fforesight/layout-parser!233	2024-10-08 14:27:45 +02:00
Maverick Studer	9d1ffdd779	RM-187: Footers are recognized in the middle of the page	2024-10-08 14:27:44 +02:00
Maverick Studer	3109a30ae1	Merge branch 'RED-9123-proto' into 'main' RED-9123: Improve performance of re-analysis (Spike) See merge request fforesight/layout-parser!232	2024-10-07 12:28:10 +02:00
Maverick Studer	fe2ed1807e	RED-9123: Improve performance of re-analysis (Spike)	2024-10-07 12:28:10 +02:00
Maverick Studer	31de229fa5	Merge branch 'feature/RED-9010' into 'main' RED-9010: remove redaction log See merge request fforesight/layout-parser!231	2024-09-19 11:34:32 +02:00
Maverick Studer	8a80abfff1	RED-9010: remove redaction log	2024-09-19 11:34:32 +02:00
Dominique Eifländer	7c08905eda	Merge branch 'RED-9975-main' into 'main' RED-9975: Fixed missing section numbers in layout grid See merge request fforesight/layout-parser!230	2024-09-18 11:29:51 +02:00
Dominique Eifländer	4f40c9dbc9	RED-9975: Fixed missing section numbers in layout grid	2024-09-18 11:22:37 +02:00
Dominique Eifländer	32381b4472	Merge branch 'RED-9974' into 'main' Red 9974: improce headline classification, fix font size calculation See merge request fforesight/layout-parser!226	2024-09-16 14:06:48 +02:00
Kilian Schüttler	469da38952	Red 9974: improce headline classification, fix font size calculation	2024-09-16 14:06:48 +02:00
Dominique Eifländer	0f8c4674b3	Merge branch 'hotfix' into 'main' hotfix: viewerDocService doesn't remove existing marked content See merge request fforesight/layout-parser!225	2024-09-12 09:12:54 +02:00
Kilian Schuettler	8e165a41d7	hotfix: viewerDocService doesn't remove existing marked content	2024-09-11 16:34:21 +02:00
Kilian Schüttler	ed7a701ad9	Merge branch 'RED-9975' into 'main' RED-9975: improve SuperSection handling See merge request fforesight/layout-parser!223	2024-09-11 13:38:09 +02:00
Kilian Schüttler	393103e074	RED-9975: improve SuperSection handling	2024-09-11 13:38:09 +02:00
Dominique Eifländer	bd02066e2c	Merge branch 'RED-9976-main' into 'main' RED-9976: Removed sorting that scrambles text in PDFTextStripper See merge request fforesight/layout-parser!222	2024-09-10 13:02:36 +02:00
Dominique Eifländer	fec19f4afb	RED-9976: Removed sorting that scrambles text in PDFTextStripper	2024-09-10 12:50:37 +02:00
Kilian Schüttler	c726a643f0	Merge branch 'hotfix' into 'main' Hotfix: unmerge super large tables See merge request fforesight/layout-parser!220	2024-09-05 15:05:21 +02:00
Kilian Schüttler	519e95735c	Hotfix: unmerge super large tables	2024-09-05 15:05:21 +02:00
Maverick Studer	b52af2637f	Merge branch 'RED-9942-2' into 'main' RED-9942: File only with images not recognised See merge request fforesight/layout-parser!218	2024-09-05 10:49:12 +02:00
Maverick Studer	46ea7edc4c	RED-9942: File only with images not recognised	2024-09-05 10:49:12 +02:00
Kilian Schüttler	9650195afd	Merge branch 'hotfix-fp' into 'main' hotfix: add Java advanced imaging See merge request fforesight/layout-parser!217	2024-09-04 15:43:56 +02:00
Kilian Schuettler	ce628a99f7	hotfix: add Java advanced imaging	2024-09-04 15:18:12 +02:00
Maverick Studer	b66afe135c	Merge branch 'RED-9524' into 'main' RED-9524: File processing does not annotate images See merge request fforesight/layout-parser!214	2024-09-04 13:27:06 +02:00
Maverick Studer	dc892d0fec	RED-9524: File processing does not annotate images	2024-09-04 13:27:06 +02:00
Kilian Schüttler	af45f2cd8c	Merge branch 'RED-9964' into 'main' RED-9964: fix errors with images See merge request fforesight/layout-parser!212	2024-09-04 09:16:59 +02:00
Kilian Schuettler	befb6b1df6	RED-9964: fix errors with images	2024-09-03 16:37:48 +02:00
Maverick Studer	61efb4cae9	Merge branch 'update-tc' into 'main' Update tenant-commons for dlq fix See merge request fforesight/layout-parser!211	2024-09-03 13:50:02 +02:00
maverickstuder	4a06059258	Update tenant-commons for dlq fix	2024-09-03 13:15:08 +02:00
Dominique Eifländer	292e5b215e	Merge branch 'RED-9988-main' into 'main' RED-9988: Fixed NPE when image representation is not present See merge request fforesight/layout-parser!210	2024-09-02 09:56:53 +02:00
Dominique Eifländer	7c2db6c3c5	RED-9988: Fixed NPE when image representation is not present	2024-09-02 09:51:59 +02:00
Dominique Eifländer	4395074b21	Merge branch 'RED-9975' into 'main' Red 9975: fix outline detection See merge request fforesight/layout-parser!206	2024-09-02 09:02:36 +02:00
Kilian Schüttler	8e14b74da2	Red 9975: fix outline detection	2024-09-02 09:02:36 +02:00
Kilian Schüttler	3b91639ea9	Merge branch 'RED-9964-fp' into 'main' RED-9964: don't merge tables on non-consecutive pages See merge request fforesight/layout-parser!205	2024-08-30 14:00:48 +02:00
Kilian Schüttler	c5178ea5c2	RED-9964: don't merge tables on non-consecutive pages	2024-08-30 14:00:48 +02:00
Dominique Eifländer	cf39d4dfcc	Merge branch 'RED-9974' into 'main' RED-9974: Improved headline detection for documine old See merge request fforesight/layout-parser!202	2024-08-30 10:57:20 +02:00
Dominique Eifländer	bb40345f79	RED-9974: Improved headline detection for documine old	2024-08-30 10:36:22 +02:00
Kilian Schüttler	e3e9d16145	Merge branch 'RED-9975' into 'main' RED-9975: activate outline detection See merge request fforesight/layout-parser!201	2024-08-29 14:27:00 +02:00
Kilian Schuettler	f6ca5a3c17	RED-9975: activate outline detection	2024-08-29 14:18:29 +02:00
Maverick Studer	15e3dced35	Merge branch 'tenants-retry' into 'main' Tenants retry logic and queue renames See merge request fforesight/layout-parser!197	2024-08-29 13:46:54 +02:00
Maverick Studer	933054b332	Tenants retry logic and queue renames	2024-08-29 13:46:54 +02:00
Kilian Schüttler	ab86714cb3	Merge branch 'RED-9975' into 'main' RED-9975: activate outline detection See merge request fforesight/layout-parser!198	2024-08-29 12:25:42 +02:00
Kilian Schuettler	8626b106d0	RED-9975: activate outline detection	2024-08-29 12:16:07 +02:00
Maverick Studer	52e948e66c	Merge branch 'RED-9331' into 'main' RED-9331: Explore possibilities for fair upload / analysis processing per tenant See merge request fforesight/layout-parser!182	2024-08-27 09:27:37 +02:00
Maverick Studer	3b33405cbf	RED-9331: Explore possibilities for fair upload / analysis processing per tenant	2024-08-27 09:27:37 +02:00
Maverick Studer	b2fa14dde2	Merge branch 'AZURE_NER' into 'main' RED-9918: Azure entity recognition (Spike) See merge request fforesight/layout-parser!196	2024-08-26 14:34:46 +02:00
Maverick Studer	62e07686d7	RED-9918: Azure entity recognition (Spike)	2024-08-26 14:34:46 +02:00