RED-7074: Design Subsection section tree structure algorithm

* added toc enrichment logic and changed section computation to build upon created toc
2024-04-30 14:41:17 +02:00 · 2024-04-30 14:41:17 +02:00 · c071a133e6
commit c071a133e6
parent 9f9ea68706
12 changed files with 482 additions and 101 deletions
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java
@ -32,6 +32,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.image.Classifi
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineExtractorService;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObject;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineValidationService;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.TOCEnrichmentService;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
 import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
@ -101,6 +102,7 @@ public class LayoutParsingPipeline {
    GraphicExtractorService graphicExtractorService;
    OutlineExtractorService outlineExtractorService;
    OutlineValidationService outlineValidationService;
+    TOCEnrichmentService tocEnrichmentService;


    public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
@ -279,17 +281,17 @@ public class LayoutParsingPipeline {
            List<Cell> emptyTableCells = TableExtractionService.findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());

            var graphics = graphicExtractorService.extractPathElementGraphics(originDocument,
-                    pdPage,
-                    pageNumber,
-                    cleanRulings,
-                    stripper.getTextPositionSequences(),
-                    emptyTableCells,
-                    false);
+                                                                              pdPage,
+                                                                              pageNumber,
+                                                                              cleanRulings,
+                                                                              stripper.getTextPositionSequences(),
+                                                                              emptyTableCells,
+                                                                              false);

            pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>())
                    .addAll(graphics.stream()
-                            .map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber()))
-                            .toList());
+                                    .map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber()))
+                                    .toList());

            ClassificationPage classificationPage = switch (layoutParsingType) {
                case REDACT_MANAGER_OLD -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), emptyTableCells);
@ -372,6 +374,8 @@ public class LayoutParsingPipeline {
            default -> {
                sectionsBuilderService.buildSections(classificationDocument);
                sectionsBuilderService.addImagesToSections(classificationDocument);
+
+                tocEnrichmentService.assignSectionBlocksAndImages(classificationDocument);
            }
        }

--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/ClassificationDocument.java
@ -1,11 +1,15 @@
 package com.knecon.fforesight.service.layoutparser.processor.model;

 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;

 import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineObjectTree;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContentItem;
 import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContents;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;

 import lombok.Data;
@ -17,6 +21,7 @@ public class ClassificationDocument {

    private List<ClassificationPage> pages = new ArrayList<>();
    private List<ClassificationSection> sections = new ArrayList<>();
+    //private Map<TextPageBlock, List<AbstractPageBlock>> sectionsMap = new HashMap<>();
    private List<ClassificationHeader> headers = new ArrayList<>();
    private List<ClassificationFooter> footers = new ArrayList<>();
    private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/DocumentTree.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/DocumentTree.java
@ -140,8 +140,8 @@ public class DocumentTree {
        if (treeId.isEmpty()) {
            return root;
        }
-        Entry entry = root.children.get(treeId.get(0));
-        for (int id : treeId.subList(1, treeId.size())) {
+        Entry entry = root;
+        for (int id : treeId) {
            entry = entry.children.get(id);
        }
        return entry;
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java
@ -39,7 +39,7 @@ public class OutlineValidationService {
    private boolean containsBlock(TableOfContents toc, TextPageBlock block) {

        for (TableOfContentItem existingItem : toc.getMainSections()) {
-            if (existingItem.getTextPageBlock().equals(block) || existingItem.contains(block)) {
+            if (existingItem.getHeadline().equals(block) || existingItem.contains(block)) {
                return true;
            }
        }
@ -82,7 +82,7 @@ public class OutlineValidationService {
                assert (parent != null);
                while (parentDepth < currentDepth && parent.getParent() != null) {
                    parent = parent.getParent();
-                    parentDepth = getDepth(parent.getTextPageBlock().getClassification());
+                    parentDepth = getDepth(parent.getHeadline().getClassification());
                }
                parent.addChild(new TableOfContentItem(current));
            }
@ -110,12 +110,12 @@ public class OutlineValidationService {

            } else {
                assert last != null;
-                int lastDepth = getDepth(last.getTextPageBlock().getClassification());
+                int lastDepth = getDepth(last.getHeadline().getClassification());

                if (lastDepth < parentDepth) {
                    parentDepth = lastDepth;
                } else if (lastDepth == currentDepth && last.getParent() != null) {
-                    parentDepth = getDepth(last.getParent().getTextPageBlock().getClassification());
+                    parentDepth = getDepth(last.getParent().getHeadline().getClassification());
                }

                TableOfContentItem parent = lastItemsPerDepth.get(parentDepth);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java
@ -0,0 +1,266 @@
+package com.knecon.fforesight.service.layoutparser.processor.model.outline;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
+import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
+import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+public class TOCEnrichmentService {
+
+    /**
+     * Distributes the page blocks and images of the document onto the items of its table of contents.
+     * <p>
+     * Pages are processed in document order. A text block whose text equals the text of a TOC headline starts the
+     * section of that TOC item; every subsequent block is appended to the most recently started section. Blocks
+     * classified as header or footer are collected per page and stored on the document; blocks without a
+     * classification are skipped. Consecutive tables are handed to {@code mergeTableMetadata} so that a table
+     * without header information can inherit it from its predecessor.
+     * <p>
+     * Each image is attached to the first headline on its page whose accumulated bounding box contains the image
+     * position, otherwise to the first TOC item of the most recent page on which a headline was found.
+     * <p>
+     * Blocks and images located before the first matched headline are collected in an additional TOC item without
+     * a headline, which is inserted as the first main section.
+     *
+     * @param document the document to enrich; its table of contents, headers and footers are modified
+     */
+    public void assignSectionBlocksAndImages(ClassificationDocument document) {
+
+        TableOfContents toc = document.getTableOfContents();
+        List<AbstractPageBlock> startBlocks = new ArrayList<>();
+        List<ClassifiedImage> startImages = new ArrayList<>();
+        // Remains null until the first TOC headline has been matched.
+        TableOfContentItem currentSection = null;
+
+        List<ClassificationHeader> headers = new ArrayList<>();
+        List<ClassificationFooter> footers = new ArrayList<>();
+        TablePageBlock previousTable = null;
+        List<TableOfContentItem> lastFoundTOCItems = new ArrayList<>();
+
+        for (ClassificationPage page : document.getPages()) {
+            List<TableOfContentItem> currentPageTOCItems = new ArrayList<>();
+            List<TextPageBlock> header = new ArrayList<>();
+            List<TextPageBlock> footer = new ArrayList<>();
+            for (AbstractPageBlock current : page.getTextBlocks()) {
+
+                if (current.getClassification() == null) {
+                    continue;
+                }
+
+                current.setPage(page.getPageNumber());
+
+                if (current.getClassification().equals(PageBlockType.HEADER)) {
+                    header.add((TextPageBlock) current);
+                    continue;
+                }
+
+                if (current.getClassification().equals(PageBlockType.FOOTER)) {
+                    footer.add((TextPageBlock) current);
+                    continue;
+                }
+
+                if (current instanceof TablePageBlock table) {
+                    if (previousTable != null) {
+                        mergeTableMetadata(table, previousTable);
+                    }
+                    previousTable = table;
+                }
+
+                // Headlines are matched by text; the first TOC item (in iteration order) with equal text wins.
+                TableOfContentItem matchedItem = null;
+                if (current instanceof TextPageBlock) {
+                    for (TableOfContentItem tocItem : toc) {
+                        if (tocItem.getHeadline().getText().equals(current.getText())) {
+                            matchedItem = tocItem;
+                            break;
+                        }
+                    }
+                }
+
+                if (matchedItem != null) {
+                    currentSection = matchedItem;
+                    matchedItem.getSectionBlocks().add(current);
+                    currentPageTOCItems.add(matchedItem);
+                } else if (currentSection == null) {
+                    startBlocks.add(current);
+                } else {
+                    currentSection.getSectionBlocks().add(current);
+                }
+            }
+
+            if (!currentPageTOCItems.isEmpty()) {
+                lastFoundTOCItems = currentPageTOCItems;
+            }
+
+            for (ClassifiedImage image : page.getImages()) {
+
+                Float xMin = null;
+                Float yMin = null;
+                Float xMax = null;
+                Float yMax = null;
+                // Tracked locally: the image's own appendedToSection flag may already have been set by an earlier
+                // processing step and therefore cannot tell whether the image was attached to a TOC item here.
+                boolean assigned = false;
+
+                for (TableOfContentItem tocItem : lastFoundTOCItems) {
+                    var headline = tocItem.getHeadline();
+
+                    if (headline.getPage() != page.getPageNumber()) {
+                        continue;
+                    }
+
+                    // The bounding box grows with every headline inspected so far; min/max may be swapped.
+                    if (headline.getMinX() < headline.getMaxX()) {
+                        if (xMin == null || headline.getMinX() < xMin) {
+                            xMin = headline.getMinX();
+                        }
+                        if (xMax == null || headline.getMaxX() > xMax) {
+                            xMax = headline.getMaxX();
+                        }
+                    } else {
+                        if (xMin == null || headline.getMaxX() < xMin) {
+                            xMin = headline.getMaxX();
+                        }
+                        if (xMax == null || headline.getMinX() > xMax) {
+                            xMax = headline.getMinX();
+                        }
+                    }
+
+                    if (headline.getMinY() < headline.getMaxY()) {
+                        if (yMin == null || headline.getMinY() < yMin) {
+                            yMin = headline.getMinY();
+                        }
+                        if (yMax == null || headline.getMaxY() > yMax) {
+                            yMax = headline.getMaxY();
+                        }
+                    } else {
+                        if (yMin == null || headline.getMaxY() < yMin) {
+                            yMin = headline.getMaxY();
+                        }
+                        if (yMax == null || headline.getMinY() > yMax) {
+                            yMax = headline.getMinY();
+                        }
+                    }
+
+                    log.debug("Image position x: {}, y: {}", image.getPosition().getX(), image.getPosition().getY());
+                    log.debug("Headline position xMin: {}, xMax: {}, yMin: {}, yMax: {}", xMin, xMax, yMin, yMax);
+
+                    if (image.getPosition().getX() >= xMin && image.getPosition().getX() <= xMax && image.getPosition().getY() >= yMin && image.getPosition().getY() <= yMax) {
+                        tocItem.getImages().add(image);
+                        image.setAppendedToSection(true);
+                        assigned = true;
+                        break;
+                    }
+                }
+                if (!assigned) {
+                    log.debug("Image uses first paragraph");
+                    if (!lastFoundTOCItems.isEmpty()) {
+                        lastFoundTOCItems.get(0).getImages().add(image);
+                    } else {
+                        startImages.add(image);
+                    }
+                    image.setAppendedToSection(true);
+                }
+            }
+
+            if (!header.isEmpty()) {
+                headers.add(new ClassificationHeader(header));
+            }
+            if (!footer.isEmpty()) {
+                footers.add(new ClassificationFooter(footer));
+            }
+        }
+
+        // Leading images must survive even when no block precedes the first headline (e.g. an image-only page).
+        if (!startBlocks.isEmpty() || !startImages.isEmpty()) {
+            TableOfContentItem unassigned = new TableOfContentItem(null);
+            unassigned.setSectionBlocks(startBlocks);
+            unassigned.setImages(startImages);
+            document.getTableOfContents().getMainSections().add(0, unassigned);
+        }
+        document.setHeaders(headers);
+        document.setFooters(footers);
+    }
+
+
+    /**
+     * Copies header-cell references from the previous table onto the current table when the current table has no
+     * header information of its own and the previous table does.
+     *
+     * @param currentTable  the table that may receive header cells
+     * @param previousTable the table encountered before it; may be {@code null}, in which case nothing happens
+     */
+    private void mergeTableMetadata(TablePageBlock currentTable, TablePageBlock previousTable) {
+
+        boolean canInherit = previousTable != null && hasInvalidHeaderInformation(currentTable) && hasValidHeaderInformation(previousTable);
+        if (!canInherit) {
+            return;
+        }
+
+        List<Cell> donorRow = getRowWithNonHeaderCells(previousTable);
+        List<Cell> referenceRow = getRowWithNonHeaderCells(currentTable);
+        int columnCount = referenceRow.size();
+
+        // Allow merging of tables if header row is separated from first logical non-header row:
+        // the single row of the previous table then acts as header for synthetic placeholder cells.
+        boolean previousIsLoneRow = donorRow.isEmpty()
+                                    && previousTable.getRowCount() == 1
+                                    && previousTable.getRows()
+                                               .get(0).size() == columnCount;
+        if (previousIsLoneRow) {
+            List<Cell> placeholders = new ArrayList<>();
+            for (Cell headerCell : previousTable.getRows()
+                    .get(0)) {
+                Cell fakeCell = new Cell(headerCell.getPoints()[0], headerCell.getPoints()[2]);
+                fakeCell.setHeaderCells(Collections.singletonList(headerCell));
+                placeholders.add(fakeCell);
+            }
+            donorRow = placeholders;
+        }
+
+        if (donorRow.size() != columnCount) {
+            return;
+        }
+
+        // Non header rows are most likely at bottom of table, hence the bottom-up walk.
+        for (int rowIndex = currentTable.getRowCount() - 1; rowIndex >= 0; rowIndex--) {
+            List<Cell> row = currentTable.getRows()
+                    .get(rowIndex);
+            if (row.size() != columnCount) {
+                continue;
+            }
+            boolean alreadyHasHeaders = row.stream()
+                    .anyMatch(cell -> !cell.getHeaderCells().isEmpty());
+            if (alreadyHasHeaders) {
+                continue;
+            }
+            for (int column = 0; column < columnCount; column++) {
+                row.get(column).setHeaderCells(donorRow.get(column).getHeaderCells());
+            }
+        }
+    }
+
+
+    /**
+     * Logical negation of {@code hasInvalidHeaderInformation}.
+     *
+     * @param table the table to inspect
+     * @return {@code true} when the table's header information is not considered invalid
+     */
+    private boolean hasValidHeaderInformation(TablePageBlock table) {
+
+        boolean invalid = hasInvalidHeaderInformation(table);
+        return !invalid;
+    }
+
+
+    /**
+     * Tells whether the table lacks header information entirely.
+     *
+     * @param table the table to inspect
+     * @return {@code true} when no cell in any row has a non-empty list of header cells
+     */
+    private boolean hasInvalidHeaderInformation(TablePageBlock table) {
+
+        for (List<Cell> row : table.getRows()) {
+            for (Cell cell : row) {
+                if (!cell.getHeaderCells().isEmpty()) {
+                    // A single cell with header references is enough to count as valid.
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+
+    /**
+     * Searches the table bottom-up for a row that consists solely of non-header cells.
+     * Rows with exactly one cell are ignored.
+     *
+     * @param table the table to search
+     * @return the lowest qualifying row, or an empty list when there is none
+     */
+    private List<Cell> getRowWithNonHeaderCells(TablePageBlock table) {
+
+        // Non header rows are most likely at bottom of table
+        for (int rowIndex = table.getRowCount() - 1; rowIndex >= 0; rowIndex--) {
+            List<Cell> candidate = table.getRows()
+                    .get(rowIndex);
+            boolean isSingleCellRow = candidate.size() == 1;
+            if (!isSingleCellRow && candidate.stream()
+                    .noneMatch(Cell::isHeaderCell)) {
+                return candidate;
+            }
+        }
+
+        return Collections.emptyList();
+    }
+
+}
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java
@ -1,9 +1,13 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;

 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
+import java.util.stream.Collectors;

-import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
+import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;

 import lombok.Data;
@ -14,14 +18,19 @@ import lombok.EqualsAndHashCode;
 public class TableOfContentItem {

    @EqualsAndHashCode.Include
-    private TextPageBlock textPageBlock;
+    private TextPageBlock headline;
    private List<TableOfContentItem> children = new ArrayList<>();
    private TableOfContentItem parent;

+    private List<AbstractPageBlock> sectionBlocks = new ArrayList<>();
+    private List<ClassifiedImage> images = new ArrayList<>();

-    public TableOfContentItem(TextPageBlock textPageBlock) {
+    private Section section;

-        this.textPageBlock = textPageBlock;
+
+    public TableOfContentItem(TextPageBlock headline) {
+
+        this.headline = headline;
    }


@ -34,60 +43,68 @@ public class TableOfContentItem {

    public TableOfContentItem getSiblingBefore() {

-        try {
-            return parent.getChildren()
-                    .get(parent.getChildren().indexOf(this) - 1);
-        } catch (IndexOutOfBoundsException indexOutOfBoundsException) {
-            return null;
+        // Yields the child directly preceding this item in the parent's child list, or null when this item has
+        // no parent, is the first child, or is not found among the parent's children.
+        if (parent == null) {
+            return null;
        }
+        List<TableOfContentItem> siblings = parent.getChildren();
+        int position = siblings.indexOf(this);
+        return position > 0 ? siblings.get(position - 1) : null;
    }
+
+
    public TableOfContentItem getSiblingAfter() {

-        try {
-            return parent.getChildren()
-                    .get(parent.getChildren().indexOf(this) + 1);
-        } catch (IndexOutOfBoundsException indexOutOfBoundsException) {
-            return null;
+        // Yields the child directly following this item in the parent's child list, or null when this item has
+        // no parent, is the last child, or is not found among the parent's children.
+        if (parent == null) {
+            return null;
        }
+        List<TableOfContentItem> siblings = parent.getChildren();
+        int position = siblings.indexOf(this);
+        boolean hasSuccessor = position >= 0 && position + 1 < siblings.size();
+        return hasSuccessor ? siblings.get(position + 1) : null;
    }


    public boolean contains(TextPageBlock block) {

-        boolean anyChildContains = false;
-        if (!children.isEmpty()) {
-            for (TableOfContentItem child : children) {
-                if (child.getTextPageBlock().equals(block)) {
-                    return true;
-                } else {
-                    anyChildContains = anyChildContains || child.contains(block);
-                }
+        // Checks this item and, recursively, all of its descendants for the given headline block.
+        // The headline may be null (an item can be constructed without one), so guard before comparing.
+        if (headline != null && headline.equals(block)) {
+            return true;
+        }
+        for (TableOfContentItem child : children) {
+            if (child.contains(block)) {
+                return true;
            }
        }
-        return anyChildContains;
+        return false;
    }


    public boolean contains(TableOfContentItem tocItem) {

-        boolean anyChildContains = false;
-        if (!children.isEmpty()) {
-            for (TableOfContentItem child : children) {
-                if (child.equals(tocItem)) {
-                    return true;
-                } else {
-                    anyChildContains = anyChildContains || child.contains(tocItem);
-                }
-            }
-        }
-        return anyChildContains;
+        // True when the given item equals this item or any item in the subtree below it; children are
+        // visited in order and the search stops at the first hit.
+        return this.equals(tocItem) || children.stream()
+                .anyMatch(child -> child.contains(tocItem));
    }

+    /**
+     * Returns the section blocks of this item for which {@code isEmpty()} is false, in their original order.
+     *
+     * @return a new list holding the non-empty section blocks
+     */
+    public List<AbstractPageBlock> getNonEmptySectionBlocks() {
+
+        List<AbstractPageBlock> nonEmptyBlocks = new ArrayList<>();
+        for (AbstractPageBlock candidate : sectionBlocks) {
+            if (!candidate.isEmpty()) {
+                nonEmptyBlocks.add(candidate);
+            }
+        }
+        return nonEmptyBlocks;
+    }

    @Override
    public String toString() {

-        return "OutlineObjectTreeNode{" + "textPageBlock=" + textPageBlock + '}';
+        // Only the headline is printed; the label uses the current class and field names.
+        return "TableOfContentItem{" + "headline=" + headline + '}';
    }

+
+
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContents.java
@ -1,9 +1,11 @@
 package com.knecon.fforesight.service.layoutparser.processor.model.outline;

 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
-import java.util.Map;
+import java.util.Stack;
+
+import org.springframework.lang.NonNull;

 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;

@ -12,7 +14,7 @@ import lombok.RequiredArgsConstructor;

@Data
@RequiredArgsConstructor
-public class TableOfContents {
+public class TableOfContents implements Iterable<TableOfContentItem> {

    private List<TableOfContentItem> mainSections = new ArrayList<>();

@ -35,7 +37,7 @@ public class TableOfContents {

    private void collectTextPageBlocks(TableOfContentItem item, List<TextPageBlock> textPageBlocks) {

-        textPageBlocks.add(item.getTextPageBlock());
+        textPageBlocks.add(item.getHeadline());
        for (TableOfContentItem child : item.getChildren()) {
            collectTextPageBlocks(child, textPageBlocks);
        }
@ -56,4 +58,40 @@ public class TableOfContents {
        }
    }

+
+    /**
+     * Creates an iterator that starts at the main sections of this table of contents.
+     *
+     * @return a fresh iterator; never {@code null}
+     */
+    @Override
+    public @NonNull Iterator<TableOfContentItem> iterator() {
+
+        Iterator<TableOfContentItem> itemIterator = new TableOfContentItemIterator(mainSections);
+        return itemIterator;
+    }
+
+    /**
+     * Iterator that walks the table of contents depth-first in pre-order: an item is returned before its
+     * children, and the children of an item are returned before the item's next sibling.
+     * <p>
+     * The stack holds one child-list iterator per tree level currently being traversed.
+     */
+    private static class TableOfContentItemIterator implements Iterator<TableOfContentItem> {
+
+        private final Stack<Iterator<TableOfContentItem>> stack = new Stack<>();
+
+
+        public TableOfContentItemIterator(List<TableOfContentItem> mainSections) {
+
+            stack.push(mainSections.iterator());
+        }
+
+
+        @Override
+        public boolean hasNext() {
+
+            ensureStackTopIsCurrent();
+            // After the cleanup the stack is either empty or its top iterator has a next element.
+            return !stack.isEmpty();
+        }
+
+
+        /**
+         * @throws java.util.NoSuchElementException if the iteration has no more elements
+         */
+        @Override
+        public TableOfContentItem next() {
+
+            ensureStackTopIsCurrent();
+            if (stack.isEmpty()) {
+                // Honour the Iterator contract instead of leaking an EmptyStackException from peek().
+                throw new java.util.NoSuchElementException("No further table of contents items");
+            }
+            TableOfContentItem currentItem = stack.peek().next();
+            if (currentItem.getChildren() != null && !currentItem.getChildren().isEmpty()) {
+                // Descend into the children before continuing with the siblings of the current item.
+                stack.push(currentItem.getChildren().iterator());
+            }
+            return currentItem;
+        }
+
+
+        /** Pops exhausted iterators so that the top of the stack, if any, has a next element. */
+        private void ensureStackTopIsCurrent() {
+
+            while (!stack.isEmpty() && !stack.peek().hasNext()) {
+                stack.pop();
+            }
+        }
+    }
 }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java
@ -29,6 +29,7 @@ import lombok.extern.slf4j.Slf4j;
@Service
 public class SectionsBuilderService {

+
    public void buildSections(ClassificationDocument document) {

        List<AbstractPageBlock> chunkWords = new ArrayList<>();
@ -71,7 +72,8 @@ public class SectionsBuilderService {
                    chunkBlockList.add(chunkBlock);
                    chunkWords = new ArrayList<>();
                    if (!chunkBlock.getTables().isEmpty()) {
-                        previousTable = chunkBlock.getTables().get(chunkBlock.getTables().size() - 1);
+                        previousTable = chunkBlock.getTables()
+                                .get(chunkBlock.getTables().size() - 1);
                    }
                }
                if (current instanceof TablePageBlock table) {
@ -106,11 +108,12 @@ public class SectionsBuilderService {

        List<ClassificationSection> sections = new ArrayList<>();
        for (var page : document.getPages()) {
-            page.getTextBlocks().forEach(block -> {
-                block.setPage(page.getPageNumber());
-                var section = buildTextBlock(List.of(block), Strings.EMPTY);
-                sections.add(section);
-            });
+            page.getTextBlocks()
+                    .forEach(block -> {
+                        block.setPage(page.getPageNumber());
+                        var section = buildTextBlock(List.of(block), Strings.EMPTY);
+                        sections.add(section);
+                    });
        }
        document.setSections(sections);
    }
@ -202,8 +205,14 @@ public class SectionsBuilderService {
                    log.debug("Image position x: {}, y: {}", image.getPosition().getX(), image.getPosition().getY());
                    log.debug("Paragraph position xMin: {}, xMax: {}, yMin: {}, yMax: {}", xMin, xMax, yMin, yMax);

-                    if (xMin != null && xMax != null && yMin != null && yMax != null && image.getPosition().getX() >= xMin && image.getPosition()
-                            .getX() <= xMax && image.getPosition().getY() >= yMin && image.getPosition().getY() <= yMax) {
+                    if (xMin != null
+                        && xMax != null
+                        && yMin != null
+                        && yMax != null
+                        && image.getPosition().getX() >= xMin
+                        && image.getPosition().getX() <= xMax
+                        && image.getPosition().getY() >= yMin
+                        && image.getPosition().getY() <= yMax) {
                        section.getImages().add(image);
                        image.setAppendedToSection(true);
                        break;
@ -226,17 +235,26 @@ public class SectionsBuilderService {
            List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
            List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(currentTable);
            // Allow merging of tables if header row is separated from first logical non-header row
-            if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) {
-                previousTableNonHeaderRow = previousTable.getRows().get(0).stream().map(cell -> {
-                    Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]);
-                    fakeCell.setHeaderCells(Collections.singletonList(cell));
-                    return fakeCell;
-                }).collect(Collectors.toList());
+            if (previousTableNonHeaderRow.isEmpty()
+                && previousTable.getRowCount() == 1
+                && previousTable.getRows()
+                           .get(0).size() == tableNonHeaderRow.size()) {
+                previousTableNonHeaderRow = previousTable.getRows()
+                        .get(0)
+                        .stream()
+                        .map(cell -> {
+                            Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]);
+                            fakeCell.setHeaderCells(Collections.singletonList(cell));
+                            return fakeCell;
+                        })
+                        .collect(Collectors.toList());
            }
            if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
                for (int i = currentTable.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
-                    List<Cell> row = currentTable.getRows().get(i);
-                    if (row.size() == tableNonHeaderRow.size() && row.stream().allMatch(cell -> cell.getHeaderCells().isEmpty())) {
+                    List<Cell> row = currentTable.getRows()
+                            .get(i);
+                    if (row.size() == tableNonHeaderRow.size() && row.stream()
+                            .allMatch(cell -> cell.getHeaderCells().isEmpty())) {
                        for (int j = 0; j < row.size(); j++) {
                            row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());
                        }
@ -279,7 +297,11 @@ public class SectionsBuilderService {

    private boolean hasInvalidHeaderInformation(TablePageBlock table) {

-        return table.getRows().stream().flatMap(row -> row.stream().filter(cell -> !cell.getHeaderCells().isEmpty())).findAny().isEmpty();
+        // Header information counts as invalid when not a single cell in any row of the table references header cells.
+        return table.getRows()
+                .stream()
+                .flatMap(List::stream)
+                .allMatch(cell -> cell.getHeaderCells().isEmpty());

    }

@ -287,7 +309,8 @@ public class SectionsBuilderService {
    private List<Cell> getRowWithNonHeaderCells(TablePageBlock table) {

        for (int i = table.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
-            List<Cell> row = table.getRows().get(i);
+            List<Cell> row = table.getRows()
+                    .get(i);
            if (row.size() == 1) {
                continue;
            }
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java
@ -12,6 +12,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;

@ -35,6 +36,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
 import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
 import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
+import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContentItem;
 import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
 import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
 import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
@ -74,8 +76,14 @@ public class DocumentGraphFactory {

    private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {

-        classificationDocument.getSections()
-                .forEach(section -> SectionNodeFactory.addSection(layoutParsingType, null, section.getNonEmptyPageBlocks(), section.getImages(), context, document));
+        // Creates one section per table of contents item and stores it on the item, so that child items can pass it on as their parent node.
+        // NOTE(review): a child only sees its parent's section if the parent was iterated first and addSection returned one; otherwise parent is null - confirm iteration order.
+
+        for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
+            var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();
+            Optional<Section> section = SectionNodeFactory.addSection(layoutParsingType, parent, tocItem.getNonEmptySectionBlocks(), tocItem.getImages(), context, document);
+            tocItem.setSection(section.orElse(null));
+        }
    }


@ -85,14 +93,11 @@ public class DocumentGraphFactory {

        GenericSemanticNode node;
        if (originalTextBlock.isHeadline()) {
-            node = Headline.builder().documentTree(context.getDocumentTree())
-                    .build();
+            node = Headline.builder().documentTree(context.getDocumentTree()).build();
        } else if (originalTextBlock.isToDuplicate()) {
-            node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree())
-                    .build();
+            node = DuplicatedParagraph.builder().documentTree(context.getDocumentTree()).build();
        } else {
-            node = Paragraph.builder().documentTree(context.getDocumentTree())
-                    .build();
+            node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
        }

        page.getMainBody().add(node);
@ -178,12 +183,8 @@ public class DocumentGraphFactory {
    private void addFooter(List<TextPageBlock> textBlocks, Context context) {

        Page page = context.getPage(textBlocks.get(0).getPage());
-        Footer footer = Footer.builder().documentTree(context.getDocumentTree())
-                .build();
-        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks),
-                                                                                  footer,
-                                                                                  context,
-                                                                                  page);
+        Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
+        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks), footer, context, page);
        List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
        footer.setTreeId(tocId);
        footer.setLeafTextBlock(textBlock);
@ -194,8 +195,7 @@ public class DocumentGraphFactory {
    public void addHeader(List<TextPageBlock> textBlocks, Context context) {

        Page page = context.getPage(textBlocks.get(0).getPage());
-        Header header = Header.builder().documentTree(context.getDocumentTree())
-                .build();
+        Header header = Header.builder().documentTree(context.getDocumentTree()).build();
        AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeTextPositionSequence(textBlocks), header, 0, page);
        List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
        header.setTreeId(tocId);
@ -207,8 +207,7 @@ public class DocumentGraphFactory {
    private void addEmptyFooter(int pageIndex, Context context) {

        Page page = context.getPage(pageIndex);
-        Footer footer = Footer.builder().documentTree(context.getDocumentTree())
-                .build();
+        Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
        AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
        List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
        footer.setTreeId(tocId);
@ -220,8 +219,7 @@ public class DocumentGraphFactory {
    private void addEmptyHeader(int pageIndex, Context context) {

        Page page = context.getPage(pageIndex);
-        Header header = Header.builder().documentTree(context.getDocumentTree())
-                .build();
+        Header header = Header.builder().documentTree(context.getDocumentTree()).build();
        AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
        List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
        header.setTreeId(tocId);
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java
@ -9,6 +9,7 @@ import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;

 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
@ -27,12 +28,12 @@ import lombok.experimental.UtilityClass;
@UtilityClass
 public class SectionNodeFactory {

-    public void addSection(LayoutParsingType layoutParsingType,
-                           GenericSemanticNode parentNode,
-                           List<AbstractPageBlock> pageBlocks,
-                           List<ClassifiedImage> images,
-                           DocumentGraphFactory.Context context,
-                           Document document) {
+    public Optional<Section> addSection(LayoutParsingType layoutParsingType,
+                                        GenericSemanticNode parentNode,
+                                        List<AbstractPageBlock> pageBlocks,
+                                        List<ClassifiedImage> images,
+                                        DocumentGraphFactory.Context context,
+                                        Document document) {

        // This is for the case where we have images on a page without any text/footer/header.
        // The pageBlocks list is empty, but we still need to add those images to the document.
@ -40,11 +41,11 @@ public class SectionNodeFactory {
            images.stream()
                    .distinct()
                    .forEach(image -> DocumentGraphFactory.addImage(document, image, context));
-            return;
+            return Optional.empty();
        }

        if (pageBlocks.isEmpty()) {
-            return;
+            return Optional.empty();
        }

        Map<Integer, List<AbstractPageBlock>> blocksPerPage = pageBlocks.stream()
@ -73,6 +74,8 @@ public class SectionNodeFactory {
        images.stream()
                .distinct()
                .forEach(image -> DocumentGraphFactory.addImage(section, image, context));
+
+        return Optional.of(section);
    }


--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java
@ -32,10 +32,10 @@ public class ViewerDocumentTest extends BuildDocumentTest {
        //String fileName = "files/new/abschlussarbeiten-template-institut-fur-informatik-padagogische-hochschule-karlsruhe.pdf";
        //String fileName = "files/new/kaust-official-thesis-template.pdf";
        //String fileName = "files/new/$100m Offers.pdf";
-        //String fileName = "files/new/18-Curacron_ToxicidadeOcularInVitro.pdf";
-        String fileName = "files/new/UTT-Books-53.pdf";
+        String fileName = "files/new/18-Curacron_ToxicidadeOcularInVitro.pdf";
        //String fileName = "files/new/mistitled_outlines_example.pdf";
        //String fileName = "files/bdr/Plenarprotokoll 1 (keine Druchsache!) (1) 1.pdf";
+        //String fileName = "files/new/UTT-Books-53.pdf";
        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";

        var documentFile = new ClassPathResource(fileName).getFile();
@ -48,6 +48,32 @@ public class ViewerDocumentTest extends BuildDocumentTest {
        System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
    }

+    @Test
+    @SneakyThrows
+    public void testViewerDocumentWithImages() {
+
+        // Inputs: the PDF to parse and an image service response that is deserialized from a JSON classpath resource
+        String pdfResource = "files/new/UTT-Books-53.pdf";
+        String imageInfoResource = "files/images/test_outlines.IMAGE_INFO.json";
+        Path pdfName = Path.of(pdfResource).getFileName();
+
+        ImageServiceResponse imageServiceResponse = ObjectMapperFactory.create().readValue(new ClassPathResource(imageInfoResource).getInputStream(), ImageServiceResponse.class);
+        var documentFile = new ClassPathResource(pdfResource).getFile();
+
+        var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+                                                                       documentFile,
+                                                                       imageServiceResponse,
+                                                                       new TableServiceResponse(),
+                                                                       new VisualLayoutParsingResponse(),
+                                                                       Map.of("file", pdfName.toFile().toString()));
+        LayoutGridService layoutGridService = new LayoutGridService(new ViewerDocumentService(null));
+        Document document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER, classificationDocument);
+
+        // Output file handed to the layout grid service: /tmp/<pdf file name>_VIEWER.pdf
+        File viewerFile = new File("/tmp/" + pdfName + "_VIEWER.pdf");
+        layoutGridService.addLayoutGrid(documentFile, document, viewerFile, true);
+    }
+

    @Test
    @Disabled
@ -56,7 +82,8 @@ public class ViewerDocumentTest extends BuildDocumentTest {

        String fileName = "files/cv_tables/brokenTablesOnOcr_ocred.pdf";
        String tableFileName = "files/cv_tables/brokenTablesOnOcr_ocred.TABLES.json";
-        String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
+        Path path = Path.of(fileName);
+        String tmpFileName = "/tmp/" + path.getFileName() + "_VIEWER.pdf";

        var mapper = ObjectMapperFactory.create();
        var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class);
@ -67,7 +94,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
                new ImageServiceResponse(),
                tableResponse,
                new VisualLayoutParsingResponse(),
-                Map.of("file", Path.of(fileName).getFileName().toFile().toString()));
+                Map.of("file", path.getFileName().toFile().toString()));
        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
        LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
        Document document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE, classificationDocument);
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/UTT-Books-53.pdf
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/new/UTT-Books-53.pdf