From a9338262c5249215936185bc228d4ede199f7e12 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 7 May 2024 15:51:54 +0200 Subject: [PATCH] RED-7074: Design Subsection section tree structure algorithm * fix for boundary error --- .../textblock/ConcatenatedTextBlock.java | 3 +- .../model/outline/TOCEnrichmentService.java | 38 ++++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java index 7038dbf..d48170b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/textblock/ConcatenatedTextBlock.java @@ -56,8 +56,7 @@ public class ConcatenatedTextBlock implements TextBlock { boundary.setStart(start); boundary.setEnd(end); } else if (boundary.end() != start) { - //throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary())); - return this; + throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary())); } this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks()); boundary.setEnd(end); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java index 95849b4..4e257cb 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java @@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.outline; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.List; import org.springframework.stereotype.Service; @@ -26,6 +27,11 @@ public class TOCEnrichmentService { public void assignSectionBlocksAndImages(ClassificationDocument document) { TableOfContents toc = document.getTableOfContents(); + Iterator iterator = toc.iterator(); + TableOfContentItem currentTOCItem = null; + if(iterator.hasNext()) { + currentTOCItem = iterator.next(); + } List startBlocks = new ArrayList<>(); List startImages = new ArrayList<>(); TableOfContentItem currentSection = null; @@ -64,28 +70,24 @@ public class TOCEnrichmentService { } previousTable = table; } - boolean matched = false; - for (TableOfContentItem tocItem : toc) { - if (current instanceof TextPageBlock && tocItem.getHeadline().getText().equals(current.getText())) { - if (!foundFirstHeadline) { - foundFirstHeadline = true; - } - currentSection = tocItem; - //sectionsMap.get(tocItem).add(current); - tocItem.getSectionBlocks().add(current); - currentPageTOCItems.add(tocItem); - matched = true; - break; + if (current instanceof TextPageBlock && currentTOCItem != null && currentTOCItem.getHeadline().getText().equals(current.getText())) { + if (!foundFirstHeadline) { + foundFirstHeadline = true; + } + currentSection = currentTOCItem; + currentTOCItem.getSectionBlocks().add(current); + currentPageTOCItems.add(currentTOCItem); + + if(iterator.hasNext()) { + currentTOCItem = iterator.next(); } } - if (!matched) { - if (!foundFirstHeadline) { - startBlocks.add(current); - } else { - currentSection.getSectionBlocks().add(current); - } + if (!foundFirstHeadline) { + startBlocks.add(current); + } else { + currentSection.getSectionBlocks().add(current); } }