RED-7074: Design Subsection section tree structure algorithm

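* Fix boundary error: ConcatenatedTextBlock now throws UnsupportedOperationException when asked to concat non-consecutive TextBlocks, instead of silently returning itself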
This commit is contained in:
maverickstuder 2024-05-07 15:51:54 +02:00
parent d2dc369df3
commit a9338262c5
2 changed files with 21 additions and 20 deletions

View File

@ -56,8 +56,7 @@ public class ConcatenatedTextBlock implements TextBlock {
boundary.setStart(start);
boundary.setEnd(end);
} else if (boundary.end() != start) {
//throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
return this;
throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
}
this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks());
boundary.setEnd(end);

View File

@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.outline;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.springframework.stereotype.Service;
@ -26,6 +27,11 @@ public class TOCEnrichmentService {
public void assignSectionBlocksAndImages(ClassificationDocument document) {
TableOfContents toc = document.getTableOfContents();
Iterator<TableOfContentItem> iterator = toc.iterator();
TableOfContentItem currentTOCItem = null;
if(iterator.hasNext()) {
currentTOCItem = iterator.next();
}
List<AbstractPageBlock> startBlocks = new ArrayList<>();
List<ClassifiedImage> startImages = new ArrayList<>();
TableOfContentItem currentSection = null;
@ -64,28 +70,24 @@ public class TOCEnrichmentService {
}
previousTable = table;
}
boolean matched = false;
for (TableOfContentItem tocItem : toc) {
if (current instanceof TextPageBlock && tocItem.getHeadline().getText().equals(current.getText())) {
if (!foundFirstHeadline) {
foundFirstHeadline = true;
}
currentSection = tocItem;
//sectionsMap.get(tocItem).add(current);
tocItem.getSectionBlocks().add(current);
currentPageTOCItems.add(tocItem);
matched = true;
break;
if (current instanceof TextPageBlock && currentTOCItem != null && currentTOCItem.getHeadline().getText().equals(current.getText())) {
if (!foundFirstHeadline) {
foundFirstHeadline = true;
}
currentSection = currentTOCItem;
currentTOCItem.getSectionBlocks().add(current);
currentPageTOCItems.add(currentTOCItem);
if(iterator.hasNext()) {
currentTOCItem = iterator.next();
}
}
if (!matched) {
if (!foundFirstHeadline) {
startBlocks.add(current);
} else {
currentSection.getSectionBlocks().add(current);
}
if (!foundFirstHeadline) {
startBlocks.add(current);
} else {
currentSection.getSectionBlocks().add(current);
}
}