Hotfix for table/paragraph section creation at the start of a document, before the first headline

This commit is contained in:
Maverick Studer 2024-06-18 17:36:04 +02:00
parent 133e06460f
commit 1c5d755111

View File

@ -55,7 +55,8 @@ public class SectionNodeFactory {
.collect(groupingBy(AbstractPageBlock::getPage)); .collect(groupingBy(AbstractPageBlock::getPage));
AbstractSemanticNode section; AbstractSemanticNode section;
if (isLeaf) { boolean containsTablesAndTextBlocks = containsTablesAndTextBlocks(pageBlocks);
if (isLeaf && !containsTablesAndTextBlocks) {
section = Section.builder().documentTree(context.getDocumentTree()).build(); section = Section.builder().documentTree(context.getDocumentTree()).build();
} else { } else {
section = SuperSection.builder().documentTree(context.getDocumentTree()).build(); section = SuperSection.builder().documentTree(context.getDocumentTree()).build();
@ -68,7 +69,7 @@ public class SectionNodeFactory {
section.setTreeId(getTreeId(parentNode, context, section)); section.setTreeId(getTreeId(parentNode, context, section));
addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section, document); addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section, document);
if (containsTablesAndTextBlocks(pageBlocks)) { if (containsTablesAndTextBlocks) {
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType, splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
section, section,
true, true,