Merge branch 'RED-8402' into 'main'

RED-8402: Header and footer are not indexed / searched

See merge request fforesight/layout-parser!128
This commit is contained in:
Yannik Hampe 2024-04-08 12:28:06 +02:00
commit 9bd8419770
2 changed files with 47 additions and 7 deletions

View File

@ -64,9 +64,24 @@ public class Document implements GenericSemanticNode {
}
/**
 * Collects the nodes produced by {@code streamChildrenOfType(NodeType.HEADER)},
 * each cast to {@link Header}.
 *
 * @return a list holding every such node as a {@link Header}
 * @throws ClassCastException if a streamed node is not a {@link Header}
 */
public List<Header> getHeaders() {
    Stream<Header> headerNodes = streamChildrenOfType(NodeType.HEADER).map(Header.class::cast);
    return headerNodes.collect(Collectors.toList());
}
/**
 * Collects the nodes produced by {@code streamChildrenOfType(NodeType.FOOTER)},
 * each cast to {@link Footer}.
 *
 * @return a list holding every such node as a {@link Footer}
 * @throws ClassCastException if a streamed node is not a {@link Footer}
 */
public List<Footer> getFooters() {
    Stream<Footer> footerNodes = streamChildrenOfType(NodeType.FOOTER).map(Footer.class::cast);
    return footerNodes.collect(Collectors.toList());
}
/**
 * Streams the text block of every leaf node reachable through {@code streamAllNodes()}.
 *
 * <p>The element order is whatever {@code streamAllNodes()} yields; this method only
 * filters and maps, it does not reorder.
 * NOTE(review): the method name implies document order - confirm against
 * {@code streamAllNodes()}.
 *
 * @return a stream with one {@link TextBlock} per leaf node
 */
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
    // Only one return statement: a second, identical return after this one would be
    // unreachable code and is rejected by the compiler.
    return streamAllNodes().filter(SemanticNode::isLeaf)
            .map(SemanticNode::getTextBlock);
}

View File

@ -1,26 +1,51 @@
package com.knecon.fforesight.service.layoutparser.processor.services;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
import lombok.extern.slf4j.Slf4j;
@Service
@Slf4j
public class SimplifiedSectionTextService {
public SimplifiedText toSimplifiedText(Document document) {
List<SimplifiedSectionText> simplifiedSectionTexts = document.getMainSections().stream().map(this::toSimplifiedSectionText).toList();
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedSectionTexts).build();
List<SimplifiedSectionText> simplifiedMainSectionsList = document.getMainSections()
.stream()
.map(this::toSimplifiedSectionText)
.toList();
List<SimplifiedSectionText> simplifiedHeadersList = document.getHeaders()
.stream()
.map(this::toSimplifiedSectionText)
.toList();
List<SimplifiedSectionText> simplifiedFootersList = document.getFooters()
.stream()
.map(this::toSimplifiedSectionText)
.toList();
List<SimplifiedSectionText> simplifiedText = Stream.of(simplifiedMainSectionsList, simplifiedHeadersList, simplifiedFootersList)
.flatMap(List::stream)
.collect(Collectors.toList());
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedText).build();
}
private SimplifiedSectionText toSimplifiedSectionText(Section section) {
private SimplifiedSectionText toSimplifiedSectionText(SemanticNode section) {
return SimplifiedSectionText.builder().sectionNumber(section.getTreeId().get(0)).text(section.getTextBlock().getSearchText()).build();
return SimplifiedSectionText.builder()
.sectionNumber(section.getTreeId()
.get(0))
.text(section.getTextBlock().getSearchText())
.build();
}
}