RED-8402: Header and footer are not indexed / searched

Added header and footer sections to the SimplifiedText output, alongside the main sections.
This commit is contained in:
yhampe 2024-04-08 11:59:35 +02:00
parent 990c376ce6
commit 0c3194276a
2 changed files with 44 additions and 6 deletions

View File

@ -63,6 +63,19 @@ public class Document implements GenericSemanticNode {
.collect(Collectors.toList());
}
/**
 * Returns the children of this document that have node type {@code HEADER}.
 *
 * @return a new list holding every node yielded by
 *         {@code streamChildrenOfType(NodeType.HEADER)}, each cast to {@link Header}
 */
public List<Header> getHeaderSections() {
    Stream<Header> headers = streamChildrenOfType(NodeType.HEADER).map(child -> (Header) child);
    return headers.collect(Collectors.toList());
}
/**
 * Returns the children of this document that have node type {@code FOOTER}.
 *
 * @return a new list holding every node yielded by
 *         {@code streamChildrenOfType(NodeType.FOOTER)}, each cast to {@link Footer}
 */
public List<Footer> getFooterSections() {
    Stream<Footer> footers = streamChildrenOfType(NodeType.FOOTER).map(child -> (Footer) child);
    return footers.collect(Collectors.toList());
}
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {

View File

@ -1,26 +1,51 @@
package com.knecon.fforesight.service.layoutparser.processor.services;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import lombok.extern.slf4j.Slf4j;
/**
 * Spring service that converts a {@link Document} into a {@link SimplifiedText}:
 * one {@link SimplifiedSectionText} entry per converted node plus the document's page count.
 */
@Service
@Slf4j
public class SimplifiedSectionTextService {
/**
 * Builds the simplified text representation of a document.
 *
 * @param document the document whose main sections, headers and footers are converted
 * @return a SimplifiedText carrying {@code document.getNumberOfPages()} and the converted section texts
 */
public SimplifiedText toSimplifiedText(Document document) {
// NOTE(review): the next two lines look like the pre-change version retained by the diff
// rendering (main sections only, returned immediately) — confirm against the commit.
List<SimplifiedSectionText> simplifiedSectionTexts = document.getMainSections().stream().map(this::toSimplifiedSectionText).toList();
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedSectionTexts).build();
// Convert main sections, headers and footers separately with the same mapper.
List<SimplifiedSectionText> simplifiedMainSectionsList = document.getMainSections()
.stream()
.map(this::toSimplifiedSectionText)
.toList();
List<SimplifiedSectionText> simplifiedHeadersList = document.getHeaderSections()
.stream()
.map(this::toSimplifiedSectionText)
.toList();
List<SimplifiedSectionText> simplifiedFootersList = document.getFooterSections()
.stream()
.map(this::toSimplifiedSectionText)
.toList();
// Concatenate in a fixed order: main sections first, then headers, then footers.
List<SimplifiedSectionText> simplifiedText = Stream.of(simplifiedMainSectionsList, simplifiedHeadersList, simplifiedFootersList)
.flatMap(List::stream)
.collect(Collectors.toList());
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedText).build();
}
/**
 * Converts a single node into its simplified section text.
 * The section number is the first element of the node's tree id; the text is the
 * search text of the node's text block.
 *
 * @param section the node to convert
 * @return the SimplifiedSectionText built from the node's tree id and search text
 */
private SimplifiedSectionText toSimplifiedSectionText(Section section) {
// NOTE(review): the Section signature above and the one-line return two lines below appear
// to be the pre-change lines; the GenericSemanticNode signature presumably exists so that
// Header and Footer nodes can be passed as well — confirm against the commit.
private SimplifiedSectionText toSimplifiedSectionText(GenericSemanticNode section) {
return SimplifiedSectionText.builder().sectionNumber(section.getTreeId().get(0)).text(section.getTextBlock().getSearchText()).build();
return SimplifiedSectionText.builder()
.sectionNumber(section.getTreeId()
.get(0))
.text(section.getTextBlock().getSearchText())
.build();
}
}