Merge branch 'RED-9964' into 'main'

RED-9964: fix errors with images

See merge request fforesight/layout-parser!212
This commit is contained in:
Kilian Schüttler 2024-09-04 09:16:59 +02:00
commit af45f2cd8c
3 changed files with 3 additions and 0 deletions

View File

@@ -68,6 +68,7 @@ public class Page {
public TextBlock getMainBodyTextBlock() { public TextBlock getMainBodyTextBlock() {
return textBlocksOnPage.stream() return textBlocksOnPage.stream()
.filter(atb -> !atb.isEmpty())
.collect(new TextBlockCollector()); .collect(new TextBlockCollector());
} }

View File

@@ -84,6 +84,7 @@ public class DocumentGraphFactory {
.filter(SemanticNode::isLeaf) .filter(SemanticNode::isLeaf)
.filter(node -> !node.getType().equals(NodeType.HEADER)) .filter(node -> !node.getType().equals(NodeType.HEADER))
.filter(node -> !node.getType().equals(NodeType.FOOTER)) .filter(node -> !node.getType().equals(NodeType.FOOTER))
.filter(node -> !node.getType().equals(NodeType.IMAGE))
.map(SemanticNode::getTextBlock) .map(SemanticNode::getTextBlock)
.map(TextBlock::getAtomicTextBlocks) .map(TextBlock::getAtomicTextBlocks)
.flatMap(Collection::stream) .flatMap(Collection::stream)

View File

@@ -84,6 +84,7 @@ public class DocumentGraphMapper {
switch (entryData.getType()) { switch (entryData.getType()) {
case HEADER -> pages.forEach(page -> page.setHeader((Header) node)); case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node)); case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
default -> textBlock.getAtomicTextBlocks() default -> textBlock.getAtomicTextBlocks()
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb)); .forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
} }