From 0dda309829a0210d579e370eaa9ac5590ddd5be0 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Tue, 23 Apr 2024 11:30:13 +0200 Subject: [PATCH] RED-7384: add empty textBlock to Image to ensure continuous textranges across all SemanticNodes --- .../processor/model/graph/nodes/Image.java | 12 ++++++++++-- .../services/factory/DocumentGraphFactory.java | 17 +++++------------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Image.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Image.java index 1a9c42d..d2f08a0 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Image.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Image.java @@ -12,7 +12,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree; import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity; import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock; -import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector; import lombok.AccessLevel; import lombok.AllArgsConstructor; @@ -36,6 +35,8 @@ public class Image implements GenericSemanticNode { boolean transparent; Rectangle2D position; + TextBlock leafTextBlock; + boolean redaction; boolean ignored; @Builder.Default @@ -66,7 +67,7 @@ public class Image implements GenericSemanticNode { @Override public TextBlock getTextBlock() { - return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector()); + return leafTextBlock; } @@ -92,4 +93,11 @@ public class Image implements GenericSemanticNode { return bBoxPerPage; } + + @Override + public boolean isLeaf() { + + return true; + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java index e6abd2f..b46fa23 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java @@ -18,8 +18,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter; import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader; import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; -import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; -import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer; @@ -31,6 +29,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section; import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock; +import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; +import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder; import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations; @@ -99,19 +99,12 @@ public class DocumentGraphFactory { } - public void addImage(Section section, ClassifiedImage image, Context context) { + public void addImage(GenericSemanticNode parent, ClassifiedImage image, Context context) { Image imageNode = createImage(image, context); - List treeId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode); - imageNode.setTreeId(treeId); - } - - - public void addImage(Document document, ClassifiedImage image, Context context) { - - Image imageNode = createImage(image, context); - List treeId = context.getDocumentTree().createNewChildEntryAndReturnId(document, imageNode); + List treeId = context.getDocumentTree().createNewChildEntryAndReturnId(parent, imageNode); imageNode.setTreeId(treeId); + imageNode.setLeafTextBlock(context.textBlockFactory.emptyTextBlock(parent, context, context.getPage(image.getPage()))); }