RED-7384: add empty textBlock to Image to ensure continuous textranges across all SemanticNodes

This commit is contained in:
Kilian Schuettler 2024-04-23 11:30:13 +02:00
parent bfa90c2d79
commit 0dda309829
2 changed files with 15 additions and 14 deletions

View File

@ -12,7 +12,6 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.No
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -36,6 +35,8 @@ public class Image implements GenericSemanticNode {
boolean transparent;
Rectangle2D position;
TextBlock leafTextBlock;
boolean redaction;
boolean ignored;
@Builder.Default
@ -66,7 +67,7 @@ public class Image implements GenericSemanticNode {
@Override
public TextBlock getTextBlock() {
return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
return leafTextBlock;
}
@ -92,4 +93,11 @@ public class Image implements GenericSemanticNode {
return bBoxPerPage;
}
@Override
public boolean isLeaf() {
return true;
}
}

View File

@ -18,8 +18,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
@ -31,6 +29,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
@ -99,19 +99,12 @@ public class DocumentGraphFactory {
}
public void addImage(Section section, ClassifiedImage image, Context context) {
public void addImage(GenericSemanticNode parent, ClassifiedImage image, Context context) {
Image imageNode = createImage(image, context);
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
imageNode.setTreeId(treeId);
}
public void addImage(Document document, ClassifiedImage image, Context context) {
Image imageNode = createImage(image, context);
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(document, imageNode);
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(parent, imageNode);
imageNode.setTreeId(treeId);
imageNode.setLeafTextBlock(context.textBlockFactory.emptyTextBlock(parent, context, context.getPage(image.getPage())));
}