Merge branch 'RED-8481' into 'master'

RED-8481: use visual layout parser for signature extraction

Closes RED-8481

See merge request redactmanager/redaction-service!289
This commit is contained in:
Yannik Hampe 2024-02-27 08:38:03 +01:00
commit 94b7b741f8
14 changed files with 60 additions and 10 deletions

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.91.0"
val layoutParserVersion = "0.93.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"

View File

@ -14,6 +14,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -33,6 +34,8 @@ public class Document implements GenericSemanticNode {
@EqualsAndHashCode.Include
List<Integer> treeId = Collections.emptyList();
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
Set<Page> pages;
DocumentTree documentTree;

View File

@ -9,6 +9,7 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -27,6 +28,8 @@ import lombok.experimental.FieldDefaults;
public class Footer implements GenericSemanticNode {
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;

View File

@ -9,6 +9,7 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -27,6 +28,8 @@ import lombok.experimental.FieldDefaults;
public class Header implements GenericSemanticNode {
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;

View File

@ -10,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -27,6 +28,8 @@ import lombok.experimental.FieldDefaults;
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Headline implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;
TextBlock leafTextBlock;

View File

@ -18,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -34,6 +35,8 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Image implements GenericSemanticNode, IEntity {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;
@ -123,6 +126,14 @@ public class Image implements GenericSemanticNode, IEntity {
return name.charAt(0) + name.substring(1).toLowerCase(Locale.ENGLISH);
}
/**
 * Checks whether more than 80% of this image's bounding-box area on its own page
 * is covered by the bounding box of the given image on that same page.
 *
 * <p>Returns {@code false} when either image has no bounding box on this image's page,
 * when this image's bounding box has no positive area, or when the two boxes do not overlap.
 *
 * @param image the image that potentially contains this image
 * @return {@code true} if the overlapping area divided by this image's area exceeds 0.8
 */
public boolean mostlyContainedBy(Image image) {
    final double minContainedFraction = 0.8;

    Map<Page, Rectangle2D> bboxImage = image.getBBox();
    Map<Page, Rectangle2D> bbox = this.getBBox();
    Rectangle2D otherBox = bboxImage.get(this.page);
    Rectangle2D ownBox = bbox.get(this.page);
    if (otherBox == null || ownBox == null) {
        // One of the images has no bounding box on this page, so there is nothing to compare.
        return false;
    }

    double area = ownBox.getWidth() * ownBox.getHeight();
    if (!(area > 0)) {
        // Degenerate (zero/negative/NaN) area would make the ratio meaningless.
        return false;
    }

    Rectangle2D intersection = otherBox.createIntersection(ownBox);
    if (intersection.isEmpty()) {
        // For non-overlapping boxes createIntersection yields negative width and/or height;
        // multiplying two negative extents would otherwise look like a positive overlap.
        return false;
    }

    double calculatedIntersection = intersection.getWidth() * intersection.getHeight();
    return (calculatedIntersection / area) > minContainedFraction;
}
public int length() {

View File

@ -9,6 +9,7 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -24,6 +25,9 @@ import lombok.experimental.FieldDefaults;
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Paragraph implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;
TextBlock leafTextBlock;

View File

@ -10,12 +10,14 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@ -26,6 +28,7 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Section implements GenericSemanticNode {
@EqualsAndHashCode.Include
List<Integer> treeId;
@ -37,6 +40,9 @@ public class Section implements GenericSemanticNode {
Map<Page, Rectangle2D> bBoxCache;
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@Override
public NodeType getType() {

View File

@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBl
import com.iqser.red.service.redaction.v1.server.service.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
public interface SemanticNode {
@ -356,6 +357,13 @@ public interface SemanticNode {
return getTextBlock().getSearchText().contains(string);
}
/**
 * Returns the set of {@link LayoutEngine}s recorded for this node.
 * The default {@code addEngine} implementation adds directly to the returned set,
 * so implementations are expected to return a mutable set.
 * NOTE(review): presumably these are the engines that detected/produced this node — confirm.
 *
 * @return the layout engines associated with this node
 */
Set<LayoutEngine> getEngines();
/**
 * Adds the given layout engine to the set returned by {@link #getEngines()}.
 *
 * @param engine the layout engine to record for this node
 */
default void addEngine(LayoutEngine engine) {
    Set<LayoutEngine> nodeEngines = getEngines();
    nodeEngines.add(engine);
}
/**
* Checks whether this SemanticNode contains all the provided Strings.
@ -554,7 +562,6 @@ public interface SemanticNode {
if (textBlock.containsTextRange(textEntity.getTextRange())) {
textEntity.setDeepestFullyContainingNode(this);
}
textEntity.addIntersectingNode(this);
streamChildren().filter(semanticNode -> semanticNode.getTextRange().intersects(textEntity.getTextRange()))
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));

View File

@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -32,6 +33,8 @@ import lombok.experimental.FieldDefaults;
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Table implements SemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;
DocumentTree documentTree;

View File

@ -11,6 +11,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -26,6 +27,9 @@ import lombok.experimental.FieldDefaults;
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class TableCell implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@EqualsAndHashCode.Include
List<Integer> treeId;
int row;

View File

@ -6,6 +6,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
@ -76,6 +77,7 @@ public class DocumentGraphMapper {
node.setLeafTextBlock(textBlock);
}
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed().toList();
entryData.getEngines().forEach(engine -> node.addEngine(engine));
node.setTreeId(treeId);
switch (entryData.getType()) {

View File

@ -16,8 +16,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode() {
var node = new Section(List.of(0, 1), null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null);
var node = new Section(List.of(0, 1), null, null, null, null,null);
var otherNode = new Section(List.of(0, 2), null, null, null, null,null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);
@ -29,8 +29,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode2() {
var node = new Section(Collections.emptyList(), null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null);
var node = new Section(Collections.emptyList(), null, null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null, null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);
@ -42,8 +42,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode3() {
var node = new Section(List.of(1, 5, 8), null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null);
var node = new Section(List.of(1, 5, 8), null, null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null, null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);
@ -55,8 +55,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode4() {
var node = new Section(List.of(1, 5, 8), null, null, null, null);
var otherNode = new Section(List.of(1, 5, 9), null, null, null, null);
var node = new Section(List.of(1, 5, 8), null, null, null, null,null);
var otherNode = new Section(List.of(1, 5, 9), null, null, null, null,null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);

View File

@ -29,6 +29,7 @@ public class LayoutParsingRequestProvider {
.originFileStorageId(originFileStorageId)
.tablesFileStorageId(Optional.of(tablesFileStorageId))
.imagesFileStorageId(Optional.of(imagesFileStorageId))
.visualLayoutParsingFileId(Optional.empty())
.structureFileStorageId(structureFileStorageId)
.textBlockFileStorageId(textBlockFileStorageId)
.positionBlockFileStorageId(positionBlockFileStorageId)