Merge branch 'RED-8481' into 'master'
RED-8481: use visual layout parser for signature extraction. Closes RED-8481. See merge request redactmanager/redaction-service!289
This commit is contained in:
commit
94b7b741f8
@ -12,7 +12,7 @@ plugins {
|
||||
description = "redaction-service-server-v1"
|
||||
|
||||
|
||||
val layoutParserVersion = "0.91.0"
|
||||
val layoutParserVersion = "0.93.0"
|
||||
val jacksonVersion = "2.15.2"
|
||||
val droolsVersion = "9.44.0.Final"
|
||||
val pdfBoxVersion = "3.0.0"
|
||||
|
||||
@ -14,6 +14,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -33,6 +34,8 @@ public class Document implements GenericSemanticNode {
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId = Collections.emptyList();
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
Set<Page> pages;
|
||||
DocumentTree documentTree;
|
||||
|
||||
@ -9,6 +9,7 @@ import java.util.Set;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -27,6 +28,8 @@ import lombok.experimental.FieldDefaults;
|
||||
public class Footer implements GenericSemanticNode {
|
||||
|
||||
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
|
||||
@ -9,6 +9,7 @@ import java.util.Set;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -27,6 +28,8 @@ import lombok.experimental.FieldDefaults;
|
||||
public class Header implements GenericSemanticNode {
|
||||
|
||||
final static SectionIdentifier sectionIdentifier = SectionIdentifier.empty();
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
|
||||
@ -10,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -27,6 +28,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Headline implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
@ -18,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -34,6 +35,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Image implements GenericSemanticNode, IEntity {
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
@ -123,6 +126,14 @@ public class Image implements GenericSemanticNode, IEntity {
|
||||
return name.charAt(0) + name.substring(1).toLowerCase(Locale.ENGLISH);
|
||||
}
|
||||
|
||||
public boolean mostlyContainedBy(Image image) {
|
||||
Map<Page,Rectangle2D> bboxImage = image.getBBox();
|
||||
Map<Page,Rectangle2D> bbox = this.getBBox();
|
||||
Rectangle2D intersection = bboxImage.get(this.page).createIntersection(bbox.get(this.page));
|
||||
double calculatedIntersection = intersection.getWidth() * intersection.getHeight();
|
||||
double area = bbox.get(this.page).getWidth() * bbox.get(this.page).getHeight();
|
||||
return (calculatedIntersection / area) > 0.8;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
|
||||
|
||||
@ -9,6 +9,7 @@ import java.util.Set;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -24,6 +25,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Paragraph implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
@ -10,12 +10,14 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@ -26,6 +28,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Section implements GenericSemanticNode {
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
|
||||
@ -37,6 +40,9 @@ public class Section implements GenericSemanticNode {
|
||||
|
||||
Map<Page, Rectangle2D> bBoxCache;
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
|
||||
@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBl
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
public interface SemanticNode {
|
||||
|
||||
@ -356,6 +357,13 @@ public interface SemanticNode {
|
||||
return getTextBlock().getSearchText().contains(string);
|
||||
}
|
||||
|
||||
/**
 * Returns the layout engines recorded for this node.
 *
 * <p>{@code addEngine} adds directly to the returned set, so implementations must return a
 * mutable, non-null set for that call to succeed.
 *
 * @return the set of layout engines of this node
 */
Set<LayoutEngine> getEngines();
|
||||
|
||||
default void addEngine(LayoutEngine engine) {
|
||||
getEngines().add(engine);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings.
|
||||
@ -554,7 +562,6 @@ public interface SemanticNode {
|
||||
if (textBlock.containsTextRange(textEntity.getTextRange())) {
|
||||
textEntity.setDeepestFullyContainingNode(this);
|
||||
}
|
||||
|
||||
textEntity.addIntersectingNode(this);
|
||||
streamChildren().filter(semanticNode -> semanticNode.getTextRange().intersects(textEntity.getTextRange()))
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
|
||||
|
||||
@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -32,6 +33,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Table implements SemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
DocumentTree documentTree;
|
||||
|
||||
@ -11,6 +11,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -26,6 +27,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class TableCell implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
int row;
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
@ -76,6 +77,7 @@ public class DocumentGraphMapper {
|
||||
node.setLeafTextBlock(textBlock);
|
||||
}
|
||||
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed().toList();
|
||||
entryData.getEngines().forEach(engine -> node.addEngine(engine));
|
||||
node.setTreeId(treeId);
|
||||
|
||||
switch (entryData.getType()) {
|
||||
|
||||
@ -16,8 +16,8 @@ class SemanticNodeComparatorsTest {
|
||||
@Test
|
||||
public void testFirstSemanticNode() {
|
||||
|
||||
var node = new Section(List.of(0, 1), null, null, null, null);
|
||||
var otherNode = new Section(List.of(0, 2), null, null, null, null);
|
||||
var node = new Section(List.of(0, 1), null, null, null, null,null);
|
||||
var otherNode = new Section(List.of(0, 2), null, null, null, null,null);
|
||||
List<SemanticNode> list = new ArrayList<>();
|
||||
list.add(otherNode);
|
||||
list.add(node);
|
||||
@ -29,8 +29,8 @@ class SemanticNodeComparatorsTest {
|
||||
@Test
|
||||
public void testFirstSemanticNode2() {
|
||||
|
||||
var node = new Section(Collections.emptyList(), null, null, null, null);
|
||||
var otherNode = new Section(List.of(0, 2), null, null, null, null);
|
||||
var node = new Section(Collections.emptyList(), null, null, null, null, null);
|
||||
var otherNode = new Section(List.of(0, 2), null, null, null, null, null);
|
||||
List<SemanticNode> list = new ArrayList<>();
|
||||
list.add(otherNode);
|
||||
list.add(node);
|
||||
@ -42,8 +42,8 @@ class SemanticNodeComparatorsTest {
|
||||
@Test
|
||||
public void testFirstSemanticNode3() {
|
||||
|
||||
var node = new Section(List.of(1, 5, 8), null, null, null, null);
|
||||
var otherNode = new Section(List.of(0, 2), null, null, null, null);
|
||||
var node = new Section(List.of(1, 5, 8), null, null, null, null, null);
|
||||
var otherNode = new Section(List.of(0, 2), null, null, null, null, null);
|
||||
List<SemanticNode> list = new ArrayList<>();
|
||||
list.add(otherNode);
|
||||
list.add(node);
|
||||
@ -55,8 +55,8 @@ class SemanticNodeComparatorsTest {
|
||||
@Test
|
||||
public void testFirstSemanticNode4() {
|
||||
|
||||
var node = new Section(List.of(1, 5, 8), null, null, null, null);
|
||||
var otherNode = new Section(List.of(1, 5, 9), null, null, null, null);
|
||||
var node = new Section(List.of(1, 5, 8), null, null, null, null,null);
|
||||
var otherNode = new Section(List.of(1, 5, 9), null, null, null, null,null);
|
||||
List<SemanticNode> list = new ArrayList<>();
|
||||
list.add(otherNode);
|
||||
list.add(node);
|
||||
|
||||
@ -29,6 +29,7 @@ public class LayoutParsingRequestProvider {
|
||||
.originFileStorageId(originFileStorageId)
|
||||
.tablesFileStorageId(Optional.of(tablesFileStorageId))
|
||||
.imagesFileStorageId(Optional.of(imagesFileStorageId))
|
||||
.visualLayoutParsingFileId(Optional.empty())
|
||||
.structureFileStorageId(structureFileStorageId)
|
||||
.textBlockFileStorageId(textBlockFileStorageId)
|
||||
.positionBlockFileStorageId(positionBlockFileStorageId)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user