RED-10708: Tables as components in DM
This commit is contained in:
parent
518c38c2e9
commit
723263a7c4
@ -10,6 +10,7 @@ import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
|
||||
@ -362,22 +363,25 @@ public class DocumentTree {
|
||||
}
|
||||
|
||||
|
||||
public void addEntityToGraph(TextEntity entity) {
|
||||
public void addEntityToGraph(SemanticEntity entity) {
|
||||
|
||||
getRoot().getNode().addThisToEntityIfIntersects(entity);
|
||||
|
||||
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
||||
EntityEnrichmentService.enrichEntity(entity, textBlock);
|
||||
|
||||
EntityCreationUtility.addToPages(entity);
|
||||
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
||||
|
||||
if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
return;
|
||||
}
|
||||
|
||||
entity.computeRelations();
|
||||
entity.notifyEntityInserted();
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
|
||||
textEntity.computeRelations();
|
||||
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
|
||||
}
|
||||
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -188,7 +188,7 @@ public interface IEntity {
|
||||
*
|
||||
* @return A set of references.
|
||||
*/
|
||||
default Set<TextEntity> references() {
|
||||
default Set<SemanticEntity> references() {
|
||||
|
||||
return getMatchedRule().getReferences();
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
boolean ignored;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> references = Collections.emptySet();
|
||||
Set<SemanticEntity> references = Collections.emptySet();
|
||||
|
||||
|
||||
/**
|
||||
|
||||
@ -0,0 +1,110 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@FieldDefaults(level = AccessLevel.PROTECTED)
|
||||
@SuperBuilder
|
||||
public abstract class SemanticEntity implements IEntity {
|
||||
|
||||
final EntityType entityType;
|
||||
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
||||
|
||||
@Builder.Default
|
||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
@Builder.Default
|
||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
@Builder.Default
|
||||
Set<Page> pages = new HashSet<>();
|
||||
List<PositionOnPage> positionsOnPagePerPage;
|
||||
|
||||
@Builder.Default
|
||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||
SemanticNode deepestFullyContainingNode;
|
||||
|
||||
@Builder.Default
|
||||
Map<SemanticEntity, Set<Relation>> relations = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||
|
||||
|
||||
/**
|
||||
* @return true when this entity is of EntityType ENTITY or HINT
|
||||
*/
|
||||
public boolean validEntityType() {
|
||||
|
||||
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
||||
}
|
||||
|
||||
|
||||
public boolean valid() {
|
||||
|
||||
return active() && validEntityType();
|
||||
}
|
||||
|
||||
|
||||
public boolean isType(String type) {
|
||||
|
||||
return type().equals(type);
|
||||
}
|
||||
|
||||
|
||||
public boolean isAnyType(List<String> types) {
|
||||
|
||||
return types.contains(type());
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getPositionsOnPagePerPage().stream()
|
||||
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
}
|
||||
|
||||
|
||||
public void addIntersectingNode(SemanticNode containingNode) {
|
||||
|
||||
intersectingNodes.add(containingNode);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void addEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.add(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void removeEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.remove(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Collection<EntityEventListener> getEntityEventListeners() {
|
||||
|
||||
return entityEventListeners;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,111 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.collections4.map.HashedMap;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||
public class TableEntity extends SemanticEntity {
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
final String id;
|
||||
|
||||
Table table;
|
||||
|
||||
|
||||
public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) {
|
||||
|
||||
return TableEntity.builder()
|
||||
.id(table.buildId(table.getTextRange(), type, entityType))
|
||||
.type(type)
|
||||
.entityType(entityType)
|
||||
.manualOverwrite(new ManualChangeOverwrite(entityType))
|
||||
.table(table)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getValue() {
|
||||
|
||||
return "Table:" + table.getHeadline();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextRange getTextRange() {
|
||||
|
||||
return table.getTextBlock().getTextRange();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String type() {
|
||||
|
||||
return getManualOverwrite().getType()
|
||||
.orElse(NodeType.TABLE.toString());
|
||||
}
|
||||
|
||||
|
||||
public void removeFromGraph() {
|
||||
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
||||
intersectingNodes = new LinkedList<>();
|
||||
relations.keySet()
|
||||
.forEach(entity -> entity.getRelations().remove(this));
|
||||
relations = new HashedMap<>();
|
||||
deepestFullyContainingNode = null;
|
||||
pages = new HashSet<>();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||
|
||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = table.getTextBlock().getPositionsPerPage(table.getTextRange());
|
||||
|
||||
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet()
|
||||
.stream()
|
||||
.map(entry -> new PositionOnPage(id, entry.getKey(), entry.getValue()))
|
||||
.toList();
|
||||
}
|
||||
return positionsOnPagePerPage;
|
||||
}
|
||||
|
||||
|
||||
public String asCsv() {
|
||||
|
||||
return table.asCsv();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,15 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.collections4.map.HashedMap;
|
||||
@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
/**
|
||||
* Represents a text entity within a document, characterized by its text range, type, entity type,
|
||||
* and associated metadata like matched rules, pages, and engines.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@SuperBuilder
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
|
||||
public class TextEntity implements IEntity {
|
||||
public class TextEntity extends SemanticEntity {
|
||||
|
||||
// primary key
|
||||
@EqualsAndHashCode.Include
|
||||
@ -48,13 +42,6 @@ public class TextEntity implements IEntity {
|
||||
TextRange textRange;
|
||||
@Builder.Default
|
||||
Set<TextRange> duplicateTextRanges = new HashSet<>();
|
||||
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
||||
final EntityType entityType;
|
||||
|
||||
@Builder.Default
|
||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
@Builder.Default
|
||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
boolean dictionaryEntry;
|
||||
boolean dossierDictionaryEntry;
|
||||
@ -66,24 +53,12 @@ public class TextEntity implements IEntity {
|
||||
String value;
|
||||
String textBefore;
|
||||
String textAfter;
|
||||
@Builder.Default
|
||||
Set<Page> pages = new HashSet<>();
|
||||
List<PositionOnPage> positionsOnPagePerPage;
|
||||
@Builder.Default
|
||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||
SemanticNode deepestFullyContainingNode;
|
||||
|
||||
@Builder.Default
|
||||
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||
|
||||
|
||||
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return TextEntity.builder()
|
||||
.id(buildId(node, textRange, type, entityType))
|
||||
.id(node.buildId(textRange, type, entityType))
|
||||
.type(type)
|
||||
.entityType(entityType)
|
||||
.textRange(textRange)
|
||||
@ -110,19 +85,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
|
||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
||||
rectanglesPerLinePerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.toList(),
|
||||
type,
|
||||
entityType.name());
|
||||
}
|
||||
|
||||
|
||||
public void addTextRange(TextRange textRange) {
|
||||
|
||||
duplicateTextRanges.add(textRange);
|
||||
@ -143,24 +105,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
public boolean isType(String type) {
|
||||
|
||||
return type().equals(type);
|
||||
}
|
||||
|
||||
|
||||
public boolean isAnyType(List<String> types) {
|
||||
|
||||
return types.contains(type());
|
||||
}
|
||||
|
||||
|
||||
public void addIntersectingNode(SemanticNode containingNode) {
|
||||
|
||||
intersectingNodes.add(containingNode);
|
||||
}
|
||||
|
||||
|
||||
public String getValueWithLineBreaks() {
|
||||
|
||||
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
|
||||
@ -171,7 +115,7 @@ public class TextEntity implements IEntity {
|
||||
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||
pages.forEach(page -> page.getEntities().remove(this));
|
||||
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
||||
intersectingNodes = new LinkedList<>();
|
||||
relations.keySet()
|
||||
.forEach(entity -> entity.getRelations().remove(this));
|
||||
@ -215,6 +159,7 @@ public class TextEntity implements IEntity {
|
||||
return textEntity.contains(this);
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(TextEntity textEntity) {
|
||||
|
||||
if (this.textRange.contains(textEntity.getTextRange())) {
|
||||
@ -239,7 +184,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
|
||||
public boolean intersects(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.intersects(textEntity.getTextRange()) //
|
||||
@ -277,14 +221,6 @@ public class TextEntity implements IEntity {
|
||||
notifyEntityUpdated();
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getPositionsOnPagePerPage().stream()
|
||||
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -316,21 +252,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return true when this entity is of EntityType ENTITY or HINT
|
||||
*/
|
||||
public boolean validEntityType() {
|
||||
|
||||
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
||||
}
|
||||
|
||||
|
||||
public boolean valid() {
|
||||
|
||||
return active() && validEntityType();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String value() {
|
||||
|
||||
@ -339,41 +260,32 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void addEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.add(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void removeEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.remove(listener);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void computeRelations() {
|
||||
|
||||
for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) {
|
||||
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
||||
} else if (textEntity.containedBy(this)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
||||
} else if (this.containedBy(textEntity)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
} else {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
}
|
||||
this.getDeepestFullyContainingNode().getEntities()
|
||||
.stream()
|
||||
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
|
||||
.map(semanticEntity -> (TextEntity) semanticEntity)
|
||||
.forEach(textEntity -> {
|
||||
|
||||
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
||||
} else if (textEntity.containedBy(this)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
||||
} else if (this.containedBy(textEntity)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
} else {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
}
|
||||
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
||||
DocumentTree documentTree;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
Set<SemanticEntity> entities = new HashSet<>();
|
||||
|
||||
Map<Page, Rectangle2D> bBoxCache;
|
||||
|
||||
@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
||||
|
||||
|
||||
@Override
|
||||
public Map<Page, Rectangle2D> getBBox() {
|
||||
public Map<Page, Rectangle2D>getBBox() {
|
||||
|
||||
if (bBoxCache == null) {
|
||||
bBoxCache = GenericSemanticNode.super.getBBox();
|
||||
|
||||
@ -5,6 +5,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
@ -42,7 +43,7 @@ public class Page {
|
||||
Footer footer;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
Set<SemanticEntity> semanticEntities = new HashSet<>();
|
||||
|
||||
@Builder.Default
|
||||
Set<Image> images = new HashSet<>();
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
@ -4,6 +4,7 @@ import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@ -17,12 +18,16 @@ import java.util.stream.Stream;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
||||
|
||||
@ -74,7 +79,25 @@ public interface SemanticNode {
|
||||
*
|
||||
* @return Set of all Entities associated with this Node
|
||||
*/
|
||||
Set<TextEntity> getEntities();
|
||||
Set<SemanticEntity> getEntities();
|
||||
|
||||
|
||||
default Set<TextEntity> getTextEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
|
||||
.map(semanticEntity -> (TextEntity) semanticEntity)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
default Set<TableEntity> getTableEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(semanticEntity -> semanticEntity instanceof TableEntity)
|
||||
.map(semanticEntity -> (TableEntity) semanticEntity)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@ -85,9 +108,9 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Stream<TextEntity> streamValidEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
return getTextEntities().stream()
|
||||
.filter(IEntity::active)
|
||||
.filter(TextEntity::validEntityType);
|
||||
.filter(SemanticEntity::validEntityType);
|
||||
}
|
||||
|
||||
|
||||
@ -638,18 +661,18 @@ public interface SemanticNode {
|
||||
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
|
||||
* It sets the fields accordingly and recursively calls this function on all its children.
|
||||
*
|
||||
* @param textEntity RedactionEntity, which is being inserted into the graph
|
||||
* @param entity RedactionEntity, which is being inserted into the graph
|
||||
*/
|
||||
default void addThisToEntityIfIntersects(TextEntity textEntity) {
|
||||
default void addThisToEntityIfIntersects(SemanticEntity entity) {
|
||||
|
||||
TextBlock textBlock = getTextBlock();
|
||||
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
|
||||
if (textBlock.containsTextRange(textEntity.getTextRange())) {
|
||||
textEntity.setDeepestFullyContainingNode(this);
|
||||
if (textBlock.getTextRange().intersects(entity.getTextRange())) {
|
||||
if (textBlock.containsTextRange(entity.getTextRange())) {
|
||||
entity.setDeepestFullyContainingNode(this);
|
||||
}
|
||||
textEntity.addIntersectingNode(this);
|
||||
getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange())
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
|
||||
entity.addIntersectingNode(this);
|
||||
getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange())
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(entity));
|
||||
}
|
||||
}
|
||||
|
||||
@ -838,4 +861,17 @@ public interface SemanticNode {
|
||||
return pages.size() == 1 && pages.contains(page);
|
||||
}
|
||||
|
||||
|
||||
default String buildId(TextRange textRange, String type, EntityType entityType) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = getPositionsPerPage(textRange);
|
||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
||||
rectanglesPerLinePerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.toList(),
|
||||
type,
|
||||
entityType.name());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -15,6 +15,7 @@ import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
@ -48,7 +49,7 @@ public class Table implements SemanticNode {
|
||||
TextBlock textBlock;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
Set<SemanticEntity> entities = new HashSet<>();
|
||||
|
||||
Map<Page, Rectangle2D> bBoxCache;
|
||||
|
||||
@ -109,7 +110,7 @@ public class Table implements SemanticNode {
|
||||
.toList();
|
||||
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
||||
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -128,7 +129,7 @@ public class Table implements SemanticNode {
|
||||
.toList();
|
||||
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
||||
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -146,7 +147,7 @@ public class Table implements SemanticNode {
|
||||
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
||||
.anyMatch(types::contains))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -167,7 +168,7 @@ public class Table implements SemanticNode {
|
||||
return entityTypes.containsAll(types);
|
||||
})
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -185,7 +186,7 @@ public class Table implements SemanticNode {
|
||||
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
||||
.noneMatch(types::contains))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -426,4 +427,30 @@ public class Table implements SemanticNode {
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
public String asCsv() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (int row = 0; row < numberOfRows; row++) {
|
||||
for (int col = 0; col < numberOfCols; col++) {
|
||||
TableCell cell = getCell(row, col);
|
||||
String cellText = cell.getTextBlock().getSearchText().replaceAll("\\r?\\n", " ").trim();
|
||||
|
||||
if (cellText.contains(",") || cellText.contains("\"")) {
|
||||
cellText = "\"" + cellText.replace("\"", "\"\"") + "\"";
|
||||
}
|
||||
|
||||
sb.append(cellText);
|
||||
|
||||
if (col < numberOfCols - 1) {
|
||||
sb.append(",");
|
||||
}
|
||||
}
|
||||
sb.append("\n");
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,6 +5,8 @@ import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -50,15 +52,15 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public void addToPages(TextEntity entity) {
|
||||
public void addToPages(SemanticEntity entity) {
|
||||
|
||||
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
|
||||
entity.getPages().addAll(pages);
|
||||
pages.forEach(page -> page.getEntities().add(entity));
|
||||
pages.forEach(page -> page.getSemanticEntities().add(entity));
|
||||
}
|
||||
|
||||
|
||||
public void addEntityToNodeEntitySets(TextEntity entity) {
|
||||
public void addEntityToNodeEntitySets(SemanticEntity entity) {
|
||||
|
||||
entity.getIntersectingNodes()
|
||||
.forEach(node -> node.getEntities().add(entity));
|
||||
|
||||
@ -26,6 +26,9 @@ public class Component {
|
||||
|
||||
List<Entity> references;
|
||||
|
||||
@Builder.Default
|
||||
ComponentFormat componentFormat = ComponentFormat.TEXT;
|
||||
|
||||
|
||||
public boolean addReference(Entity entity) {
|
||||
|
||||
|
||||
@ -0,0 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.component;
|
||||
|
||||
/**
 * Format in which a component's content is represented — plain text (the default on
 * {@code Component.componentFormat}) or CSV (used for table components).
 */
public enum ComponentFormat {

	TEXT,
	// OVERRIDE, // NOTE(review): commented-out candidate constant — confirm whether an OVERRIDE format is needed, otherwise delete
	CSV
}
|
||||
@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
@ -70,11 +72,11 @@ public class EntityLogCreatorService {
|
||||
ObservationRegistry observationRegistry;
|
||||
|
||||
|
||||
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) {
|
||||
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) {
|
||||
|
||||
return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|
||||
|| textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|
||||
|| textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
|
||||
return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|
||||
|| semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|
||||
|| semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
|
||||
}
|
||||
|
||||
|
||||
@ -175,7 +177,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
List<EntityLogEntry> entries = new ArrayList<>();
|
||||
|
||||
List<TextEntity> textEntities = document.getEntities()
|
||||
List<SemanticEntity> semanticEntities = document.getEntities()
|
||||
.stream()
|
||||
.filter(entity -> !entity.getValue().isEmpty())
|
||||
.filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval)
|
||||
@ -190,7 +192,7 @@ public class EntityLogCreatorService {
|
||||
.toList();
|
||||
|
||||
List<String> allIds = new ArrayList<>();
|
||||
allIds.addAll(textEntities.stream()
|
||||
allIds.addAll(semanticEntities.stream()
|
||||
.flatMap(entity -> entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.map(PositionOnPage::getId))
|
||||
@ -204,7 +206,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
Map<String, List<ManualChange>> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds);
|
||||
|
||||
textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
|
||||
semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
|
||||
|
||||
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
|
||||
dossierTemplateId,
|
||||
@ -219,19 +221,19 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
|
||||
public List<EntityLogEntry> toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
|
||||
|
||||
List<EntityLogEntry> entityLogEntries = new ArrayList<>();
|
||||
|
||||
// split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities
|
||||
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
|
||||
for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) {
|
||||
|
||||
List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
|
||||
.stream()
|
||||
.map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
|
||||
.toList();
|
||||
|
||||
EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
|
||||
EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
|
||||
|
||||
// set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages
|
||||
entityLogEntry.setId(positionOnPage.getId());
|
||||
@ -317,12 +319,24 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
return createEntityLogEntry(textEntity, analysisNumber, existingManualChanges);
|
||||
} else if (entity instanceof TableEntity tableEntity) {
|
||||
return createEntityLogEntry(tableEntity, analysisNumber, existingManualChanges);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
entity.references()
|
||||
.stream()
|
||||
.filter(TextEntity::active)
|
||||
.filter(SemanticEntity::active)
|
||||
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
||||
.forEach(pos -> referenceIds.add(pos.getId())));
|
||||
|
||||
@ -365,7 +379,42 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) {
|
||||
private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
tableEntity.references()
|
||||
.stream()
|
||||
.filter(IEntity::applied)
|
||||
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
||||
.forEach(pos -> referenceIds.add(pos.getId())));
|
||||
|
||||
EntryType entryType = buildEntryType(tableEntity);
|
||||
|
||||
List<ManualChange> allManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber);
|
||||
|
||||
return EntityLogEntry.builder()
|
||||
.reason(tableEntity.buildReason())
|
||||
.legalBasis(tableEntity.legalBasis())
|
||||
.value(tableEntity.getValue())
|
||||
.type(tableEntity.type())
|
||||
.section(tableEntity.getManualOverwrite().getSection()
|
||||
.orElse(this.buildSectionString(tableEntity.getDeepestFullyContainingNode())))
|
||||
.containingNodeId(tableEntity.getDeepestFullyContainingNode().getTreeId())
|
||||
.closestHeadline(tableEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText())
|
||||
.matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString())
|
||||
.startOffset(tableEntity.getTextRange().start())
|
||||
.endOffset(tableEntity.getTextRange().end())
|
||||
// .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite()))
|
||||
.reference(referenceIds)
|
||||
.manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, allManualChanges))
|
||||
.state(buildEntryState(tableEntity))
|
||||
.entryType(entryType)
|
||||
.paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType entryType) {
|
||||
|
||||
int pageParagraphIdx = -1;
|
||||
|
||||
@ -414,7 +463,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
public static EntryType buildEntryType(IEntity entity) {
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
if (entity instanceof SemanticEntity textEntity) {
|
||||
return getEntryType(textEntity.getEntityType());
|
||||
} else if (entity instanceof PrecursorEntity precursorEntity) {
|
||||
if (precursorEntity.isRectangle()) {
|
||||
|
||||
@ -18,8 +18,6 @@ import com.google.common.collect.Sets;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
@ -146,9 +144,9 @@ public class ManualChangesApplicationService {
|
||||
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
|
||||
|
||||
Sets.difference(currentIntersectingPages, newIntersectingPages)
|
||||
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
|
||||
.forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized));
|
||||
Sets.difference(newIntersectingPages, currentIntersectingPages)
|
||||
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
|
||||
.forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized));
|
||||
|
||||
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
||||
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));
|
||||
|
||||
@ -102,9 +102,9 @@ public class UnprocessedChangesService {
|
||||
}
|
||||
|
||||
document.getEntities()
|
||||
.forEach(textEntity -> {
|
||||
.forEach(entity -> {
|
||||
Set<String> processedIds = new HashSet<>();
|
||||
for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) {
|
||||
for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) {
|
||||
if (processedIds.contains(positionsOnPerPage.getId())) {
|
||||
continue;
|
||||
}
|
||||
@ -113,17 +113,18 @@ public class UnprocessedChangesService {
|
||||
.stream()
|
||||
.map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber()))
|
||||
.collect(Collectors.toList());
|
||||
unprocessedManualEntities.add(UnprocessedManualEntity.builder()
|
||||
.annotationId(allAnnotationIds.stream()
|
||||
.filter(textEntity::matchesAnnotationId)
|
||||
.findFirst()
|
||||
.orElse(""))
|
||||
.textBefore(textEntity.getTextBefore())
|
||||
.textAfter(textEntity.getTextAfter())
|
||||
.section(textEntity.getManualOverwrite().getSection()
|
||||
.orElse(textEntity.getDeepestFullyContainingNode().toString()))
|
||||
.positions(positions)
|
||||
.build());
|
||||
UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder()
|
||||
.annotationId(allAnnotationIds.stream()
|
||||
.filter(entity::matchesAnnotationId)
|
||||
.findFirst()
|
||||
.orElse(""))
|
||||
.section(entity.getManualOverwrite().getSection()
|
||||
.orElse(entity.getDeepestFullyContainingNode().toString()))
|
||||
.positions(positions);
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter());
|
||||
}
|
||||
unprocessedManualEntities.add(builder.build());
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@ -23,8 +23,11 @@ import java.util.stream.Stream;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
||||
@ -98,6 +101,22 @@ public class ComponentCreationService {
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(new LinkedList<>(references))
|
||||
.componentFormat(ComponentFormat.TEXT)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection<Entity> references, ComponentFormat componentFormat) {
|
||||
|
||||
referencedEntities.addAll(references);
|
||||
|
||||
kieSession.insert(Component.builder()
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(new LinkedList<>(references))
|
||||
.componentFormat(componentFormat)
|
||||
.build());
|
||||
}
|
||||
|
||||
@ -376,6 +395,27 @@ public class ComponentCreationService {
|
||||
}
|
||||
|
||||
|
||||
public void createComponentForTables(String ruleIdentifier, Collection<Entity> entities) {
|
||||
|
||||
entities.stream()
|
||||
.filter(entity -> !referencedEntities.contains(entity))
|
||||
.sorted(EntityComparators.first())
|
||||
.forEach(entity -> {
|
||||
String value = entity.getValue();
|
||||
ComponentFormat componentFormat = ComponentFormat.TEXT;
|
||||
SemanticNode containingNode = entity.getContainingNode();
|
||||
|
||||
if (containingNode instanceof TableCell cell) { // case for tables with 0 rows and 0 columns
|
||||
value = cell.getTextBlock().getSearchText();
|
||||
} else if (containingNode instanceof Table table) {
|
||||
value = table.asCsv();
|
||||
componentFormat = ComponentFormat.CSV;
|
||||
}
|
||||
create(ruleIdentifier, entity.getType(), value, "Table Entity", List.of(entity), componentFormat);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is.
|
||||
*
|
||||
|
||||
@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -906,6 +908,25 @@ public class EntityCreationService {
|
||||
return byTextRange(textRange, type, entityType, node);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a table entity based on the document table.
|
||||
*
|
||||
* @param table The table to base the table entity on.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @return The created {@link TableEntity}.
|
||||
*/
|
||||
public TableEntity bySemanticNode(Table table, String type, EntityType entityType) {
|
||||
|
||||
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
|
||||
|
||||
addListenerToEntity(tableEntity);
|
||||
|
||||
table.getDocumentTree().addEntityToGraph(tableEntity);
|
||||
|
||||
return tableEntity;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands a text entity's start boundary based on a regex pattern match.
|
||||
@ -978,7 +999,8 @@ public class EntityCreationService {
|
||||
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
|
||||
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.equals(entity) && e.type().equals(type))
|
||||
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
|
||||
.map(e -> (TextEntity)e)
|
||||
.peek(e -> e.addEngines(engines))
|
||||
.findAny();
|
||||
if (optionalTextEntity.isEmpty()) {
|
||||
@ -1419,7 +1441,7 @@ public class EntityCreationService {
|
||||
.filter(e -> e.equals(entity))//
|
||||
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
|
||||
.findAny()
|
||||
.ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node));
|
||||
.ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node));
|
||||
|
||||
} else {
|
||||
addListenerToEntity(entity);
|
||||
@ -1469,13 +1491,13 @@ public class EntityCreationService {
|
||||
}
|
||||
additionalIntersectingNode.getEntities().add(entityToDuplicate);
|
||||
additionalIntersectingNode.getPages(newTextRange)
|
||||
.forEach(page -> page.getEntities().add(entityToDuplicate));
|
||||
.forEach(page -> page.getSemanticEntities().add(entityToDuplicate));
|
||||
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private void addListenerToEntity(TextEntity textEntity) {
|
||||
private void addListenerToEntity(IEntity textEntity) {
|
||||
|
||||
if(kieSessionUpdater != null) {
|
||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
||||
|
||||
@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService {
|
||||
correctEntity.getIntersectingNodes()
|
||||
.forEach(n -> n.getEntities().add(correctEntity));
|
||||
correctEntity.getPages()
|
||||
.forEach(page -> page.getEntities().add(correctEntity));
|
||||
.forEach(page -> page.getSemanticEntities().add(correctEntity));
|
||||
|
||||
correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList());
|
||||
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry());
|
||||
|
||||
@ -105,6 +105,11 @@ public class ComponentDroolsExecutionService {
|
||||
})
|
||||
.forEach(kieSession::insert);
|
||||
|
||||
// todo?
|
||||
// document.getPages().stream().map(Page::getTableEntities).flatMap(Collection::stream)
|
||||
// //.filter(this::isApplied)
|
||||
// .forEach(kieSession::insert);
|
||||
|
||||
fileAttributes.stream()
|
||||
.filter(f -> f.getValue() != null)
|
||||
.forEach(kieSession::insert);
|
||||
|
||||
@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
|
||||
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
@ -128,9 +129,9 @@ public class EntityDroolsExecutionService {
|
||||
}
|
||||
});
|
||||
|
||||
for (TextEntity textEntity : document.getEntities()) {
|
||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
||||
textEntity.notifyEntityInserted();
|
||||
for (SemanticEntity semanticEntity : document.getEntities()) {
|
||||
semanticEntity.addEntityEventListener(kieSessionUpdater);
|
||||
semanticEntity.notifyEntityInserted();
|
||||
}
|
||||
|
||||
document.getPages()
|
||||
|
||||
@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener {
|
||||
|
||||
private void handleOnEntityEvent(IEntity entity, Consumer<Object> consumer) {
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
updateIntersectingNodes(textEntity);
|
||||
textEntity.getRelations().values()
|
||||
if (entity instanceof SemanticEntity semanticEntity) {
|
||||
updateIntersectingNodes(semanticEntity);
|
||||
semanticEntity.getRelations().values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(consumer);
|
||||
textEntity.getRelations().keySet()
|
||||
.forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet())
|
||||
semanticEntity.getRelations().keySet()
|
||||
.forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet())
|
||||
.forEach(consumer));
|
||||
}
|
||||
|
||||
@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener {
|
||||
}
|
||||
|
||||
|
||||
private void updateIntersectingNodes(TextEntity textEntity) {
|
||||
private void updateIntersectingNodes(SemanticEntity semanticEntity) {
|
||||
|
||||
textEntity.getIntersectingNodes()
|
||||
semanticEntity.getIntersectingNodes()
|
||||
.forEach(this::updateFactIfPresent);
|
||||
}
|
||||
|
||||
|
||||
@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
|
||||
public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testDoseMortalityExtraction() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
||||
@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
analyzeService.analyze(request);
|
||||
AnalyzeResult analyze = analyzeService.analyze(request);
|
||||
System.out.println("Finished analysis");
|
||||
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
var doseMortality = componentLog.getComponentLogEntries()
|
||||
.stream()
|
||||
|
||||
@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
.orElseThrow();
|
||||
|
||||
assertEquals(textEntity.getValue(), searchTerm);
|
||||
assertTrue(pageNode.getEntities().contains(textEntity));
|
||||
assertTrue(pageNode.getSemanticEntities().contains(textEntity));
|
||||
assertTrue(document.getPages()
|
||||
.stream()
|
||||
.filter(page -> page != pageNode)
|
||||
.noneMatch(page -> page.getEntities().contains(textEntity)));
|
||||
.noneMatch(page -> page.getSemanticEntities().contains(textEntity)));
|
||||
assertTrue(textEntity.getPages().contains(pageNode));
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
assertTrue(textEntity.getIntersectingNodes()
|
||||
|
||||
@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
||||
System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns);
|
||||
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
|
||||
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
|
||||
for (TextEntity entity : document.getEntities()) {
|
||||
for (TextEntity entity : document.getTextEntities()) {
|
||||
var foundEntity = foundEntities.stream()
|
||||
.filter(f -> f.getId().equals(entity.getId()))
|
||||
.findFirst()
|
||||
.get();
|
||||
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
|
||||
}
|
||||
assert document.getEntities()
|
||||
assert document.getTextEntities()
|
||||
.stream()
|
||||
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
|
||||
assert foundEntities.stream()
|
||||
@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
||||
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(TextEntity::applied)
|
||||
.filter(SemanticEntity::applied)
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
|
||||
.stream())
|
||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||
@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
||||
}
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(entityNode -> !entityNode.applied())
|
||||
|
||||
@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest {
|
||||
file);
|
||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||
|
||||
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA);
|
||||
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA);
|
||||
|
||||
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest {
|
||||
|
||||
doAnalysis(document, Collections.emptyList());
|
||||
|
||||
List<String> authorNames = document.getEntities()
|
||||
List<String> authorNames = document.getTextEntities()
|
||||
.stream()
|
||||
.map(Dictionary::splitIntoAuthorNames)
|
||||
.flatMap(Collection::stream)
|
||||
|
||||
@ -9,6 +9,7 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
|
||||
@ -46,9 +47,9 @@ public class EntityVisualizationUtility {
|
||||
|
||||
private static List<ColoredRectangle> getEntityRectangles(Color color, Page page) {
|
||||
|
||||
return page.getEntities()
|
||||
return page.getSemanticEntities()
|
||||
.stream()
|
||||
.map(TextEntity::getPositionsOnPagePerPage)
|
||||
.map(SemanticEntity::getPositionsOnPagePerPage)
|
||||
.flatMap(Collection::stream)
|
||||
.filter(p -> p.getPage().equals(page))
|
||||
.map(PositionOnPage::getRectanglePerLine)
|
||||
|
||||
@ -1155,6 +1155,15 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
|
||||
end
|
||||
|
||||
|
||||
rule "DOC.100.0: Create TableEntities for all Tables"
|
||||
when
|
||||
$table: Table()
|
||||
then
|
||||
TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY);
|
||||
tableEntity.apply("DOC.100.0", "Table found.", "n-a");
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ AI rules ------------------------------------
|
||||
|
||||
// Rule unit: AI.4
|
||||
|
||||
@ -461,6 +461,14 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
|
||||
end
|
||||
|
||||
|
||||
rule "TableComponents.900.0: Create components for all table entities."
|
||||
salience -900
|
||||
when
|
||||
$tables: List() from collect (Entity(type == "Table"))
|
||||
then
|
||||
componentCreationService.createComponentForTables("TableComponents.900.0", $tables);
|
||||
end
|
||||
|
||||
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
||||
salience -999
|
||||
when
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user