diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java index faf1de14..c209794e 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java @@ -10,6 +10,7 @@ import java.util.Optional; import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode; @@ -362,22 +363,25 @@ public class DocumentTree { } - public void addEntityToGraph(TextEntity entity) { + public void addEntityToGraph(SemanticEntity entity) { getRoot().getNode().addThisToEntityIfIntersects(entity); TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock(); - EntityEnrichmentService.enrichEntity(entity, textBlock); EntityCreationUtility.addToPages(entity); - EntityCreationUtility.addEntityToNodeEntitySets(entity); if (entity.getEntityType().equals(EntityType.TEMPORARY)) { return; } - entity.computeRelations(); - entity.notifyEntityInserted(); + if (entity instanceof TextEntity textEntity) { + EntityEnrichmentService.enrichEntity(textEntity, textBlock); + textEntity.computeRelations(); + entity.notifyEntityInserted(); // todo: table entity currently causes loop? + } + EntityCreationUtility.addEntityToNodeEntitySets(entity); + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java index dee5fef9..8ab6b52c 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java @@ -188,7 +188,7 @@ public interface IEntity { * * @return A set of references. */ - default Set references() { + default Set references() { return getMatchedRule().getReferences(); } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java index 10bb441d..3c40bb36 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java @@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable { boolean ignored; @Builder.Default - Set references = Collections.emptySet(); + Set references = Collections.emptySet(); /** diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java new file mode 100644 index 00000000..36cb3542 --- /dev/null +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java @@ -0,0 +1,110 @@ +package com.iqser.red.service.redaction.v1.server.model.document.entity; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Set; + +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; + +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Data; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; + +@Data +@FieldDefaults(level = AccessLevel.PROTECTED) +@SuperBuilder +public abstract class SemanticEntity implements IEntity { + + final EntityType entityType; + String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted + + @Builder.Default + final PriorityQueue matchedRuleList = new PriorityQueue<>(); + @Builder.Default + final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); + + @Builder.Default + Set pages = new HashSet<>(); + List positionsOnPagePerPage; + + @Builder.Default + List intersectingNodes = new LinkedList<>(); + SemanticNode deepestFullyContainingNode; + + @Builder.Default + Map> relations = new HashMap<>(); + + @Builder.Default + Collection entityEventListeners = new ArrayList<>(); + + + /** + * @return true when this entity is of EntityType ENTITY or HINT + */ + public boolean validEntityType() { + + return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT); + } + + + public boolean valid() { + + return active() && validEntityType(); + } + + + public boolean isType(String type) { + + return type().equals(type); + } + + + public boolean isAnyType(List types) { + + return types.contains(type()); + } + + + public boolean matchesAnnotationId(String manualRedactionId) { + + return getPositionsOnPagePerPage().stream() + .anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId)); + } + + + public void addIntersectingNode(SemanticNode containingNode) { + + intersectingNodes.add(containingNode); + } + + + @Override + public void addEntityEventListener(EntityEventListener listener) { + + entityEventListeners.add(listener); + } + + + @Override + public void removeEntityEventListener(EntityEventListener listener) { + + entityEventListeners.remove(listener); + } + + + @Override + public Collection getEntityEventListeners() { + + return entityEventListeners; + } + +} diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java new file mode 100644 index 00000000..5c940010 --- /dev/null +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java @@ -0,0 +1,111 @@ +package com.iqser.red.service.redaction.v1.server.model.document.entity; + +import java.awt.geom.Rectangle2D; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Set; + +import org.apache.commons.collections4.map.HashedMap; + +import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; + +@Data +@SuperBuilder +@FieldDefaults(level = AccessLevel.PRIVATE) +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) +public class TableEntity extends SemanticEntity { + + @EqualsAndHashCode.Include + final String id; + + Table table; + + + public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) { + + return TableEntity.builder() + .id(table.buildId(table.getTextRange(), type, entityType)) + .type(type) + .entityType(entityType) + .manualOverwrite(new ManualChangeOverwrite(entityType)) + .table(table) + .build(); + } + + + @Override + public String getValue() { + + return "Table:" + table.getHeadline(); + } + + + @Override + public TextRange getTextRange() { + + return table.getTextBlock().getTextRange(); + } + + + @Override + public String type() { + + return getManualOverwrite().getType() + .orElse(NodeType.TABLE.toString()); + } + + + public void removeFromGraph() { + + remove("FINAL.0.0", "removed completely"); + intersectingNodes.forEach(node -> node.getEntities().remove(this)); + pages.forEach(page -> page.getSemanticEntities().remove(this)); + intersectingNodes = new LinkedList<>(); + relations.keySet() + .forEach(entity -> entity.getRelations().remove(this)); + relations = new HashedMap<>(); + deepestFullyContainingNode = null; + pages = new HashSet<>(); + } + + + + + public List getPositionsOnPagePerPage() { + + if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { + Map> rectanglesPerLinePerPage = table.getTextBlock().getPositionsPerPage(table.getTextRange()); + + positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet() + .stream() + .map(entry -> new PositionOnPage(id, entry.getKey(), entry.getValue())) + .toList(); + } + return positionsOnPagePerPage; + } + + + public String asCsv() { + + return table.asCsv(); + } + +} diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java index 83a0baa9..fe4a3f5e 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java @@ -1,15 +1,11 @@ package com.iqser.red.service.redaction.v1.server.model.document.entity; import java.awt.geom.Rectangle2D; -import java.util.ArrayList; -import java.util.Collection; import java.util.Comparator; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.PriorityQueue; import java.util.Set; import org.apache.commons.collections4.map.HashedMap; @@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.redaction.v1.server.model.document.TextRange; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import lombok.AccessLevel; -import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; /** * Represents a text entity within a document, characterized by its text range, type, entity type, * and associated metadata like matched rules, pages, and engines. */ @Data -@Builder -@AllArgsConstructor +@SuperBuilder @FieldDefaults(level = AccessLevel.PRIVATE) -@EqualsAndHashCode(onlyExplicitlyIncluded = true) +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) @SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName") -public class TextEntity implements IEntity { +public class TextEntity extends SemanticEntity { // primary key @EqualsAndHashCode.Include @@ -48,13 +42,6 @@ public class TextEntity implements IEntity { TextRange textRange; @Builder.Default Set duplicateTextRanges = new HashSet<>(); - String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted - final EntityType entityType; - - @Builder.Default - final PriorityQueue matchedRuleList = new PriorityQueue<>(); - @Builder.Default - final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); boolean dictionaryEntry; boolean dossierDictionaryEntry; @@ -66,24 +53,12 @@ public class TextEntity implements IEntity { String value; String textBefore; String textAfter; - @Builder.Default - Set pages = new HashSet<>(); - List positionsOnPagePerPage; - @Builder.Default - List intersectingNodes = new LinkedList<>(); - SemanticNode deepestFullyContainingNode; - - @Builder.Default - Map> relations = new HashMap<>(); - - @Builder.Default - Collection entityEventListeners = new ArrayList<>(); public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) { return TextEntity.builder() - .id(buildId(node, textRange, type, entityType)) + .id(node.buildId(textRange, type, entityType)) .type(type) .entityType(entityType) .textRange(textRange) @@ -110,19 +85,6 @@ public class TextEntity implements IEntity { } - private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) { - - Map> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange); - return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(), - rectanglesPerLinePerPage.values() - .stream() - .flatMap(Collection::stream) - .toList(), - type, - entityType.name()); - } - - public void addTextRange(TextRange textRange) { duplicateTextRanges.add(textRange); @@ -143,24 +105,6 @@ public class TextEntity implements IEntity { } - public boolean isType(String type) { - - return type().equals(type); - } - - - public boolean isAnyType(List types) { - - return types.contains(type()); - } - - - public void addIntersectingNode(SemanticNode containingNode) { - - intersectingNodes.add(containingNode); - } - - public String getValueWithLineBreaks() { return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange()); @@ -171,7 +115,7 @@ public class TextEntity implements IEntity { remove("FINAL.0.0", "removed completely"); intersectingNodes.forEach(node -> node.getEntities().remove(this)); - pages.forEach(page -> page.getEntities().remove(this)); + pages.forEach(page -> page.getSemanticEntities().remove(this)); intersectingNodes = new LinkedList<>(); relations.keySet() .forEach(entity -> entity.getRelations().remove(this)); @@ -215,6 +159,7 @@ public class TextEntity implements IEntity { return textEntity.contains(this); } + public boolean contains(TextEntity textEntity) { if (this.textRange.contains(textEntity.getTextRange())) { @@ -239,7 +184,6 @@ public class TextEntity implements IEntity { } - public boolean intersects(TextEntity textEntity) { return this.textRange.intersects(textEntity.getTextRange()) // @@ -277,14 +221,6 @@ public class TextEntity implements IEntity { notifyEntityUpdated(); } - - public boolean matchesAnnotationId(String manualRedactionId) { - - return getPositionsOnPagePerPage().stream() - .anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId)); - } - - @Override public String toString() { @@ -316,21 +252,6 @@ public class TextEntity implements IEntity { } - /** - * @return true when this entity is of EntityType ENTITY or HINT - */ - public boolean validEntityType() { - - return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT); - } - - - public boolean valid() { - - return active() && validEntityType(); - } - - @Override public String value() { @@ -339,41 +260,32 @@ public class TextEntity implements IEntity { } - @Override - public void addEntityEventListener(EntityEventListener listener) { - - entityEventListeners.add(listener); - } - - - @Override - public void removeEntityEventListener(EntityEventListener listener) { - - entityEventListeners.remove(listener); - - } - - public void computeRelations() { - for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) { - if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) { - if (textEntity.getTextRange().equals(this.getTextRange())) { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this)); - } else if (textEntity.containedBy(this)) { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity)); - } else if (this.containedBy(textEntity)) { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); - } else { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); - } + this.getDeepestFullyContainingNode().getEntities() + .stream() + .filter(semanticEntity -> semanticEntity instanceof TextEntity) + .map(semanticEntity -> (TextEntity) semanticEntity) + .forEach(textEntity -> { + + if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) { + if (textEntity.getTextRange().equals(this.getTextRange())) { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this)); + } else if (textEntity.containedBy(this)) { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity)); + } else if (this.containedBy(textEntity)) { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); + } else { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); + } + + } + }); - } - } } } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java index ddf32c06..9ea7d54d 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.Set; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; @@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode { DocumentTree documentTree; @Builder.Default - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); Map bBoxCache; @@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode { @Override - public Map getBBox() { + public MapgetBBox() { if (bBoxCache == null) { bBoxCache = GenericSemanticNode.super.getBBox(); diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java index 03a91e0d..94c48479 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Set; import java.util.stream.Stream; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; @@ -42,7 +43,7 @@ public class Page { Footer footer; @Builder.Default - Set entities = new HashSet<>(); + Set semanticEntities = new HashSet<>(); @Builder.Default Set images = new HashSet<>(); diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java index 39ed0699..90ddbee5 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java @@ -1,6 +1,9 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes; +import java.util.Set; + import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import lombok.AccessLevel; import lombok.AllArgsConstructor; diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java index c8b33c3d..3691c55b 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java @@ -4,6 +4,7 @@ import static java.lang.String.format; import java.awt.geom.Rectangle2D; import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -17,12 +18,16 @@ import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; +import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations; import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; @@ -74,7 +79,25 @@ public interface SemanticNode { * * @return Set of all Entities associated with this Node */ - Set getEntities(); + Set getEntities(); + + + default Set getTextEntities() { + + return getEntities().stream() + .filter(semanticEntity -> semanticEntity instanceof TextEntity) + .map(semanticEntity -> (TextEntity) semanticEntity) + .collect(Collectors.toSet()); + } + + + default Set getTableEntities() { + + return getEntities().stream() + .filter(semanticEntity -> semanticEntity instanceof TableEntity) + .map(semanticEntity -> (TableEntity) semanticEntity) + .collect(Collectors.toSet()); + } /** @@ -85,9 +108,9 @@ public interface SemanticNode { */ default Stream streamValidEntities() { - return getEntities().stream() + return getTextEntities().stream() .filter(IEntity::active) - .filter(TextEntity::validEntityType); + .filter(SemanticEntity::validEntityType); } @@ -638,18 +661,18 @@ public interface SemanticNode { * This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity. * It sets the fields accordingly and recursively calls this function on all its children. * - * @param textEntity RedactionEntity, which is being inserted into the graph + * @param entity RedactionEntity, which is being inserted into the graph */ - default void addThisToEntityIfIntersects(TextEntity textEntity) { + default void addThisToEntityIfIntersects(SemanticEntity entity) { TextBlock textBlock = getTextBlock(); - if (textBlock.getTextRange().intersects(textEntity.getTextRange())) { - if (textBlock.containsTextRange(textEntity.getTextRange())) { - textEntity.setDeepestFullyContainingNode(this); + if (textBlock.getTextRange().intersects(entity.getTextRange())) { + if (textBlock.containsTextRange(entity.getTextRange())) { + entity.setDeepestFullyContainingNode(this); } - textEntity.addIntersectingNode(this); - getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange()) - .forEach(node -> node.addThisToEntityIfIntersects(textEntity)); + entity.addIntersectingNode(this); + getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange()) + .forEach(node -> node.addThisToEntityIfIntersects(entity)); } } @@ -838,4 +861,17 @@ public interface SemanticNode { return pages.size() == 1 && pages.contains(page); } + + default String buildId(TextRange textRange, String type, EntityType entityType) { + + Map> rectanglesPerLinePerPage = getPositionsPerPage(textRange); + return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(), + rectanglesPerLinePerPage.values() + .stream() + .flatMap(Collection::stream) + .toList(), + type, + entityType.name()); + } + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index 6e5e03a9..dad5c098 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -15,6 +15,7 @@ import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; @@ -48,7 +49,7 @@ public class Table implements SemanticNode { TextBlock textBlock; @Builder.Default - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); Map bBoxCache; @@ -109,7 +110,7 @@ public class Table implements SemanticNode { .toList(); return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream() .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -128,7 +129,7 @@ public class Table implements SemanticNode { .toList(); return streamTableCells().filter(tableCellNode -> colsWithHeader.stream() .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -146,7 +147,7 @@ public class Table implements SemanticNode { .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) .anyMatch(types::contains)) .flatMap(this::streamRow) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -167,7 +168,7 @@ public class Table implements SemanticNode { return entityTypes.containsAll(types); }) .flatMap(this::streamRow) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -185,7 +186,7 @@ public class Table implements SemanticNode { .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) .noneMatch(types::contains)) .flatMap(this::streamRow) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -426,4 +427,30 @@ public class Table implements SemanticNode { visitor.visit(this); } + + public String asCsv() { + + StringBuilder sb = new StringBuilder(); + + for (int row = 0; row < numberOfRows; row++) { + for (int col = 0; col < numberOfCols; col++) { + TableCell cell = getCell(row, col); + String cellText = cell.getTextBlock().getSearchText().replaceAll("\\r?\\n", " ").trim(); + + if (cellText.contains(",") || cellText.contains("\"")) { + cellText = "\"" + cellText.replace("\"", "\"\"") + "\""; + } + + sb.append(cellText); + + if (col < numberOfCols - 1) { + sb.append(","); + } + } + sb.append("\n"); + } + + return sb.toString(); + } + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java index 2e0afd66..d5fb9dd0 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java @@ -5,6 +5,8 @@ import java.util.Set; import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -50,15 +52,15 @@ public class EntityCreationUtility { } - public void addToPages(TextEntity entity) { + public void addToPages(SemanticEntity entity) { Set pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange()); entity.getPages().addAll(pages); - pages.forEach(page -> page.getEntities().add(entity)); + pages.forEach(page -> page.getSemanticEntities().add(entity)); } - public void addEntityToNodeEntitySets(TextEntity entity) { + public void addEntityToNodeEntitySets(SemanticEntity entity) { entity.getIntersectingNodes() .forEach(node -> node.getEntities().add(entity)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java index e577cf0b..4aa07c20 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java @@ -26,6 +26,9 @@ public class Component { List references; + @Builder.Default + ComponentFormat componentFormat = ComponentFormat.TEXT; + public boolean addReference(Entity entity) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java new file mode 100644 index 00000000..e320aa38 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java @@ -0,0 +1,8 @@ +package com.iqser.red.service.redaction.v1.server.model.component; + +public enum ComponentFormat { + + TEXT, + //OVERRIDE, //todo: do we need this? + CSV +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index c92d0d42..1c7902c8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; @@ -70,11 +72,11 @@ public class EntityLogCreatorService { ObservationRegistry observationRegistry; - private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) { + private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) { - return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) // - || textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) // - || textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL)); + return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) // + || semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) // + || semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL)); } @@ -175,7 +177,7 @@ public class EntityLogCreatorService { List entries = new ArrayList<>(); - List textEntities = document.getEntities() + List semanticEntities = document.getEntities() .stream() .filter(entity -> !entity.getValue().isEmpty()) .filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval) @@ -190,7 +192,7 @@ public class EntityLogCreatorService { .toList(); List allIds = new ArrayList<>(); - allIds.addAll(textEntities.stream() + allIds.addAll(semanticEntities.stream() .flatMap(entity -> entity.getPositionsOnPagePerPage() .stream() .map(PositionOnPage::getId)) @@ -204,7 +206,7 @@ public class EntityLogCreatorService { Map> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds); - textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap))); + semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap))); images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId, @@ -219,19 +221,19 @@ public class EntityLogCreatorService { } - public List toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map> existingManualChangesMap) { + public List toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map> existingManualChangesMap) { List entityLogEntries = new ArrayList<>(); // split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities - for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) { + for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) { List rectanglesPerLine = positionOnPage.getRectanglePerLine() .stream() .map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber())) .toList(); - EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>())); + EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>())); // set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages entityLogEntry.setId(positionOnPage.getId()); @@ -317,12 +319,24 @@ public class EntityLogCreatorService { } + private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List existingManualChanges) { + + if (entity instanceof TextEntity textEntity) { + return createEntityLogEntry(textEntity, analysisNumber, existingManualChanges); + } else if (entity instanceof TableEntity tableEntity) { + return createEntityLogEntry(tableEntity, analysisNumber, existingManualChanges); + } else { + throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!"); + } + } + + private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List existingManualChanges) { Set referenceIds = new HashSet<>(); entity.references() .stream() - .filter(TextEntity::active) + .filter(SemanticEntity::active) .forEach(ref -> ref.getPositionsOnPagePerPage() .forEach(pos -> referenceIds.add(pos.getId()))); @@ -365,7 +379,42 @@ public class EntityLogCreatorService { } - private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) { + private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List existingManualChanges) { + + Set referenceIds = new HashSet<>(); + tableEntity.references() + .stream() + .filter(IEntity::applied) + .forEach(ref -> ref.getPositionsOnPagePerPage() + .forEach(pos -> referenceIds.add(pos.getId()))); + + EntryType entryType = buildEntryType(tableEntity); + + List allManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber); + + return EntityLogEntry.builder() + .reason(tableEntity.buildReason()) + .legalBasis(tableEntity.legalBasis()) + .value(tableEntity.getValue()) + .type(tableEntity.type()) + .section(tableEntity.getManualOverwrite().getSection() + .orElse(this.buildSectionString(tableEntity.getDeepestFullyContainingNode()))) + .containingNodeId(tableEntity.getDeepestFullyContainingNode().getTreeId()) + .closestHeadline(tableEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) + .matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString()) + .startOffset(tableEntity.getTextRange().start()) + .endOffset(tableEntity.getTextRange().end()) +// .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite())) + .reference(referenceIds) + .manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, allManualChanges)) + .state(buildEntryState(tableEntity)) + .entryType(entryType) + .paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType)) + .build(); + } + + + private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType entryType) { int pageParagraphIdx = -1; @@ -414,7 +463,7 @@ public class EntityLogCreatorService { public static EntryType buildEntryType(IEntity entity) { - if (entity instanceof TextEntity textEntity) { + if (entity instanceof SemanticEntity textEntity) { return getEntryType(textEntity.getEntityType()); } else if (entity instanceof PrecursorEntity precursorEntity) { if (precursorEntity.isRectangle()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java index 1c81a126..a6b99ead 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java @@ -18,8 +18,6 @@ import com.google.common.collect.Sets; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; -import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; @@ -146,9 +144,9 @@ public class ManualChangesApplicationService { Set newIntersectingPages = new HashSet<>(closestEntity.getPages()); Sets.difference(currentIntersectingPages, newIntersectingPages) - .forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized)); + .forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized)); Sets.difference(newIntersectingPages, currentIntersectingPages) - .forEach(addedPage -> addedPage.getEntities().add(entityToBeResized)); + .forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized)); entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode()); entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java index 446dc723..76c2f7e7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java @@ -102,9 +102,9 @@ public class UnprocessedChangesService { } document.getEntities() - .forEach(textEntity -> { + .forEach(entity -> { Set processedIds = new HashSet<>(); - for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) { + for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) { if (processedIds.contains(positionsOnPerPage.getId())) { continue; } @@ -113,17 +113,18 @@ public class UnprocessedChangesService { .stream() .map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber())) .collect(Collectors.toList()); - unprocessedManualEntities.add(UnprocessedManualEntity.builder() - .annotationId(allAnnotationIds.stream() - .filter(textEntity::matchesAnnotationId) - .findFirst() - .orElse("")) - .textBefore(textEntity.getTextBefore()) - .textAfter(textEntity.getTextAfter()) - .section(textEntity.getManualOverwrite().getSection() - .orElse(textEntity.getDeepestFullyContainingNode().toString())) - .positions(positions) - .build()); + UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder() + .annotationId(allAnnotationIds.stream() + .filter(entity::matchesAnnotationId) + .findFirst() + .orElse("")) + .section(entity.getManualOverwrite().getSection() + .orElse(entity.getDeepestFullyContainingNode().toString())) + .positions(positions); + if (entity instanceof TextEntity textEntity) { + builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter()); + } + unprocessedManualEntities.add(builder.build()); } }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java index 87fc39b5..d81e4936 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java @@ -23,8 +23,11 @@ import java.util.stream.Stream; import org.kie.api.runtime.KieSession; import com.iqser.red.service.redaction.v1.server.model.component.Component; +import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat; import com.iqser.red.service.redaction.v1.server.model.component.Entity; import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils; import com.iqser.red.service.redaction.v1.server.utils.DateConverter; @@ -98,6 +101,22 @@ public class ComponentCreationService { .value(value) .valueDescription(valueDescription) .references(new LinkedList<>(references)) + .componentFormat(ComponentFormat.TEXT) + .build()); + } + + + public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection references, ComponentFormat componentFormat) { + + referencedEntities.addAll(references); + + kieSession.insert(Component.builder() + .matchedRule(RuleIdentifier.fromString(ruleIdentifier)) + .name(name) + .value(value) + .valueDescription(valueDescription) + .references(new LinkedList<>(references)) + .componentFormat(componentFormat) .build()); } @@ -376,6 +395,27 @@ public class ComponentCreationService { } + public void createComponentForTables(String ruleIdentifier, Collection entities) { + + entities.stream() + .filter(entity -> !referencedEntities.contains(entity)) + .sorted(EntityComparators.first()) + .forEach(entity -> { + String value = entity.getValue(); + ComponentFormat componentFormat = ComponentFormat.TEXT; + SemanticNode containingNode = entity.getContainingNode(); + + if (containingNode instanceof TableCell cell) { // case for tables with 0 rows and 0 columns + value = cell.getTextBlock().getSearchText(); + } else if (containingNode instanceof Table table) { + value = table.asCsv(); + componentFormat = ComponentFormat.CSV; + } + create(ruleIdentifier, entity.getType(), value, "Table Entity", List.of(entity), componentFormat); + }); + } + + /** * Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is. * diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index cf0f1900..4bcbf67b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -906,6 +908,25 @@ public class EntityCreationService { return byTextRange(textRange, type, entityType, node); } + /** + * Creates a table entity based on the document table. + * + * @param table The table to base the table entity on. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @return The created {@link TableEntity}. + */ + public TableEntity bySemanticNode(Table table, String type, EntityType entityType) { + + TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType); + + addListenerToEntity(tableEntity); + + table.getDocumentTree().addEntityToGraph(tableEntity); + + return tableEntity; + } + /** * Expands a text entity's start boundary based on a regex pattern match. @@ -978,7 +999,8 @@ public class EntityCreationService { if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) { Optional optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities() .stream() - .filter(e -> e.equals(entity) && e.type().equals(type)) + .filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type)) + .map(e -> (TextEntity)e) .peek(e -> e.addEngines(engines)) .findAny(); if (optionalTextEntity.isEmpty()) { @@ -1419,7 +1441,7 @@ public class EntityCreationService { .filter(e -> e.equals(entity))// .filter(e -> !e.getTextRange().equals(entity.getTextRange()))// .findAny() - .ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node)); + .ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node)); } else { addListenerToEntity(entity); @@ -1469,13 +1491,13 @@ public class EntityCreationService { } additionalIntersectingNode.getEntities().add(entityToDuplicate); additionalIntersectingNode.getPages(newTextRange) - .forEach(page -> page.getEntities().add(entityToDuplicate)); + .forEach(page -> page.getSemanticEntities().add(entityToDuplicate)); entityToDuplicate.addIntersectingNode(additionalIntersectingNode); }); } - private void addListenerToEntity(TextEntity textEntity) { + private void addListenerToEntity(IEntity textEntity) { if(kieSessionUpdater != null) { textEntity.addEntityEventListener(kieSessionUpdater); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java index 7887c0c2..8615afb2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java @@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService { correctEntity.getIntersectingNodes() .forEach(n -> n.getEntities().add(correctEntity)); correctEntity.getPages() - .forEach(page -> page.getEntities().add(correctEntity)); + .forEach(page -> page.getSemanticEntities().add(correctEntity)); correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index 355c6a73..b3386b43 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -105,6 +105,11 @@ public class ComponentDroolsExecutionService { }) .forEach(kieSession::insert); + // todo? +// document.getPages().stream().map(Page::getTableEntities).flatMap(Collection::stream) +// //.filter(this::isApplied) +// .forEach(kieSession::insert); + fileAttributes.stream() .filter(f -> f.getValue() != null) .forEach(kieSession::insert); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java index 32b7e8be..24761c7e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java @@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger; import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; @@ -128,9 +129,9 @@ public class EntityDroolsExecutionService { } }); - for (TextEntity textEntity : document.getEntities()) { - textEntity.addEntityEventListener(kieSessionUpdater); - textEntity.notifyEntityInserted(); + for (SemanticEntity semanticEntity : document.getEntities()) { + semanticEntity.addEntityEventListener(kieSessionUpdater); + semanticEntity.notifyEntityInserted(); } document.getPages() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java index a540bf71..35fed5a8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java @@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener { private void handleOnEntityEvent(IEntity entity, Consumer consumer) { - if (entity instanceof TextEntity textEntity) { - updateIntersectingNodes(textEntity); - textEntity.getRelations().values() + if (entity instanceof SemanticEntity semanticEntity) { + updateIntersectingNodes(semanticEntity); + semanticEntity.getRelations().values() .stream() .flatMap(Collection::stream) .forEach(consumer); - textEntity.getRelations().keySet() - .forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet()) + semanticEntity.getRelations().keySet() + .forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet()) .forEach(consumer)); } @@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener { } - private void updateIntersectingNodes(TextEntity textEntity) { + private void updateIntersectingNodes(SemanticEntity semanticEntity) { - textEntity.getIntersectingNodes() + semanticEntity.getIntersectingNodes() .forEach(this::updateFactIfPresent); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index 430ab672..7c80f6e8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.tenantcommons.TenantContext; +import lombok.SneakyThrows; + @ExtendWith(SpringExtension.class) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"}) public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @Test + @SneakyThrows public void testDoseMortalityExtraction() { AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf"); @@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request); System.out.println("Finished structure analysis"); - analyzeService.analyze(request); + AnalyzeResult analyze = analyzeService.analyze(request); System.out.println("Finished analysis"); + + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); + + String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf"; + + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) { + fileOutputStream.write(annotateResponse.getDocument()); + } + var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID); var doseMortality = componentLog.getComponentLogEntries() .stream() diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java index df7d6e50..51638735 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java @@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr .orElseThrow(); assertEquals(textEntity.getValue(), searchTerm); - assertTrue(pageNode.getEntities().contains(textEntity)); + assertTrue(pageNode.getSemanticEntities().contains(textEntity)); assertTrue(document.getPages() .stream() .filter(page -> page != pageNode) - .noneMatch(page -> page.getEntities().contains(textEntity))); + .noneMatch(page -> page.getSemanticEntities().contains(textEntity))); assertTrue(textEntity.getPages().contains(pageNode)); assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); assertTrue(textEntity.getIntersectingNodes() diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java index 78442fb6..2afc2e16 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java @@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; @@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest { System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns); System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns); System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size()); - for (TextEntity entity : document.getEntities()) { + for (TextEntity entity : document.getTextEntities()) { var foundEntity = foundEntities.stream() .filter(f -> f.getId().equals(entity.getId())) .findFirst() .get(); assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange())); } - assert document.getEntities() + assert document.getTextEntities() .stream() .mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size(); assert foundEntities.stream() @@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest { try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) { for (Page page : document.getPages()) { - List entityPositionsOnPage = page.getEntities() + List entityPositionsOnPage = page.getSemanticEntities() .stream() .filter(entityNode -> !entityNode.removed()) - .filter(TextEntity::applied) + .filter(SemanticEntity::applied) .flatMap(entityNode -> entityNode.getPositionsOnPagePerPage() .stream()) .filter(entityPosition -> entityPosition.getPage().equals(page)) @@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest { } for (Page page : document.getPages()) { - List entityPositionsOnPage = page.getEntities() + List entityPositionsOnPage = page.getSemanticEntities() .stream() .filter(entityNode -> !entityNode.removed()) .filter(entityNode -> !entityNode.applied()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java index c2e76757..0d60f45c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest { file); PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null); - var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA); + var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA); viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java index 61013efb..d20c6599 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java @@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest { doAnalysis(document, Collections.emptyList()); - List authorNames = document.getEntities() + List authorNames = document.getTextEntities() .stream() .map(Dictionary::splitIntoAuthorNames) .flatMap(Collection::stream) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java index a0ef7b5c..cb045d8f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java @@ -9,6 +9,7 @@ import java.util.Set; import java.util.stream.Collectors; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.knecon.fforesight.service.viewerdoc.LayerIdentifier; @@ -46,9 +47,9 @@ public class EntityVisualizationUtility { private static List getEntityRectangles(Color color, Page page) { - return page.getEntities() + return page.getSemanticEntities() .stream() - .map(TextEntity::getPositionsOnPagePerPage) + .map(SemanticEntity::getPositionsOnPagePerPage) .flatMap(Collection::stream) .filter(p -> p.getPage().equals(page)) .map(PositionOnPage::getRectanglePerLine) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 00f56aca..a3ef6909 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1155,6 +1155,15 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" end +rule "DOC.100.0: Create TableEntities for all Tables" + when + $table: Table() + then + TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY); + tableEntity.apply("DOC.100.0", "Table found.", "n-a"); + end + + //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.4 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index 71d9d598..d932c28b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -461,6 +461,14 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" end +rule "TableComponents.900.0: Create components for all table entities." + salience -900 + when + $tables: List() from collect (Entity(type == "Table")) + then + componentCreationService.createComponentForTables("TableComponents.900.0", $tables); + end + rule "DefaultComponents.999.0: Create components for all unmapped entities." salience -999 when