From 723263a7c4651553e5a2ffe59bfed3c5aa3f9bd3 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Mon, 27 Jan 2025 16:52:13 +0100 Subject: [PATCH 1/5] RED-10708: Tables as components in DM --- .../server/model/document/DocumentTree.java | 14 +- .../server/model/document/entity/IEntity.java | 2 +- .../model/document/entity/MatchedRule.java | 2 +- .../model/document/entity/SemanticEntity.java | 110 +++++++++++++ .../model/document/entity/TableEntity.java | 111 +++++++++++++ .../model/document/entity/TextEntity.java | 148 ++++-------------- .../document/nodes/AbstractSemanticNode.java | 5 +- .../v1/server/model/document/nodes/Page.java | 3 +- .../server/model/document/nodes/Section.java | 3 + .../model/document/nodes/SemanticNode.java | 58 +++++-- .../v1/server/model/document/nodes/Table.java | 39 ++++- .../server/utils/EntityCreationUtility.java | 8 +- .../v1/server/model/component/Component.java | 3 + .../model/component/ComponentFormat.java | 8 + .../service/EntityLogCreatorService.java | 75 +++++++-- .../ManualChangesApplicationService.java | 6 +- .../service/UnprocessedChangesService.java | 27 ++-- .../document/ComponentCreationService.java | 40 +++++ .../document/EntityCreationService.java | 30 +++- .../EntityFromPrecursorCreationService.java | 2 +- .../ComponentDroolsExecutionService.java | 5 + .../drools/EntityDroolsExecutionService.java | 7 +- .../service/drools/KieSessionUpdater.java | 16 +- .../v1/server/DocumineFloraTest.java | 14 +- ...cumentIEntityInsertionIntegrationTest.java | 4 +- .../DocumentPerformanceIntegrationTest.java | 11 +- .../v1/server/document/graph/TableTest.java | 2 +- .../redaction/v1/server/rules/Cbi11Test.java | 2 +- .../utils/EntityVisualizationUtility.java | 5 +- .../test/resources/drools/documine_flora.drl | 9 ++ .../drools/documine_flora_components.drl | 8 + 31 files changed, 572 insertions(+), 205 deletions(-) create mode 100644 
redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java create mode 100644 redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java index faf1de14..c209794e 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java @@ -10,6 +10,7 @@ import java.util.Optional; import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode; @@ -362,22 +363,25 @@ public class DocumentTree { } - public void addEntityToGraph(TextEntity entity) { + public void addEntityToGraph(SemanticEntity entity) { getRoot().getNode().addThisToEntityIfIntersects(entity); TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock(); - EntityEnrichmentService.enrichEntity(entity, textBlock); EntityCreationUtility.addToPages(entity); - EntityCreationUtility.addEntityToNodeEntitySets(entity); if (entity.getEntityType().equals(EntityType.TEMPORARY)) { return; } - 
entity.computeRelations(); - entity.notifyEntityInserted(); + if (entity instanceof TextEntity textEntity) { + EntityEnrichmentService.enrichEntity(textEntity, textBlock); + textEntity.computeRelations(); + entity.notifyEntityInserted(); // todo: table entity currently causes loop? + } + EntityCreationUtility.addEntityToNodeEntitySets(entity); + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java index dee5fef9..8ab6b52c 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java @@ -188,7 +188,7 @@ public interface IEntity { * * @return A set of references. */ - default Set references() { + default Set references() { return getMatchedRule().getReferences(); } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java index 10bb441d..3c40bb36 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java @@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable { boolean ignored; @Builder.Default - Set references = Collections.emptySet(); + Set references = Collections.emptySet(); /** diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java 
b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java new file mode 100644 index 00000000..36cb3542 --- /dev/null +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java @@ -0,0 +1,110 @@ +package com.iqser.red.service.redaction.v1.server.model.document.entity; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Set; + +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; + +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Data; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; + +@Data +@FieldDefaults(level = AccessLevel.PROTECTED) +@SuperBuilder +public abstract class SemanticEntity implements IEntity { + + final EntityType entityType; + String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted + + @Builder.Default + final PriorityQueue matchedRuleList = new PriorityQueue<>(); + @Builder.Default + final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); + + @Builder.Default + Set pages = new HashSet<>(); + List positionsOnPagePerPage; + + @Builder.Default + List intersectingNodes = new LinkedList<>(); + SemanticNode deepestFullyContainingNode; + + @Builder.Default + Map> relations = new HashMap<>(); + + @Builder.Default + Collection entityEventListeners = new ArrayList<>(); + + + /** + * @return true when this entity is of EntityType ENTITY or HINT + */ + public boolean validEntityType() { + + return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT); + } + + + public boolean valid() { + + 
return active() && validEntityType(); + } + + + public boolean isType(String type) { + + return type().equals(type); + } + + + public boolean isAnyType(List types) { + + return types.contains(type()); + } + + + public boolean matchesAnnotationId(String manualRedactionId) { + + return getPositionsOnPagePerPage().stream() + .anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId)); + } + + + public void addIntersectingNode(SemanticNode containingNode) { + + intersectingNodes.add(containingNode); + } + + + @Override + public void addEntityEventListener(EntityEventListener listener) { + + entityEventListeners.add(listener); + } + + + @Override + public void removeEntityEventListener(EntityEventListener listener) { + + entityEventListeners.remove(listener); + } + + + @Override + public Collection getEntityEventListeners() { + + return entityEventListeners; + } + +} diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java new file mode 100644 index 00000000..5c940010 --- /dev/null +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java @@ -0,0 +1,111 @@ +package com.iqser.red.service.redaction.v1.server.model.document.entity; + +import java.awt.geom.Rectangle2D; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Set; + +import org.apache.commons.collections4.map.HashedMap; + +import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; + +@Data +@SuperBuilder +@FieldDefaults(level = AccessLevel.PRIVATE) +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) +public class TableEntity extends SemanticEntity { + + @EqualsAndHashCode.Include + final String id; + + Table table; + + + public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) { + + return TableEntity.builder() + .id(table.buildId(table.getTextRange(), type, entityType)) + .type(type) + .entityType(entityType) + .manualOverwrite(new ManualChangeOverwrite(entityType)) + .table(table) + .build(); + } + + + @Override + public String getValue() { + + return "Table:" + table.getHeadline(); + } + + + @Override + public TextRange getTextRange() { + + return table.getTextBlock().getTextRange(); + } + + + @Override + public String type() { + + return getManualOverwrite().getType() + .orElse(NodeType.TABLE.toString()); + } + + + public void removeFromGraph() { + + remove("FINAL.0.0", "removed completely"); + intersectingNodes.forEach(node -> node.getEntities().remove(this)); + pages.forEach(page -> page.getSemanticEntities().remove(this)); + intersectingNodes = new LinkedList<>(); + relations.keySet() + .forEach(entity -> entity.getRelations().remove(this)); + relations = new HashedMap<>(); + deepestFullyContainingNode = null; + pages = new HashSet<>(); + } + + + + + public List getPositionsOnPagePerPage() { + + if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { + Map> rectanglesPerLinePerPage = 
table.getTextBlock().getPositionsPerPage(table.getTextRange()); + + positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet() + .stream() + .map(entry -> new PositionOnPage(id, entry.getKey(), entry.getValue())) + .toList(); + } + return positionsOnPagePerPage; + } + + + public String asCsv() { + + return table.asCsv(); + } + +} diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java index 83a0baa9..fe4a3f5e 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java @@ -1,15 +1,11 @@ package com.iqser.red.service.redaction.v1.server.model.document.entity; import java.awt.geom.Rectangle2D; -import java.util.ArrayList; -import java.util.Collection; import java.util.Comparator; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.PriorityQueue; import java.util.Set; import org.apache.commons.collections4.map.HashedMap; @@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.redaction.v1.server.model.document.TextRange; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import lombok.AccessLevel; -import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; /** * Represents a text entity 
within a document, characterized by its text range, type, entity type, * and associated metadata like matched rules, pages, and engines. */ @Data -@Builder -@AllArgsConstructor +@SuperBuilder @FieldDefaults(level = AccessLevel.PRIVATE) -@EqualsAndHashCode(onlyExplicitlyIncluded = true) +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) @SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName") -public class TextEntity implements IEntity { +public class TextEntity extends SemanticEntity { // primary key @EqualsAndHashCode.Include @@ -48,13 +42,6 @@ public class TextEntity implements IEntity { TextRange textRange; @Builder.Default Set duplicateTextRanges = new HashSet<>(); - String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted - final EntityType entityType; - - @Builder.Default - final PriorityQueue matchedRuleList = new PriorityQueue<>(); - @Builder.Default - final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite(); boolean dictionaryEntry; boolean dossierDictionaryEntry; @@ -66,24 +53,12 @@ public class TextEntity implements IEntity { String value; String textBefore; String textAfter; - @Builder.Default - Set pages = new HashSet<>(); - List positionsOnPagePerPage; - @Builder.Default - List intersectingNodes = new LinkedList<>(); - SemanticNode deepestFullyContainingNode; - - @Builder.Default - Map> relations = new HashMap<>(); - - @Builder.Default - Collection entityEventListeners = new ArrayList<>(); public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) { return TextEntity.builder() - .id(buildId(node, textRange, type, entityType)) + .id(node.buildId(textRange, type, entityType)) .type(type) .entityType(entityType) .textRange(textRange) @@ -110,19 +85,6 @@ public class TextEntity implements IEntity { } - private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) { - - Map> 
rectanglesPerLinePerPage = node.getPositionsPerPage(textRange); - return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(), - rectanglesPerLinePerPage.values() - .stream() - .flatMap(Collection::stream) - .toList(), - type, - entityType.name()); - } - - public void addTextRange(TextRange textRange) { duplicateTextRanges.add(textRange); @@ -143,24 +105,6 @@ public class TextEntity implements IEntity { } - public boolean isType(String type) { - - return type().equals(type); - } - - - public boolean isAnyType(List types) { - - return types.contains(type()); - } - - - public void addIntersectingNode(SemanticNode containingNode) { - - intersectingNodes.add(containingNode); - } - - public String getValueWithLineBreaks() { return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange()); @@ -171,7 +115,7 @@ public class TextEntity implements IEntity { remove("FINAL.0.0", "removed completely"); intersectingNodes.forEach(node -> node.getEntities().remove(this)); - pages.forEach(page -> page.getEntities().remove(this)); + pages.forEach(page -> page.getSemanticEntities().remove(this)); intersectingNodes = new LinkedList<>(); relations.keySet() .forEach(entity -> entity.getRelations().remove(this)); @@ -215,6 +159,7 @@ public class TextEntity implements IEntity { return textEntity.contains(this); } + public boolean contains(TextEntity textEntity) { if (this.textRange.contains(textEntity.getTextRange())) { @@ -239,7 +184,6 @@ public class TextEntity implements IEntity { } - public boolean intersects(TextEntity textEntity) { return this.textRange.intersects(textEntity.getTextRange()) // @@ -277,14 +221,6 @@ public class TextEntity implements IEntity { notifyEntityUpdated(); } - - public boolean matchesAnnotationId(String manualRedactionId) { - - return getPositionsOnPagePerPage().stream() - .anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId)); - } - - @Override public String toString() { @@ -316,21 +252,6 @@ public class 
TextEntity implements IEntity { } - /** - * @return true when this entity is of EntityType ENTITY or HINT - */ - public boolean validEntityType() { - - return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT); - } - - - public boolean valid() { - - return active() && validEntityType(); - } - - @Override public String value() { @@ -339,41 +260,32 @@ public class TextEntity implements IEntity { } - @Override - public void addEntityEventListener(EntityEventListener listener) { - - entityEventListeners.add(listener); - } - - - @Override - public void removeEntityEventListener(EntityEventListener listener) { - - entityEventListeners.remove(listener); - - } - - public void computeRelations() { - for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) { - if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) { - if (textEntity.getTextRange().equals(this.getTextRange())) { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this)); - } else if (textEntity.containedBy(this)) { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity)); - } else if (this.containedBy(textEntity)) { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); - } else { - textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); - this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); - } + 
this.getDeepestFullyContainingNode().getEntities() + .stream() + .filter(semanticEntity -> semanticEntity instanceof TextEntity) + .map(semanticEntity -> (TextEntity) semanticEntity) + .forEach(textEntity -> { + + if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) { + if (textEntity.getTextRange().equals(this.getTextRange())) { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this)); + } else if (textEntity.containedBy(this)) { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity)); + } else if (this.containedBy(textEntity)) { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); + } else { + textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this)); + this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity)); + } + + } + }); - } - } } } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java index ddf32c06..9ea7d54d 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java +++ 
b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/AbstractSemanticNode.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.Set; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; @@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode { DocumentTree documentTree; @Builder.Default - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); Map bBoxCache; @@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode { @Override - public Map getBBox() { + public MapgetBBox() { if (bBoxCache == null) { bBoxCache = GenericSemanticNode.super.getBBox(); diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java index 03a91e0d..94c48479 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Set; import java.util.stream.Stream; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; @@ -42,7 +43,7 @@ public class Page { Footer footer; 
@Builder.Default - Set entities = new HashSet<>(); + Set semanticEntities = new HashSet<>(); @Builder.Default Set images = new HashSet<>(); diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java index 39ed0699..90ddbee5 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java @@ -1,6 +1,9 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes; +import java.util.Set; + import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import lombok.AccessLevel; import lombok.AllArgsConstructor; diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java index c8b33c3d..3691c55b 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java @@ -4,6 +4,7 @@ import static java.lang.String.format; import java.awt.geom.Rectangle2D; import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -17,12 +18,16 @@ import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor; 
import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; +import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations; import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; @@ -74,7 +79,25 @@ public interface SemanticNode { * * @return Set of all Entities associated with this Node */ - Set getEntities(); + Set getEntities(); + + + default Set getTextEntities() { + + return getEntities().stream() + .filter(semanticEntity -> semanticEntity instanceof TextEntity) + .map(semanticEntity -> (TextEntity) semanticEntity) + .collect(Collectors.toSet()); + } + + + default Set getTableEntities() { + + return getEntities().stream() + .filter(semanticEntity -> semanticEntity instanceof TableEntity) + .map(semanticEntity -> (TableEntity) semanticEntity) + .collect(Collectors.toSet()); + } /** @@ -85,9 +108,9 @@ public interface SemanticNode { */ default Stream streamValidEntities() { - return getEntities().stream() + return getTextEntities().stream() .filter(IEntity::active) - .filter(TextEntity::validEntityType); + .filter(SemanticEntity::validEntityType); } @@ -638,18 
+661,18 @@ public interface SemanticNode { * This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity. * It sets the fields accordingly and recursively calls this function on all its children. * - * @param textEntity RedactionEntity, which is being inserted into the graph + * @param entity RedactionEntity, which is being inserted into the graph */ - default void addThisToEntityIfIntersects(TextEntity textEntity) { + default void addThisToEntityIfIntersects(SemanticEntity entity) { TextBlock textBlock = getTextBlock(); - if (textBlock.getTextRange().intersects(textEntity.getTextRange())) { - if (textBlock.containsTextRange(textEntity.getTextRange())) { - textEntity.setDeepestFullyContainingNode(this); + if (textBlock.getTextRange().intersects(entity.getTextRange())) { + if (textBlock.containsTextRange(entity.getTextRange())) { + entity.setDeepestFullyContainingNode(this); } - textEntity.addIntersectingNode(this); - getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange()) - .forEach(node -> node.addThisToEntityIfIntersects(textEntity)); + entity.addIntersectingNode(this); + getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange()) + .forEach(node -> node.addThisToEntityIfIntersects(entity)); } } @@ -838,4 +861,17 @@ public interface SemanticNode { return pages.size() == 1 && pages.contains(page); } + + default String buildId(TextRange textRange, String type, EntityType entityType) { + + Map> rectanglesPerLinePerPage = getPositionsPerPage(textRange); + return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(), + rectanglesPerLinePerPage.values() + .stream() + .flatMap(Collection::stream) + .toList(), + type, + entityType.name()); + } + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java 
b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index 6e5e03a9..dad5c098 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -15,6 +15,7 @@ import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; @@ -48,7 +49,7 @@ public class Table implements SemanticNode { TextBlock textBlock; @Builder.Default - Set entities = new HashSet<>(); + Set entities = new HashSet<>(); Map bBoxCache; @@ -109,7 +110,7 @@ public class Table implements SemanticNode { .toList(); return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream() .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -128,7 +129,7 @@ public class Table implements SemanticNode { .toList(); return streamTableCells().filter(tableCellNode -> colsWithHeader.stream() .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -146,7 +147,7 @@ public class Table implements SemanticNode { .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) 
.anyMatch(types::contains)) .flatMap(this::streamRow) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -167,7 +168,7 @@ public class Table implements SemanticNode { return entityTypes.containsAll(types); }) .flatMap(this::streamRow) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -185,7 +186,7 @@ public class Table implements SemanticNode { .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) .noneMatch(types::contains)) .flatMap(this::streamRow) - .map(TableCell::getEntities) + .map(TableCell::getTextEntities) .flatMap(Collection::stream); } @@ -426,4 +427,30 @@ public class Table implements SemanticNode { visitor.visit(this); } + + public String asCsv() { + + StringBuilder sb = new StringBuilder(); + + for (int row = 0; row < numberOfRows; row++) { + for (int col = 0; col < numberOfCols; col++) { + TableCell cell = getCell(row, col); + String cellText = cell.getTextBlock().getSearchText().replaceAll("\\r?\\n", " ").trim(); + + if (cellText.contains(",") || cellText.contains("\"")) { + cellText = "\"" + cellText.replace("\"", "\"\"") + "\""; + } + + sb.append(cellText); + + if (col < numberOfCols - 1) { + sb.append(","); + } + } + sb.append("\n"); + } + + return sb.toString(); + } + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java index 2e0afd66..d5fb9dd0 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityCreationUtility.java @@ -5,6 +5,8 @@ import java.util.Set; import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor; 
import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -50,15 +52,15 @@ public class EntityCreationUtility { } - public void addToPages(TextEntity entity) { + public void addToPages(SemanticEntity entity) { Set pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange()); entity.getPages().addAll(pages); - pages.forEach(page -> page.getEntities().add(entity)); + pages.forEach(page -> page.getSemanticEntities().add(entity)); } - public void addEntityToNodeEntitySets(TextEntity entity) { + public void addEntityToNodeEntitySets(SemanticEntity entity) { entity.getIntersectingNodes() .forEach(node -> node.getEntities().add(entity)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java index e577cf0b..4aa07c20 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Component.java @@ -26,6 +26,9 @@ public class Component { List references; + @Builder.Default + ComponentFormat componentFormat = ComponentFormat.TEXT; + public boolean addReference(Entity entity) { diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java new file mode 100644 index 00000000..e320aa38 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java @@ -0,0 +1,8 @@ +package com.iqser.red.service.redaction.v1.server.model.component; + +public enum ComponentFormat { + + TEXT, + //OVERRIDE, //todo: do we need this? + CSV +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index c92d0d42..1c7902c8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; 
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; @@ -70,11 +72,11 @@ public class EntityLogCreatorService { ObservationRegistry observationRegistry; - private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) { + private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) { - return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) // - || textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) // - || textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL)); + return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) // + || semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) // + || semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL)); } @@ -175,7 +177,7 @@ public class EntityLogCreatorService { List entries = new ArrayList<>(); - List textEntities = document.getEntities() + List semanticEntities = document.getEntities() .stream() .filter(entity -> !entity.getValue().isEmpty()) .filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval) @@ -190,7 +192,7 @@ public class EntityLogCreatorService { .toList(); List allIds = new ArrayList<>(); - allIds.addAll(textEntities.stream() + allIds.addAll(semanticEntities.stream() .flatMap(entity -> entity.getPositionsOnPagePerPage() .stream() .map(PositionOnPage::getId)) @@ -204,7 +206,7 @@ public class EntityLogCreatorService { Map> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds); - textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap))); + semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap))); images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId, @@ -219,19 +221,19 @@ public class 
EntityLogCreatorService { } - public List toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map> existingManualChangesMap) { + public List toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map> existingManualChangesMap) { List entityLogEntries = new ArrayList<>(); // split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities - for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) { + for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) { List rectanglesPerLine = positionOnPage.getRectanglePerLine() .stream() .map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber())) .toList(); - EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>())); + EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>())); // set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages entityLogEntry.setId(positionOnPage.getId()); @@ -317,12 +319,24 @@ public class EntityLogCreatorService { } + private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List existingManualChanges) { + + if (entity instanceof TextEntity textEntity) { + return createEntityLogEntry(textEntity, analysisNumber, existingManualChanges); + } else if (entity instanceof TableEntity tableEntity) { + return createEntityLogEntry(tableEntity, analysisNumber, existingManualChanges); + } else { + throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!"); + } + } + + private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List existingManualChanges) { Set referenceIds = new HashSet<>(); 
entity.references() .stream() - .filter(TextEntity::active) + .filter(SemanticEntity::active) .forEach(ref -> ref.getPositionsOnPagePerPage() .forEach(pos -> referenceIds.add(pos.getId()))); @@ -365,7 +379,42 @@ public class EntityLogCreatorService { } - private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) { + private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List existingManualChanges) { + + Set referenceIds = new HashSet<>(); + tableEntity.references() + .stream() + .filter(IEntity::applied) + .forEach(ref -> ref.getPositionsOnPagePerPage() + .forEach(pos -> referenceIds.add(pos.getId()))); + + EntryType entryType = buildEntryType(tableEntity); + + List allManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber); + + return EntityLogEntry.builder() + .reason(tableEntity.buildReason()) + .legalBasis(tableEntity.legalBasis()) + .value(tableEntity.getValue()) + .type(tableEntity.type()) + .section(tableEntity.getManualOverwrite().getSection() + .orElse(this.buildSectionString(tableEntity.getDeepestFullyContainingNode()))) + .containingNodeId(tableEntity.getDeepestFullyContainingNode().getTreeId()) + .closestHeadline(tableEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) + .matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString()) + .startOffset(tableEntity.getTextRange().start()) + .endOffset(tableEntity.getTextRange().end()) +// .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite())) + .reference(referenceIds) + .manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, allManualChanges)) + .state(buildEntryState(tableEntity)) + .entryType(entryType) + .paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType)) + .build(); + } + + + private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType 
entryType) { int pageParagraphIdx = -1; @@ -414,7 +463,7 @@ public class EntityLogCreatorService { public static EntryType buildEntryType(IEntity entity) { - if (entity instanceof TextEntity textEntity) { + if (entity instanceof SemanticEntity textEntity) { return getEntryType(textEntity.getEntityType()); } else if (entity instanceof PrecursorEntity precursorEntity) { if (precursorEntity.isRectangle()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java index 1c81a126..a6b99ead 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java @@ -18,8 +18,6 @@ import com.google.common.collect.Sets; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; -import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; @@ -146,9 +144,9 @@ public class ManualChangesApplicationService { Set newIntersectingPages = new HashSet<>(closestEntity.getPages()); 
Sets.difference(currentIntersectingPages, newIntersectingPages) - .forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized)); + .forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized)); Sets.difference(newIntersectingPages, currentIntersectingPages) - .forEach(addedPage -> addedPage.getEntities().add(entityToBeResized)); + .forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized)); entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode()); entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java index 446dc723..76c2f7e7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java @@ -102,9 +102,9 @@ public class UnprocessedChangesService { } document.getEntities() - .forEach(textEntity -> { + .forEach(entity -> { Set processedIds = new HashSet<>(); - for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) { + for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) { if (processedIds.contains(positionsOnPerPage.getId())) { continue; } @@ -113,17 +113,18 @@ public class UnprocessedChangesService { .stream() .map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber())) .collect(Collectors.toList()); - unprocessedManualEntities.add(UnprocessedManualEntity.builder() - .annotationId(allAnnotationIds.stream() - .filter(textEntity::matchesAnnotationId) - 
.findFirst() - .orElse("")) - .textBefore(textEntity.getTextBefore()) - .textAfter(textEntity.getTextAfter()) - .section(textEntity.getManualOverwrite().getSection() - .orElse(textEntity.getDeepestFullyContainingNode().toString())) - .positions(positions) - .build()); + UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder() + .annotationId(allAnnotationIds.stream() + .filter(entity::matchesAnnotationId) + .findFirst() + .orElse("")) + .section(entity.getManualOverwrite().getSection() + .orElse(entity.getDeepestFullyContainingNode().toString())) + .positions(positions); + if (entity instanceof TextEntity textEntity) { + builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter()); + } + unprocessedManualEntities.add(builder.build()); } }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java index 87fc39b5..d81e4936 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java @@ -23,8 +23,11 @@ import java.util.stream.Stream; import org.kie.api.runtime.KieSession; import com.iqser.red.service.redaction.v1.server.model.component.Component; +import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat; import com.iqser.red.service.redaction.v1.server.model.component.Entity; import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils; import com.iqser.red.service.redaction.v1.server.utils.DateConverter; @@ -98,6 +101,22 @@ public class ComponentCreationService { .value(value) .valueDescription(valueDescription) .references(new LinkedList<>(references)) + .componentFormat(ComponentFormat.TEXT) + .build()); + } + + + public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection references, ComponentFormat componentFormat) { + + referencedEntities.addAll(references); + + kieSession.insert(Component.builder() + .matchedRule(RuleIdentifier.fromString(ruleIdentifier)) + .name(name) + .value(value) + .valueDescription(valueDescription) + .references(new LinkedList<>(references)) + .componentFormat(componentFormat) .build()); } @@ -376,6 +395,27 @@ public class ComponentCreationService { } + public void createComponentForTables(String ruleIdentifier, Collection entities) { + + entities.stream() + .filter(entity -> !referencedEntities.contains(entity)) + .sorted(EntityComparators.first()) + .forEach(entity -> { + String value = entity.getValue(); + ComponentFormat componentFormat = ComponentFormat.TEXT; + SemanticNode containingNode = entity.getContainingNode(); + + if (containingNode instanceof TableCell cell) { // case for tables with 0 rows and 0 columns + value = cell.getTextBlock().getSearchText(); + } else if (containingNode instanceof Table table) { + value = table.asCsv(); + componentFormat = ComponentFormat.CSV; + } + create(ruleIdentifier, entity.getType(), value, "Table Entity", List.of(entity), componentFormat); + }); + } + + /** * Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is. 
* diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index cf0f1900..4bcbf67b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -906,6 +908,25 @@ public class EntityCreationService { return byTextRange(textRange, type, entityType, node); } + /** + * Creates a table entity based on the document table. + * + * @param table The table to base the table entity on. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @return The created {@link TableEntity}. 
+ */ + public TableEntity bySemanticNode(Table table, String type, EntityType entityType) { + + TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType); + + addListenerToEntity(tableEntity); + + table.getDocumentTree().addEntityToGraph(tableEntity); + + return tableEntity; + } + /** * Expands a text entity's start boundary based on a regex pattern match. @@ -978,7 +999,8 @@ public class EntityCreationService { if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) { Optional optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities() .stream() - .filter(e -> e.equals(entity) && e.type().equals(type)) + .filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type)) + .map(e -> (TextEntity)e) .peek(e -> e.addEngines(engines)) .findAny(); if (optionalTextEntity.isEmpty()) { @@ -1419,7 +1441,7 @@ public class EntityCreationService { .filter(e -> e.equals(entity))// .filter(e -> !e.getTextRange().equals(entity.getTextRange()))// .findAny() - .ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node)); + .ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node)); } else { addListenerToEntity(entity); @@ -1469,13 +1491,13 @@ public class EntityCreationService { } additionalIntersectingNode.getEntities().add(entityToDuplicate); additionalIntersectingNode.getPages(newTextRange) - .forEach(page -> page.getEntities().add(entityToDuplicate)); + .forEach(page -> page.getSemanticEntities().add(entityToDuplicate)); entityToDuplicate.addIntersectingNode(additionalIntersectingNode); }); } - private void addListenerToEntity(TextEntity textEntity) { + private void addListenerToEntity(IEntity textEntity) { if(kieSessionUpdater != null) { textEntity.addEntityEventListener(kieSessionUpdater); diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java index 7887c0c2..8615afb2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java @@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService { correctEntity.getIntersectingNodes() .forEach(n -> n.getEntities().add(correctEntity)); correctEntity.getPages() - .forEach(page -> page.getEntities().add(correctEntity)); + .forEach(page -> page.getSemanticEntities().add(correctEntity)); correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index 355c6a73..b3386b43 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -105,6 +105,11 @@ public class ComponentDroolsExecutionService { }) .forEach(kieSession::insert); + // todo? 
+// document.getPages().stream().map(Page::getTableEntities).flatMap(Collection::stream) +// //.filter(this::isApplied) +// .forEach(kieSession::insert); + fileAttributes.stream() .filter(f -> f.getValue() != null) .forEach(kieSession::insert); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java index 32b7e8be..24761c7e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java @@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger; import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; @@ -128,9 +129,9 @@ public class EntityDroolsExecutionService { } }); - for (TextEntity textEntity : document.getEntities()) { - textEntity.addEntityEventListener(kieSessionUpdater); - textEntity.notifyEntityInserted(); + for (SemanticEntity semanticEntity : document.getEntities()) { + semanticEntity.addEntityEventListener(kieSessionUpdater); + semanticEntity.notifyEntityInserted(); } document.getPages() diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java index a540bf71..35fed5a8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/KieSessionUpdater.java @@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener { private void handleOnEntityEvent(IEntity entity, Consumer consumer) { - if (entity instanceof TextEntity textEntity) { - updateIntersectingNodes(textEntity); - textEntity.getRelations().values() + if (entity instanceof SemanticEntity semanticEntity) { + updateIntersectingNodes(semanticEntity); + semanticEntity.getRelations().values() .stream() .flatMap(Collection::stream) .forEach(consumer); - textEntity.getRelations().keySet() - .forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet()) + semanticEntity.getRelations().keySet() + .forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet()) 
.forEach(consumer)); } @@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener { } - private void updateIntersectingNodes(TextEntity textEntity) { + private void updateIntersectingNodes(SemanticEntity semanticEntity) { - textEntity.getIntersectingNodes() + semanticEntity.getIntersectingNodes() .forEach(this::updateFactIfPresent); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index 430ab672..7c80f6e8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.tenantcommons.TenantContext; +import lombok.SneakyThrows; + @ExtendWith(SpringExtension.class) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"}) public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @Test + @SneakyThrows public void testDoseMortalityExtraction() { AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf"); @@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request); System.out.println("Finished structure 
analysis"); - analyzeService.analyze(request); + AnalyzeResult analyze = analyzeService.analyze(request); System.out.println("Finished analysis"); + + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); + + String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf"; + + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) { + fileOutputStream.write(annotateResponse.getDocument()); + } + var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID); var doseMortality = componentLog.getComponentLogEntries() .stream() diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java index df7d6e50..51638735 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentIEntityInsertionIntegrationTest.java @@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr .orElseThrow(); assertEquals(textEntity.getValue(), searchTerm); - assertTrue(pageNode.getEntities().contains(textEntity)); + assertTrue(pageNode.getSemanticEntities().contains(textEntity)); assertTrue(document.getPages() .stream() .filter(page -> page != pageNode) - .noneMatch(page -> page.getEntities().contains(textEntity))); + .noneMatch(page -> page.getSemanticEntities().contains(textEntity))); assertTrue(textEntity.getPages().contains(pageNode)); 
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity); assertTrue(textEntity.getIntersectingNodes() diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java index 78442fb6..2afc2e16 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java @@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; @@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest { System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns); System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns); System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size()); - for (TextEntity entity : document.getEntities()) { + 
for (TextEntity entity : document.getTextEntities()) { var foundEntity = foundEntities.stream() .filter(f -> f.getId().equals(entity.getId())) .findFirst() .get(); assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange())); } - assert document.getEntities() + assert document.getTextEntities() .stream() .mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size(); assert foundEntities.stream() @@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest { try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) { for (Page page : document.getPages()) { - List entityPositionsOnPage = page.getEntities() + List entityPositionsOnPage = page.getSemanticEntities() .stream() .filter(entityNode -> !entityNode.removed()) - .filter(TextEntity::applied) + .filter(SemanticEntity::applied) .flatMap(entityNode -> entityNode.getPositionsOnPagePerPage() .stream()) .filter(entityPosition -> entityPosition.getPage().equals(page)) @@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest { } for (Page page : document.getPages()) { - List entityPositionsOnPage = page.getEntities() + List entityPositionsOnPage = page.getSemanticEntities() .stream() .filter(entityNode -> !entityNode.removed()) .filter(entityNode -> !entityNode.applied()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java index c2e76757..0d60f45c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest { file); PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null); - var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA); + var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA); viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java index 61013efb..d20c6599 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/rules/Cbi11Test.java @@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest { doAnalysis(document, Collections.emptyList()); - List authorNames = document.getEntities() + List authorNames = document.getTextEntities() .stream() .map(Dictionary::splitIntoAuthorNames) .flatMap(Collection::stream) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java index a0ef7b5c..cb045d8f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java @@ -9,6 +9,7 @@ import java.util.Set; import java.util.stream.Collectors; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.knecon.fforesight.service.viewerdoc.LayerIdentifier; @@ -46,9 +47,9 @@ public class EntityVisualizationUtility { private static List getEntityRectangles(Color color, Page page) { - return page.getEntities() + return page.getSemanticEntities() .stream() - .map(TextEntity::getPositionsOnPagePerPage) + .map(SemanticEntity::getPositionsOnPagePerPage) .flatMap(Collection::stream) .filter(p -> p.getPage().equals(page)) .map(PositionOnPage::getRectanglePerLine) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 00f56aca..a3ef6909 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1155,6 +1155,15 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" end +rule "DOC.100.0: Create TableEntities for all Tables" + when + $table: Table() + then + TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY); + tableEntity.apply("DOC.100.0", "Table found.", "n-a"); + end + + //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.4 diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index 71d9d598..d932c28b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -461,6 +461,14 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" end +rule "TableComponents.900.0: Create components for all table entities." + salience -900 + when + $tables: List() from collect (Entity(type == "Table")) + then + componentCreationService.createComponentForTables("TableComponents.900.0", $tables); + end + rule "DefaultComponents.999.0: Create components for all unmapped entities." salience -999 when -- 2.47.2 From 229d1e98ac2e04fb02c3f4b9b97c8528a03687d8 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Fri, 31 Jan 2025 12:33:46 +0100 Subject: [PATCH 2/5] RED-10708: Tables as components in DM --- .../server/model/document/DocumentTree.java | 13 +++++----- .../model/document/entity/SemanticEntity.java | 20 +++++++++++++++ .../model/document/entity/TableEntity.java | 25 ++++++------------- .../model/document/entity/TextEntity.java | 15 +---------- .../server/utils/EntityEnrichmentService.java | 11 ++++++++ .../model/component/ComponentFormat.java | 1 - .../document/EntityCreationService.java | 25 ++++++++++++------- .../test/resources/drools/documine_flora.drl | 9 ------- .../drools/documine_flora_components.drl | 9 ------- 9 files changed, 63 insertions(+), 65 deletions(-) diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java index c209794e..570a7182 100644 --- 
a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java @@ -369,18 +369,19 @@ public class DocumentTree { TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock(); + if (entity instanceof TextEntity textEntity) { + EntityEnrichmentService.enrichEntity(textEntity, textBlock); + } + EntityCreationUtility.addToPages(entity); + EntityCreationUtility.addEntityToNodeEntitySets(entity); if (entity.getEntityType().equals(EntityType.TEMPORARY)) { return; } - if (entity instanceof TextEntity textEntity) { - EntityEnrichmentService.enrichEntity(textEntity, textBlock); - textEntity.computeRelations(); - entity.notifyEntityInserted(); // todo: table entity currently causes loop? - } - EntityCreationUtility.addEntityToNodeEntitySets(entity); + entity.computeRelations(); + entity.notifyEntityInserted(); // todo: table entity currently causes loop? 
} diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java index 36cb3542..dad06288 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/SemanticEntity.java @@ -10,6 +10,8 @@ import java.util.Map; import java.util.PriorityQueue; import java.util.Set; +import org.apache.commons.collections4.map.HashedMap; + import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; @@ -56,6 +58,7 @@ public abstract class SemanticEntity implements IEntity { } + @Override public boolean valid() { return active() && validEntityType(); @@ -107,4 +110,21 @@ public abstract class SemanticEntity implements IEntity { return entityEventListeners; } + + public abstract void computeRelations(); + + + public void removeFromGraph() { + + remove("FINAL.0.0", "removed completely"); + intersectingNodes.forEach(node -> node.getEntities().remove(this)); + pages.forEach(page -> page.getSemanticEntities().remove(this)); + intersectingNodes = new LinkedList<>(); + relations.keySet() + .forEach(entity -> entity.getRelations().remove(this)); + relations = new HashedMap<>(); + deepestFullyContainingNode = null; + pages = new HashSet<>(); + } + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java index 5c940010..74b4d38d 100644 --- 
a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TableEntity.java @@ -72,23 +72,7 @@ public class TableEntity extends SemanticEntity { .orElse(NodeType.TABLE.toString()); } - - public void removeFromGraph() { - - remove("FINAL.0.0", "removed completely"); - intersectingNodes.forEach(node -> node.getEntities().remove(this)); - pages.forEach(page -> page.getSemanticEntities().remove(this)); - intersectingNodes = new LinkedList<>(); - relations.keySet() - .forEach(entity -> entity.getRelations().remove(this)); - relations = new HashedMap<>(); - deepestFullyContainingNode = null; - pages = new HashSet<>(); - } - - - - + @Override public List getPositionsOnPagePerPage() { if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { @@ -108,4 +92,11 @@ public class TableEntity extends SemanticEntity { return table.asCsv(); } + + @Override + public void computeRelations() { + // NO - OP + // can be implemented in the future + } + } diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java index fe4a3f5e..b3f964c5 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java @@ -111,20 +111,7 @@ public class TextEntity extends SemanticEntity { } - public void removeFromGraph() { - - remove("FINAL.0.0", "removed completely"); - intersectingNodes.forEach(node -> node.getEntities().remove(this)); - pages.forEach(page -> page.getSemanticEntities().remove(this)); - 
intersectingNodes = new LinkedList<>(); - relations.keySet() - .forEach(entity -> entity.getRelations().remove(this)); - relations = new HashedMap<>(); - deepestFullyContainingNode = null; - pages = new HashSet<>(); - } - - + @Override public List getPositionsOnPagePerPage() { if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityEnrichmentService.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityEnrichmentService.java index fbdd30f5..d8166266 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityEnrichmentService.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/utils/EntityEnrichmentService.java @@ -20,12 +20,15 @@ public class EntityEnrichmentService { public void enrichEntity(TextEntity entity, TextBlock textBlock) { + entity.setValue(textBlock.subSequence(entity.getTextRange()).toString()); entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock)); entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock)); } + private String findTextAfter(int index, TextBlock textBlock) { + int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end()); String textAfter = textBlock.subSequence(index, endOffset).toString(); if (!textAfter.isBlank()) { @@ -38,7 +41,9 @@ public class EntityEnrichmentService { return ""; } + private String findTextBefore(int index, TextBlock textBlock) { + int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start()); String textBefore = textBlock.subSequence(offsetBefore, index).toString(); if (!textBefore.isBlank()) { @@ -51,13 +56,17 @@ public class EntityEnrichmentService { return ""; } + private List splitToWordsAndRemoveEmptyWords(String text) { 
+ return Arrays.stream(text.split(" ")) .filter(word -> !Objects.equals("", word)) .toList(); } + private String concatWordsBefore(List words, boolean endWithSpace) { + StringBuilder sb = new StringBuilder(); for (String word : words) { sb.append(word).append(" "); @@ -66,7 +75,9 @@ public class EntityEnrichmentService { return endWithSpace ? result + " " : result; } + private String concatWordsAfter(List words, boolean startWithSpace) { + StringBuilder sb = new StringBuilder(); for (String word : words) { sb.append(word).append(" "); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java index e320aa38..fd0f9781 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/ComponentFormat.java @@ -3,6 +3,5 @@ package com.iqser.red.service.redaction.v1.server.model.component; public enum ComponentFormat { TEXT, - //OVERRIDE, //todo: do we need this? 
CSV } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index 4bcbf67b..7491c499 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -908,23 +908,32 @@ public class EntityCreationService { return byTextRange(textRange, type, entityType, node); } + /** * Creates a table entity based on the document table. * - * @param table The table to base the table entity on. + * @param table The table to base the table entity on. * @param type The type of entity to create. * @param entityType The entity's classification. - * @return The created {@link TableEntity}. + * @return An optional containing the created {@link TableEntity}. 
*/ - public TableEntity bySemanticNode(Table table, String type, EntityType entityType) { + public Optional bySemanticNode(Table table, String type, EntityType entityType) { TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType); - addListenerToEntity(tableEntity); + Optional optionalTableEntity = table.getDocumentTree().getRoot().getNode().getEntities() + .stream() + .filter(e -> e instanceof TableEntity && e.equals(tableEntity) && e.type().equals(type)) + .map(e -> (TableEntity) e) + .findAny(); + if (optionalTableEntity.isPresent()) { + return optionalTableEntity; + } + addListenerToEntity(tableEntity); table.getDocumentTree().addEntityToGraph(tableEntity); - return tableEntity; + return Optional.of(tableEntity); } @@ -1000,7 +1009,7 @@ public class EntityCreationService { Optional optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities() .stream() .filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type)) - .map(e -> (TextEntity)e) + .map(e -> (TextEntity) e) .peek(e -> e.addEngines(engines)) .findAny(); if (optionalTextEntity.isEmpty()) { @@ -1499,11 +1508,9 @@ public class EntityCreationService { private void addListenerToEntity(IEntity textEntity) { - if(kieSessionUpdater != null) { + if (kieSessionUpdater != null) { textEntity.addEntityEventListener(kieSessionUpdater); } } - - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index a3ef6909..00f56aca 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1155,15 +1155,6 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" end -rule "DOC.100.0: Create TableEntities for all Tables" - when - $table: Table() - then - TableEntity tableEntity = 
entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY); - tableEntity.apply("DOC.100.0", "Table found.", "n-a"); - end - - //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.4 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index d932c28b..84f2027a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -460,15 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'"); end - -rule "TableComponents.900.0: Create components for all table entities." - salience -900 - when - $tables: List() from collect (Entity(type == "Table")) - then - componentCreationService.createComponentForTables("TableComponents.900.0", $tables); - end - rule "DefaultComponents.999.0: Create components for all unmapped entities." 
salience -999 when -- 2.47.2 From db7debf0d4ac97a8136615cb6217eac8ad2a5326 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Fri, 31 Jan 2025 12:59:51 +0100 Subject: [PATCH 3/5] RED-10708: Tables as components in DM --- .../redaction/v1/server/model/document/DocumentTree.java | 2 +- .../service/drools/ComponentDroolsExecutionService.java | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java index 570a7182..73c9beca 100644 --- a/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java +++ b/redaction-service-v1/document/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java @@ -381,7 +381,7 @@ public class DocumentTree { } entity.computeRelations(); - entity.notifyEntityInserted(); // todo: table entity currently causes loop? + entity.notifyEntityInserted(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index b3386b43..355c6a73 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -105,11 +105,6 @@ public class ComponentDroolsExecutionService { }) .forEach(kieSession::insert); - // todo? 
-// document.getPages().stream().map(Page::getTableEntities).flatMap(Collection::stream) -// //.filter(this::isApplied) -// .forEach(kieSession::insert); - fileAttributes.stream() .filter(f -> f.getValue() != null) .forEach(kieSession::insert); -- 2.47.2 From c3e0aae800bc01a4f12806c9552f431f389d182f Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 4 Feb 2025 11:23:46 +0100 Subject: [PATCH 4/5] RED-10708: Tables as components in DM --- .../TableComponentsIntegrationTest.java | 90 + .../drools/documine_flora_table_test.drl | 1675 +++++++++++++++++ .../documine_flora_table_test_components.drl | 493 +++++ 3 files changed, 2258 insertions(+) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java new file mode 100644 index 00000000..26ed5e7c --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java @@ -0,0 +1,90 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import 
org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; +import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; +import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; +import com.knecon.fforesight.tenantcommons.TenantContext; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"}) +class TableComponentsIntegrationTest extends AbstractRedactionIntegrationTest { + + private static final String RULES = loadFromClassPath("drools/documine_flora_table_test.drl"); + private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_table_test_components.drl"); + private static final String DATE_FORMATS = loadFromClassPath("dateFormats.txt"); + + + @BeforeEach + public void stubClients() { + + TenantContext.setTenantId("redaction"); + + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis()); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES)); + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis()); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, 
RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES)); + when(dateFormatsClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis()); + when(dateFormatsClient.getDateFormats(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(DATE_FORMATS)); + + loadDictionaryForTest(); + loadTypeForTest(); + loadNerForTest(); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, null, true)).thenReturn(getTemplateDictionaryTypeResponse()); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, null, true)).thenReturn(getDossierDictionaryTypeResponse()); + mockDictionaryCalls(null); + when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); + } + + + @Test + void testTableComponentsCreation() throws IOException { + + AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf"); + + analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request); + + analyzeService.analyze(request); + + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); + String outputFileName = OsUtils.getTemporaryDirectory() + "/TableComponents.pdf"; + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) { + fileOutputStream.write(annotateResponse.getDocument()); + } + + + var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID); + boolean tableComponentFound = componentLog.getComponentLogEntries() + .stream() + .anyMatch(entry -> "Table".equals(entry.getName())); + + assertTrue(tableComponentFound, "Expected table component 'Table' to be present in the component log"); + + var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, 
TEST_FILE_ID); + boolean tableEntityFound = entityLog.getEntityLogEntry() + .stream() + .anyMatch(entry -> entry.getMatchedRule() != null && entry.getMatchedRule().contains("DOC.100.0")); + + assertTrue(tableEntityFound, "Expected table entity creation ('DOC.100.0') to be present in the entity log"); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl new file mode 100644 index 00000000..75df6c3d --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl @@ -0,0 +1,1675 @@ +package drools + +import static java.lang.String.format; +import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; +import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; + +import java.util.List; +import java.util.LinkedList; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; + +import com.iqser.red.service.redaction.v1.server.logger.RulesLogger; +import com.iqser.red.service.redaction.v1.server.model.document.*; +import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.LayoutEngine; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; +import com.iqser.red.service.redaction.v1.server.model.NerEntities; +import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; +import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; + 
+import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; + +global Document document +global EntityCreationService entityCreationService +global ManualChangesApplicationService manualChangesApplicationService +global Dictionary dictionary +global RulesLogger logger + +//------------------------------------ queries ------------------------------------ +// Query that matches every FileAttribute fact. +query "getFileAttributes" + $fileAttribute: FileAttribute() + end + +//------------------------------------ table rules ------------------------------------ +// DOC.100.0: for every Table fact, asks entityCreationService for a "Table" ENTITY on that node and, when one is returned, applies rule DOC.100.0 to it. The Optional is consumed directly (same shape as DOC.6.0 and DOC.8.0) instead of being stored in a raw-typed local, which would erase the element type seen by the lambda. +rule "DOC.100.0: Create TableEntities for all Tables" + when + $table: Table() + then + entityCreationService.bySemanticNode($table, "Table", EntityType.ENTITY) + .ifPresent(tableEntity -> tableEntity.apply("DOC.100.0", "Table found.", "n-a")); + end + +//------------------------------------ Headlines rules ------------------------------------ + +// Rule unit: H.0 +rule "H.0.0: retract table of contents page" + when + $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) + $node: SemanticNode(onPage($page.getNumber()), !onPage($page.getNumber() -1), getType() != NodeType.IMAGE) + then + retract($node); +
end + + +// Rule unit: H.1 - for each page of the parent of a headline containing "CONTENTS", retracts every non-image node (other than that headline) that is on the page but not on the page before it; runs with salience 10. +rule "H.1.0: Ignore Table of Contents" + salience 10 + when + $tocHeadline: Headline(containsString("CONTENTS")) + $page: Page() from $tocHeadline.getParent().getPages() + $node: SemanticNode(this != $tocHeadline, getType() != NodeType.IMAGE, onPage($page.getNumber()), !onPage($page.getNumber() -1)) + then + retract($node); + end + + +// Rule unit: H.2 - calls bySemanticNode with type "headline" and EntityType.HINT for every Headline; the returned value is not used. +rule "H.2.0: Show headlines" + when + $headline: Headline() + then + entityCreationService.bySemanticNode($headline, "headline", EntityType.HINT); + end + + +// Rule unit: H.3 - derives the "OECD Number" FileAttribute: only while no such attribute with one of the listed values exists, takes the first text range from the concatenated results of the three OECD regex searches and inserts its text as the attribute value. +rule "H.3.0: Study Type File Attribute" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $section: Section( + (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")) + ,(containsString("OECD") || containsString("EPA") || containsString("OPPTS")) + ) + then + Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(textRange -> $section.getTextBlock().subSequence(textRange).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .ifPresent(fileAttribute -> insert(fileAttribute)); + end + +rule "H.3.1: Study Type File Attribute in Headlines" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $page: Page($pageNumber:number, + getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") + ||
getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") + || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) + $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) + then + Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $headline.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $headline.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $headline.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(textRange -> $headline.getTextBlock().subSequence(textRange).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .ifPresent(fileAttribute -> insert(fileAttribute)); + end + + +//------------------------------------ General documine rules ------------------------------------ + +// Rule unit: DOC.1 +rule "DOC.1.0: Guidelines" + when + $section: Section( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + ) + then + entityCreationService.byRegex("OECD (No\\.? 
)?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? \\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline no. 
found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline found", "n-a") + ); + end + +rule "DOC.1.2: Guidelines" + when + $section: Section( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + && ( + hasEntitiesOfType("oecd_guideline") + || hasEntitiesOfType("epa_guideline") + || hasEntitiesOfType("ec_guideline") + ) + ) + then + $section.getEntitiesOfType(List.of("oecd_guideline","ec_guideline", "epa_guideline")).forEach(entity -> { + entity.apply("DOC.1.2", "OECD guideline found.", "n-a"); + }); + 
end +// DOC.1.3: removes and retracts every oecd/ec/epa guideline entity of a section that does NOT combine a guideline marker (DATA REQUIREMENT / TEST GUIDELINE / MÉTODO(S) DE REFERÊNCIA(S):) with OECD, EPA or OPPTS. +rule "DOC.1.3: Guidelines" + when + $section: Section( + ( + hasEntitiesOfType("oecd_guideline") + || hasEntitiesOfType("epa_guideline") + || hasEntitiesOfType("ec_guideline") + ) + && !( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + ) + ) + then + $section.getEntitiesOfType(List.of("oecd_guideline", "ec_guideline", "epa_guideline")).forEach(entity -> { + entity.remove("DOC.1.3", "removed"); + retract(entity); + }); + end + +rule "DOC.1.4: Guideline in Headlines" + when + $page: Page($pageNumber:number, + getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") + || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") + || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) + $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) + then + entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)?
\\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline no. 
found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + end + + +// Rule unit: DOC.2 - in a section containing both "LABORATORY PROJECT ID" and "Report Number:", applies DOC.2.0 to the first "report_number" entity returned by lineAfterString for "Report Number:"; any further results are ignored. +rule "DOC.2.0: Report number" + when + $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) + then + entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.2.0", "Report number found", "n-a"); + }); + end + + +// Rule unit: DOC.3 +rule "DOC.3.0: Experimental Starting Date" + when + $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II.
Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental start date", + "Experimental start date:", + "Experimental Starting Date", + "Experimental Starting Date:", + "Experimental starting date", + "Experimental starting date:", + "Experimental Start Date", + "Experimental Start Date:", + "Experimental I. Starting Date:", + "Experimental II. Starting Date:"), "experimental_start_date", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.3.0", "Experimental start date found", "n-a"); + }); + end + + +// Rule unit: DOC.4 - in sections mentioning "experimental termination date" or "experimental completion date" (case-insensitive guard), applies DOC.4.0 to every "experimental_end_date" entity returned by lineAfterStrings for the listed label spellings. +rule "DOC.4.0: Experimental Completion Date" + when + $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental termination date", + "Experimental termination date:", + "Experimental Completion Date", + "Experimental Completion Date:", + "Experimental completion date", + "Experimental completion date:", + "Experimental Termination Date", + "Experimental Termination Date:"), "experimental_end_date", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.4.0", "Experimental end date found", "n-a"); + }); + end + + +// Rule unit: DOC.5 - species/strain handling keyed on the "OECD Number" FileAttribute; DOC.5.0 (salience 1) removes and retracts all species and strain entities of a section when the number is one of 406, 428, 438, 439, 471, 474, 487. +rule "DOC.5.0: Ignore species and strain in irrelevant study types" + salience 1 + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) + $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + then + $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { + entity.remove("DOC.5.0", "removed"); + retract(entity); + }); + end + +rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" + salience 1 + when + FileAttribute(label == "OECD Number",
valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + (hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + && !( + anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("animals") + || anyHeadlineContainsStringIgnoreCase("specification") + ) + ) + then + $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { + entity.remove("DOC.5.1", "removed"); + retract(entity); + }); + end +// DOC.5.2: for the listed OECD Numbers, applies the rule to each "species" entity of the section and lower-cases its value with Locale.ROOT so the result does not depend on the JVM default locale. +rule "DOC.5.2: Species" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section(hasEntitiesOfType("species")) + then + $section.getEntitiesOfType("species").forEach(entity -> { + entity.apply("DOC.5.2", "Species found.", "n-a"); + entity.setValue(entity.getValue().toLowerCase(java.util.Locale.ROOT)); + }); + end +// DOC.5.3: for the listed OECD Numbers, applies the rule to each "strain" entity of a section that also holds species entities and has a headline mentioning "test system", "animals" or "specification" (case-insensitive). +rule "DOC.5.3: Strain" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + hasEntitiesOfType("species") + && hasEntitiesOfType("strain") + && ( + anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("animals") + || anyHeadlineContainsStringIgnoreCase("specification") + ) + ) + then + $section.getEntitiesOfType("strain").forEach(entity -> { + entity.apply("DOC.5.3", "Strain found.", "n-a"); + }); + end + + +// Rule unit: DOC.6 - study title; DOC.6.0 handles a page-1 table with exactly one row and one column that contains "Final Report" or "SPL": each Paragraph child of the matched cell that has both a previous and a next sibling becomes a "title" entity. NOTE(review): the cell constraint row == 1, col == 1 assumes 1-based cell indices - confirm against TableCell. +rule "DOC.6.0: study title by document structure" + when + $table: Table(onPage(1), + (containsString("Final Report") || containsString("SPL")), + numberOfRows == 1, + numberOfCols == 1) + $tableCell: TableCell(row == 1, col == 1) from $table.streamTableCells().toList() + $paragraph: Paragraph(previousSibling.isPresent(), nextSibling.isPresent()) from $tableCell.streamChildren().toList() + then + entityCreationService.bySemanticNode($paragraph, "title", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.6.0", "Study title found", "n-a"); + }); + end + +rule "DOC.6.1: study title" +
when + $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) + then + entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $table).findFirst().ifPresent(entity -> { + entity.apply("DOC.6.1", "Title found", "n-a"); + }); + end +// DOC.6.2: fallback used only when no page-1 Table contains "Final Report" or "SPL"; runs the same title regex on a page-1 Section with that text. NOTE(review): the guard also accepts "SPL", but the regex only anchors on a following "\nFinal Report" line. +rule "DOC.6.2: study title" + when + not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) + $section: Section(onPage(1), (containsString("Final Report") || containsString("SPL"))) + then + entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.6.2", "Title found", "n-a"); + }); + end + + +// Rule unit: DOC.7 - performing laboratory; DOC.7.0 applies to the first "laboratory_name" entity returned by lineAfterString for "PERFORMING LABORATORY:" in a section containing that label. +rule "DOC.7.0: Performing Laboratory (Name)" + when + $section: Section(containsString("PERFORMING LABORATORY:")) + then + entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.7.0", "Performing Laboratory found", "n-a"); + }); + end +// DOC.7.1: turns each NER "COUNTRY" entity whose text range is contained in a "PERFORMING LABORATORY:" section into a "laboratory_country" entity (when optionalByNerEntity returns one) and applies the rule to it. The binding nerEntities has no $ prefix, unlike the other bindings in this file. +rule "DOC.7.1: Performing Laboratory (Country)" + when + nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) + $section: Section(containsString("PERFORMING LABORATORY:")) + then + nerEntities.streamEntitiesOfType("COUNTRY") + .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) + .map(nerEntity -> entityCreationService.optionalByNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) + .flatMap(Optional::stream) + .collect(Collectors.toList()) + .forEach(entity -> { + entity.apply("DOC.7.1", "Performing Laboratory found", "n-a"); + }); + end + +rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" + when + $section: Section(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) + $countryOrNameFromDictionary:
TextEntity(type() == "laboratory_country" || type() == "laboratory_name", $type: type, isDictionaryEntry()) from $section.getEntities() + then + $countryOrNameFromDictionary.apply("DOC.7.2", "Performing " + $type + " dictionary entry found."); + end +// DOC.7.3: removes and retracts every "laboratory_country" and "laboratory_name" entity (both types, although the rule title only names Country) of a section that contains neither "PERFORMING LABORATORY:" nor the pair "PERFORMING" and "LABORATORY:". +rule "DOC.7.3: Performing Laboratory (Country) from dict" + when + $section: Section( + (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) + && !(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) + ) + then + $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name")).forEach(entity -> { + entity.remove("DOC.7.3", "removed"); + retract(entity); + }); + end + + +// Rule unit: DOC.8 - a Headline containing any of the listed GLP compliance/certificate phrases (case-sensitive containsString) is passed to bySemanticNode as a "glp_study" entity; DOC.8.0 is applied when an entity is returned. +rule "DOC.8.0: GLP Study" + when + $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") + || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") + || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) + || containsString("GLP Certificate") + || containsString("GLP Certificates") + || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") + || containsString("Good Laboratory Practice Certificate") + || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")) + then + entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.8.0", "GLP Study found", "n-a"); + }); + end + + +// Rule unit: DOC.9 +rule "DOC.9.0: Batch number from CoA" + when + $section: Section( + ( + anyHeadlineContainsString("Analytical Report") + || anyHeadlineContainsStringIgnoreCase("Certificate of Analysis") + || containsStringIgnoreCase("Certificate of Analysis") + ) + && ( + containsStringIgnoreCase("batch") + || containsStringIgnoreCase("bath") + || containsStringIgnoreCase("barch") + || containsStringIgnoreCase("bateb") + ) + && ( + containsStringIgnoreCase("identification") + ||
containsStringIgnoreCase("ldentitfication") + || containsStringIgnoreCase("wentification") + || containsStringIgnoreCase("mentification") + || containsStringIgnoreCase("kientification") + || containsStringIgnoreCase("reference number") + || containsStringIgnoreCase("test substance") + ) + ) + then + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "(Batch Identification):", + "Bateb Identification", + "Batch Wentification", + "Batch Mentification", + "Batch Kientification", + "Barch Identification", + "Bath ldentitfication", + "Batch of test substance :"), "batch_number", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.9.0", "Batch number found in CoA", "n-a"); + }); + end +// DOC.9.1: in test-substance/test-item sections (headline match is case-insensitive) whose headlines do not contain "component", "reference" or "blank" (case-sensitive) and whose text mentions "batch", concatenates the "Batch ID ..." regex results (third argument 1, presumably the capture group) with the lineAfterStrings results for the listed labels and applies the rule to every resulting "batch_number" entity. +rule "DOC.9.1: Batch number" + when + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Test Substance") + || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") + || anyHeadlineContainsStringIgnoreCase("Test Item") + ) + && !( + anyHeadlineContainsString("component") + || anyHeadlineContainsString("reference") + || anyHeadlineContainsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + then + Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section), + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:" + ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a) + .forEach(entity -> { + entity.apply("DOC.9.1", "Batch number found", "n-a"); + }); + end + +rule "DOC.9.2: Batch number" + when + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Test Substance") + || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") + || anyHeadlineContainsStringIgnoreCase("Test Item") + ) + && !( + anyHeadlineContainsString("component") + ||
anyHeadlineContainsString("reference") + || anyHeadlineContainsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + $batchNumber: String() from List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:") + $table: Table(containsStringIgnoreCase($batchNumber)) from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() + then + entityCreationService.lineAfterStringAcrossColumnsIgnoreCase($batchNumber, "batch_number", EntityType.ENTITY, $table).forEach(entity -> { + entity.apply("DOC.9.2", "Batch number found", "n-a"); + }); + end + + +// Rule unit: DOC.10 +rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) + $section: Section( + (getHeadline().containsStringIgnoreCase("Conclusion") || anyHeadlineContainsStringIgnoreCase("Lethality")) + && (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose")) + && ( + containsString("greater than") + || containsString("higher than") + || containsString("above") + || containsString("in excess") + || containsString("exceeds") + || containsString("was found to be") + || containsString("was calculated to be") + || containsString("estimated to be") + ) + ) + then + entityCreationService.byRegexIgnoreCase("(L[D|C]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.10.0", "LD50 greater than found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", 
"ld50_value", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.10.0", "LD50 value found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.10.0", "Minimal Confidence found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.10.0", "Maximal Confidence found", "n-a"); + }); + end + + +// Rule unit: DOC.11 +rule "DOC.11.0: Guideline Deviation" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + (getHeadline().containsStringIgnoreCase("General Information") || containsString("GENERAL INFORMATION")) + && (containsStringIgnoreCase("from the") || containsStringIgnoreCase("to the")) + ) + then + entityCreationService.betweenRegexes("(?:Deviations? from the [G|g]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [S|s]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from Guidelines found", "n-a"); + }); + entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)? 
[S|s]tudy [P|p]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Distribution of the report)|(?:Retention of [S|s]amples)", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from Study Plan found", "n-a"); + }); + entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Guideline deviation found in text.", "n-a"); + }); + entityCreationService.betweenStringsIncludeEnd("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"); + }); + end +// DOC.11.1: for the listed OECD Numbers, in sections whose headline contains "Introduction" and whose text contains "deviations from the protocol" (both case-insensitive), applies the rule to each "guideline_deviation" entity found by the "There were no deviations from the protocol ..." regex. NOTE(review): the match uses byRegex rather than byRegexIgnoreCase, so it is presumably case-sensitive although the guard is not - confirm. +rule "DOC.11.1: Guideline Deviation in text" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsStringIgnoreCase("Introduction") + && containsStringIgnoreCase("deviations from the protocol") + ) + then + entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.1", "Guideline deviation found in text.", "n-a"); + }); + end + + +// Rule unit: DOC.12 +rule "DOC.12.0: Clinical Signs" + when + FileAttribute(label == "OECD Number", value == "425") + $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE") && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC
FINDINGS")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.12.0", "Clinical Signs found", "n-a")); + end + + +// Rule unit: DOC.13 - for OECD Number 425, in sections with a "Dosages" or "Study Design" headline (case-insensitive) whose own headline does not contain "TABLE", creates "dosages" entities from two start/end string searches and one regex. NOTE(review): inside the character classes [\\s|\\n] and [\\s|\\n|$] the characters | and $ are literals in java.util.regex, so the final class matches a literal '$' rather than end of input - confirm intent. +rule "DOC.13.0: Dosages" + when + FileAttribute(label == "OECD Number", value == "425") + $section: Section( + (anyHeadlineContainsStringIgnoreCase("Dosages") || anyHeadlineContainsStringIgnoreCase("Study Design")) + && !getHeadline().containsString("TABLE") + ) + then + entityCreationService.betweenStringsIncludeStartAndEnd("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.13.0", "Dosage found", "n-a"); + }); + entityCreationService.betweenStringsIncludeStartAndEnd("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.13.0", "Dosage found", "n-a"); + }); + entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.13.0", "Dosage found", "n-a"); + }); + end + + +// Rule unit: DOC.14 - for OECD Number 425, passes the parent of every Headline containing "Mortality" but not "TABLE" (case-sensitive) to bySemanticNodeParagraphsOnly and applies DOC.14.0 to each resulting "mortality" entity. +rule "DOC.14.0: Mortality" + when + $headline: Headline(containsString("Mortality") && !containsString("TABLE")) + FileAttribute(label == "OECD Number", value == "425") + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.14.0", "Mortality found", "n-a")); + end + + +// Rule unit: DOC.15 +rule "DOC.15.0: Study Conclusion" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsStringIgnoreCase("Conclusion") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY) +
.forEach(entity -> entity.apply("DOC.15.0", "Study Conclusion found", "n-a")); + end + + +// Rule unit: DOC.16 - for OECD Number 402, sections whose headline contains "Results" and whose text contains one of the four body-weight spellings (all case-sensitive) are passed to bySemanticNodeParagraphsOnly; DOC.16.0 is applied to each resulting "weight_behavior_changes" entity. +rule "DOC.16.0: Weight Behavior Changes" + when + FileAttribute(label == "OECD Number", value == "402") + $section: Section( + getHeadline().containsString("Results") + && ( + containsString("body weight") + || containsString("body weights") + || containsString("bodyweight") + || containsString("bodyweights") + ) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.16.0", "Weight behavior changes found", "n-a")); + end + + +// Rule unit: DOC.17 - for OECD Numbers 402, 403 and 436, sections with a "Necropsy" headline or whose own headline mentions "Macroscopic Findings"/"Macroscopic examination" are used, unless the own headline contains "Table", "Appendix" or "3 - MACROSCOPIC FINDINGS" (all case-insensitive); DOC.17.0 is applied to each resulting "necropsy_findings" entity. +rule "DOC.17.0: Necropsy findings" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Necropsy") + || getHeadline().containsStringIgnoreCase("Macroscopic Findings") + || getHeadline().containsStringIgnoreCase("Macroscopic examination") + ) + && !getHeadline().containsStringIgnoreCase("Table") + && !getHeadline().containsStringIgnoreCase("Appendix") + && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) + .forEach( entity -> entity.apply("DOC.17.0", "Necropsy section found", "n-a")); + end + + +// Rule unit: DOC.18 +rule "DOC.18.0: Clinical observations" + when + FileAttribute(label == "OECD Number", value == "403") + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Clinical Observations") + || anyHeadlineContainsStringIgnoreCase("Clinical observations") + || anyHeadlineContainsStringIgnoreCase("In-life Observations") + || anyHeadlineContainsStringIgnoreCase("Postmortem Observations") + ) + && !anyHeadlineContainsStringIgnoreCase("Appendix") + && !anyHeadlineContainsStringIgnoreCase("Table") + && !anyHeadlineContainsStringIgnoreCase("Mortality") + ) + then +
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.18.0", "Clinical observations section found", "n-a")); + end + + +// Rule unit: DOC.19 - OECD Number 403 only: paragraphs of the parent of a bodyweight headline (not Appendix / TABLE) become "bodyweight_changes" entities +rule "DOC.19.0: Bodyweight changes" + when + FileAttribute(label == "OECD Number", value == "403") + $headline: Headline(containsAnyStringIgnoreCase("Bodyweight", "Bodyweights", "Body Weights", "Body Weight"), !containsAnyStringIgnoreCase("Appendix", "TABLE")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "bodyweight_changes", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.19.0", "Bodyweight section found", "n-a")); + end + + +// Rule unit: DOC.20 - "study_design" entities: 20.0 from study design sections for the listed OECD numbers, 20.1 from child headlines of a "Study Design" headline. NOTE(review): 20.1 has no OECD Number guard, unlike 20.0 - confirm this is intended +rule "DOC.20.0: Study Design" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) + $section: Section( + anyHeadlineContainsStringIgnoreCase("study design") + && !anyHeadlineContainsString("Preliminary screening test") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.20.0", "Study design section found", "n-a")); + end + +rule "DOC.20.1: Study Design" + when + Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier()) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_design", EntityType.ENTITY) + .forEach(entity -> { + entity.apply("DOC.20.1", "Study design section found", "n-a"); + }); + end + + +// Rule unit: DOC.21 - "results_and_conclusion" entities from Results / Conclusion headlines: 21.0 handles a matching headline that has no child headline, 21.1 handles each child headline of a matching headline +rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) + $parentHeadline: Headline( + containsAnyString("Results", "Conclusion"), + !containsAnyString("POSITIVE CONTROL", "Positive Control", 
"Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), + $sectionIdentifier: getSectionIdentifier() + ) + not Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($parentHeadline.getParent(), "results_and_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.21.0", "Results and Conclusion found", "n-a")); + end + +rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) + Headline( + containsAnyString("Results", "Conclusion"), + !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), + $sectionIdentifier: getSectionIdentifier() + ) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "results_and_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.21.1", "Results and Conclusion found", "n-a")); + end + + +// Rule unit: DOC.22 - OECD Number 404, 405: paragraphs of "Results" sections whose own headline contains neither "Evaluation" nor "study" become "detailing" entities +rule "DOC.22.0: Detailing (404 & 405)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) + $section: Section( + anyHeadlineContainsStringIgnoreCase("Results") + && !getHeadline().containsStringIgnoreCase("Evaluation") + && !getHeadline().containsStringIgnoreCase("study") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.22.0", "Detailing found", "n-a")); + end + + +// Rule unit: DOC.23 - OECD Number 429 only: paragraphs of preliminary screening test sections mentioning "Clinical observations", or of "Pre-Experiment" sections, become "preliminary_test_results" entities +rule "DOC.23.0: Preliminary Test Results (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + ((anyHeadlineContainsString("Preliminary Screening Test") && containsString("Clinical observations")) + || 
anyHeadlineContainsString("Pre-Experiment")) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.23.0", "Preliminary Test Results found", "n-a")); + end + + +// Rule unit: DOC.24 - "test_results" entities: 24.0 from sections with one of three result headlines (OECD Number 429), 24.1 from child headlines of a "RESULTS AND DISCUSSION" headline. NOTE(review): 24.1 has no OECD Number guard despite the "(429)" in its name - confirm this is intended +rule "DOC.24.0: Test Results (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.24.0", "Test Results found", "n-a")); + end + +rule "DOC.24.1: Test Results (429)" + when + Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier()) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifierResultsAndDiscussion)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "test_results", EntityType.ENTITY) + .forEach(entity -> { + entity.apply("DOC.24.1", "Test Results found", "n-a"); + }); + end + + +// Rule unit: DOC.25 - OECD Number 429 only: in sections that already hold "species" entities, capture group 1 of the "animals per / group" regex becomes an "approach_used" entity +rule "DOC.25.0: Approach used (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + hasEntitiesOfType("species") + && (containsStringIgnoreCase("animals per") || containsStringIgnoreCase("animals /")) + ) + then + entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.25.0", "Study animal approach found.", "n-a"); + }); + end + + +// Rule unit: DOC.26 - OECD Number 405, 429: capture group 2 (male / female) of the sex regex becomes a "sex" entity in animal / test system sections whose headline does not contain "selection" +rule "DOC.26.0: Sex" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) + $section: Section( + ( + 
anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") + ) + && !getHeadline().containsStringIgnoreCase("selection") + && ( + containsStringIgnoreCase("sex:") + || containsStringIgnoreCase("male") + || containsStringIgnoreCase("female") + ) + ) + then + entityCreationService.byRegexIgnoreCase("([S|s]ex:)?[\\w\\s]{0,10}\\b(males?|females?)\\b", "sex", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.26.0", "Test animal sex found", "n-a"); + }); + end + + +// Rule unit: DOC.27 - OECD Number 405 only: creates "number_of_animals" (count after "Number of animals:") and "animal_numbers" (identifier after "No.") entities in animal / test system / reaction sections +rule "DOC.27.0: Animal Number 405" + when + FileAttribute(label == "OECD Number", value == "405") + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("reaction") + ) + && !getHeadline().containsString("selection") + && ( + containsStringIgnoreCase("number of animals") + || containsStringIgnoreCase("no.") + ) + ) + then + entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.27.0", "Number of animals found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\. 
)([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.27.0", "Number of animals found", "n-a"); + }); + end + + +// Rule unit: DOC.28 - OECD Number 429 only: 28.0 creates "number_of_animals" entities from three per-group count regexes, 28.1 creates "animal_numbers" entities from cells of individual-animal tables +rule "DOC.28.0: Animal Number 429" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") + ) + && !getHeadline().containsString("selection") + && containsStringIgnoreCase("number of animals") + && (containsStringIgnoreCase("per") || containsString("/")) + && containsStringIgnoreCase("group") + ) + then + entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("(Number of animals per group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section).forEach(entity -> { + entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); + }); + end + +rule "DOC.28.1: No. 
Of animals - Fallback to appendix tables listing all individual animals for 429" + when + $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") + $table: Table(containsString($keyword) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual")) + FileAttribute(label == "OECD Number", value == "429") + then + $table.streamTableCellsWithHeader($keyword) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "animal_numbers", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.apply("DOC.28.1", "Animal number found.", "n-a"); + }); + end + + +// Rule unit: DOC.29 - OECD Number 403, 436: sentences mentioning "4 hours" / "four hours" become "4h_exposure" entities (capture group 1 of the regex) +rule "DOC.29.0: 4h Exposure" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) + $section: Section( + (containsStringIgnoreCase("4 hours") || containsStringIgnoreCase("four hours")) + ) + then + entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) ", "4h_exposure", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.29.0", "4h exposure sentence found", "n-a"); + }); + end + + +// Rule unit: DOC.30 - OECD Number 404 only: paragraphs of "Formulation" sections that contain "dilution" become "dilution" entities +rule "DOC.30.0: Dilution of the test substance" + when + FileAttribute(label == "OECD Number", value == "404") + $section: Section( + getHeadline().containsString("Formulation") + && containsString("dilution") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.30.0", "Dilution found.", "n-a")); + end + + +// Rule unit: DOC.31 - OECD Number 429 only: paragraphs of "Positive Control" sections (headline not Appendix / Table) become "positive_control" entities +rule "DOC.31.0: Positive Control" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + getHeadline().containsStringIgnoreCase("Positive Control") + && !(getHeadline().containsStringIgnoreCase("Appendix") || getHeadline().containsStringIgnoreCase("Table")) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY) + 
.forEach(entity -> entity.apply("DOC.31.0", "Positive control found.", "n-a")); + end + + +// Rule unit: DOC.32 - OECD Number 402 only: paragraphs of the parent of a "Mortality" headline (not containing "TABLE") become "mortality_statement" entities +rule "DOC.32.0: Mortality Statement" + when + FileAttribute(label == "OECD Number", value == "402") + $headline: Headline(containsStringIgnoreCase("Mortality") && !containsString("TABLE")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.32.0", "Mortality Statement found", "n-a")); + end + + +// Rule unit: DOC.33 - OECD Number 425 only: in tables with both a mortality-like header and a dose header, cells under the mortality headers become "dose_mortality" entities and cells under the dose headers become "dose_mortality_dose" entities +rule "DOC.33.0: Dose Mortality" + when + FileAttribute(label == "OECD Number", value == "425") + $table: Table( + (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("LongTerm Outcome") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality") || hasHeader("Viability/Mortality")) + && + (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) + ) + then + Stream.of($table.streamTableCellsWithHeader("Mortality"), + $table.streamTableCellsWithHeader("Comments"), + $table.streamTableCellsWithHeader("Long Term Results"), + $table.streamTableCellsWithHeader("Long Term Outcome"), + $table.streamTableCellsWithHeader("LongTerm Outcome"), + $table.streamTableCellsWithHeader("Viability / Mortality"), + $table.streamTableCellsWithHeader("Viability/Mortality") + ).flatMap(a -> a) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"); + }); + + Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), + $table.streamTableCellsWithHeader("Dose [mg/kg body weight]"), + 
$table.streamTableCellsWithHeader("Dose levei (mg/kg)"), + $table.streamTableCellsWithHeader("Dose Level (mg/kg)"), + $table.streamTableCellsWithHeader("Dose level (mg/kg)"), + $table.streamTableCellsWithHeader("Dose (mg/kg)"), + $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]") + ).flatMap(a -> a) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"); + }); + end + + +// Rule unit: DOC.34 - OECD Number 429 only: paragraphs of sections with a short (text range length < 20) "Results" headline that is not Appendix / Table become "results_(main_study)" entities +rule "DOC.34.0: Results (Main Study)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + getHeadline().containsString("Results") + && getHeadline().getTextRange().length() < 20 + && !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table")) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.34.0", "Results for main study found.", "n-a")); + end + + +// Rule unit: DOC.35 - OECD Number 402 only: paragraphs of study design sections become "doses_(mg_kg_bw)" entities +rule "DOC.35.0: Doses (mg/kg bodyweight)" + when + FileAttribute(label == "OECD Number", value == "402") + $section: Section( + anyHeadlineContainsStringIgnoreCase("study design") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.35.0", "Doses per bodyweight information found", "n-a")); + end + + +//------------------------------------ AI rules ------------------------------------ + +// Rule unit: AI.4 - NER "Person" entities with a value longer than 3 and shorter than 100 characters become skipped CBI_author recommendations (0.7 is passed as the confidence argument) +rule "AI.4.0: Add all NER Entities of type Person" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("Person")) + then + nerEntities.streamEntitiesOfType("Person") + .filter(entity -> entity.value().length() > 3) + .filter(entity -> entity.value().length() < 100) + .forEach(nerEntity -> 
entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document) + .ifPresent(e -> e.skip("AI.4.0", ""))); + end + + +// Rule unit: AI.5 - combines Location / Address / Organization style NER entities into skipped CBI_address recommendations; the numeric arguments 50, 3, 2, 0.7 are passed through to combineNerEntitiesWithConfidence (meaning defined there) +rule "AI.5.0: Combine and add NER Entities as CBI_address" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization")) + then + entityCreationService + .combineNerEntitiesWithConfidence( + nerEntities, + "CBI_address", + EntityType.RECOMMENDATION, + document, + Set.of("Organization", + "Location", + "Address", + "ORG", + "STREET", + "CITY"), + Set.of("Organization", + "Location", + "Address", + "ORG", + "STREET", + "POSTAL", + "COUNTRY", + "CARDINAL", + "CITY", + "STATE"), + 50, + 3, + 2, + 0.7) + .forEach(nerEntity -> nerEntity.skip("AI.5.0", "")); + end + + +// Rule unit: AI.6 - NER "Location" entities with a value longer than 3 and shorter than 100 characters become skipped CBI_address recommendations +rule "AI.6.0: Add all NER Entities of type Location" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("Location")) + then + nerEntities.streamEntitiesOfType("Location") + .filter(entity -> entity.value().length() > 3) + .filter(entity -> entity.value().length() < 100) + .forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document) + .ifPresent(e -> e.skip("AI.6.0", ""))); + end + + +// Rule unit: AI.7 - NER "Address" entities with a value longer than 3 and shorter than 100 characters become skipped CBI_address recommendations +rule "AI.7.0: Add all NER Entities of type Address" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("Address")) + then + nerEntities.streamEntitiesOfType("Address") + .filter(entity -> entity.value().length() > 3) + .filter(entity -> entity.value().length() < 100) + .forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document) + .ifPresent(e -> e.skip("AI.7.0", ""))); + end + + +//------------------------------------ Manual changes rules ------------------------------------ + +// Rule unit: MAN.0 - applies a ManualResizeRedaction to the matching TextEntity (0.0) or Image (0.1) once no resize with an earlier requestDate remains for the same annotation id, then retracts it +rule "MAN.0.0: 
Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId, $requestDate: requestDate) + not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) + then + manualChangesApplicationService.resize($entityToBeResized, $resizeRedaction); + retract($resizeRedaction); + end + +rule "MAN.0.1: Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId, $requestDate: requestDate) + not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) + $imageToBeResized: Image(id == $id) + then + manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); + retract($resizeRedaction); + end + + +// Rule unit: MAN.1 - attaches an IdRemoval to the matching TextEntity (1.0, only when neither removeFromDictionary nor removeFromAllDossiers is set) or Image (1.1) and retracts the IdRemoval fact +rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" + salience 128 + when + $idRemoval: IdRemoval($id: annotationId, !removeFromDictionary, !removeFromAllDossiers) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) + then + $entityToBeRemoved.addManualChange($idRemoval); + retract($idRemoval); + end + +rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" + salience 128 + when + $idRemoval: IdRemoval($id: annotationId) + $imageEntityToBeRemoved: Image($id == id) + then + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); + retract($idRemoval); + end + + +// Rule unit: MAN.2 - attaches a ManualForceRedaction to the matching TextEntity (2.0) or Image (2.1) and retracts the force fact +rule "MAN.2.0: Apply force redaction" + salience 128 + when + $force: ManualForceRedaction($id: annotationId) + $entityToForce: TextEntity(matchesAnnotationId($id)) + then + $entityToForce.addManualChange($force); + retract($force); + end + +rule "MAN.2.1: Apply force redaction to images" + salience 128 + when + $force: ManualForceRedaction($id: annotationId) + $imageToForce: Image(id == $id) + then + $imageToForce.getManualOverwrite().addChange($force); + retract($force); + 
end + + +// Rule unit: MAN.3 - applies ManualRecategorization facts to the matching TextEntity (3.0 different type, 3.1 same type) or Image (3.2) once no recategorization with an earlier requestDate remains for the same annotation id; 3.3 applies recategorized non-hint entities by default +rule "MAN.3.0: Apply entity recategorization" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $type: type, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() != $type) + then + $entityToBeRecategorized.addManualChange($recategorization); + retract($recategorization); + end + +rule "MAN.3.1: Apply entity recategorization of same type" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $type: type, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() == $type) + then + $entityToBeRecategorized.addManualChange($recategorization); + retract($recategorization); + end + +rule "MAN.3.2: Apply image recategorization" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $imageToBeRecategorized: Image($id == id) + then + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); + retract($recategorization); + end + +rule "MAN.3.3: Apply recategorization entities by default" + salience 128 + when + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + then + $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); + end + + +// Rule unit: MAN.4 - attaches a ManualLegalBasisChange to the matching Image (4.0) or TextEntity (4.1) and retracts the change fact +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalBasisChange); + retract($legalBasisChange); + end + +rule "MAN.4.1: 
Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then + $entityToBeChanged.addManualChange($legalBasisChange); + retract($legalBasisChange); + end + + +//------------------------------------ Entity merging rules ------------------------------------ + +// Rule unit: X.0 - removes an entity contained by another entity of the same type and entityType: X.0.0 when the container has no manual changes and no equal-range DOSSIER_DICTIONARY DICTIONARY_REMOVAL exists, X.0.1 when the container has manual changes +rule "X.0.0: Remove Entity contained by Entity of same type" + salience 65 + when + $containment: Containment( + $container: container, + $contained: contained, + $container.type() == $contained.type(), + $container.entityType == $contained.entityType, + $container != $contained, + !$container.removed(), + !$container.hasManualChanges(), + !$contained.hasManualChanges(), + !$contained.removed() + ) + not TextEntity( + getTextRange().equals($container.getTextRange()), + type() == $container.type(), + entityType == EntityType.DICTIONARY_REMOVAL, + engines contains Engine.DOSSIER_DICTIONARY, + !hasManualChanges() + ) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); +end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $containment: Containment( + $container: container, + $contained: contained, + $container.type() == $contained.type(), + $container.entityType == $contained.entityType, + $container != $contained, + !$container.removed(), + $container.hasManualChanges(), + !$contained.hasManualChanges(), + !$contained.removed() + ) + then + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); +end + + +// Rule unit: X.2 - removes ENTITY (X.2.0) and HINT (X.2.1) entities without manual changes that are contained by an active FALSE_POSITIVE of the same type +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" + salience 64 + when + $containment: Containment( + $container: container, + $contained: contained, + $container.entityType == EntityType.FALSE_POSITIVE, + $container.active(), + $contained.entityType == EntityType.ENTITY, + $contained.type() == 
$container.type(), + !$contained.hasManualChanges() + ) + then + $contained.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); +end + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $containment: Containment( + $container: container, + $contained: contained, + $container.entityType == EntityType.FALSE_POSITIVE, + $container.active(), + $contained.entityType == EntityType.HINT, + $contained.type() == $container.type(), + !$contained.hasManualChanges() + ) + then + $contained.remove("X.2.1", "remove Entity of type HINT when contained by FALSE_POSITIVE"); +end + + +// Rule unit: X.3 - removes a RECOMMENDATION without manual changes that is contained by an active FALSE_RECOMMENDATION of the same type +rule "X.3.0: Remove RECOMMENDATION Contained by FALSE_RECOMMENDATION" + salience 64 + when + $containment: Containment( + $container: container, + $contained: contained, + $container.entityType == EntityType.FALSE_RECOMMENDATION, + $container.active(), + $contained.entityType == EntityType.RECOMMENDATION, + $contained.type() == $container.type(), + !$contained.hasManualChanges() + ) + then + $contained.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); +end + + +// Rule unit: X.4 - when a RECOMMENDATION has the same text range and type as an active ENTITY or HINT, its engines are added to that entity and the RECOMMENDATION is removed +rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY with same type" + salience 256 + when + $equality: Equality( + $a: a, + $b: b, + $a.type() == $b.type(), + ($a.entityType == EntityType.ENTITY || $a.entityType == EntityType.HINT), + $a.active(), + $b.entityType == EntityType.RECOMMENDATION, + !$b.hasManualChanges() + ) + then + $a.addEngines($b.getEngines()); + $b.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); + end + + +// Rule unit: X.5 - removes RECOMMENDATIONs without manual changes: 5.0 when intersected by an active ENTITY / HINT (any type), 5.1 when contained by an active RECOMMENDATION of a different type, 5.2 when contained by a non-removed ENTITY / HINT of the same type +rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" + salience 256 + when + $intersection: Intersection( + $a: a, + $b: b, + ($a.entityType == EntityType.ENTITY || $a.entityType == EntityType.HINT), + $a.active(), + $b.entityType == 
EntityType.RECOMMENDATION, + !$b.hasManualChanges() + ) + then + $b.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); + end + +rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATION" + salience 256 + when + $containment: Containment( + $container: container, + $contained: contained, + $container.entityType == EntityType.RECOMMENDATION, + $container.active(), + $contained.entityType == EntityType.RECOMMENDATION, + $container.type() != $contained.type(), + !$contained.hasManualChanges() + ) + then + $contained.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); +end + + +rule "X.5.2: Remove Entity of type RECOMMENDATION when contained by ENTITY of same type" + salience 256 + when + $intersection: Containment( + $container: container, + $contained: contained, + ($container.entityType == EntityType.ENTITY || $container.entityType == EntityType.HINT), + !$container.removed(), + $contained.entityType == EntityType.RECOMMENDATION, + $container.type() == $contained.type(), + !$contained.hasManualChanges() + ) + then + $contained.remove("X.5.2", "remove Entity of type RECOMMENDATION when contained by ENTITY of same type"); + end + + +// Rule unit: X.7 - removes and retracts every image that is not of ImageType.OCR and has no manual changes +rule "X.7.0: Remove all images" + salience 512 + when + $image: Image(imageType != ImageType.OCR, !hasManualChanges()) + then + $image.remove("X.7.0", "remove all images"); + retract($image); + end + + +// Rule unit: X.8 - an active IMPORTED entity wins over non-imported ones: 8.0 removes an equal-range entity of the same type and merges its engines into the imported entity, 8.1 removes any intersecting non-imported entity +rule "X.8.0: Remove Entity when text range and type equals to imported Entity" + salience 257 + when + $equality: Equality( + $a: a, + $b: b, + $a.type() == $b.type(), + $a.engines contains Engine.IMPORTED, + $a.active(), + $b.engines not contains Engine.IMPORTED, + $a != $b + ) + then + $b.remove("X.8.0", "remove Entity when text range and type equals to imported Entity"); + $a.addEngines($b.getEngines()); + end + +rule "X.8.1: Remove Entity when intersected by imported Entity" + salience 256 + when + 
$intersection: Intersection( + $a: a, + $b: b, + $a.engines contains Engine.IMPORTED, + $a.active(), + $b.engines not contains Engine.IMPORTED, + $a != $b + ) + then + $b.remove("X.8.1", "remove Entity when intersected by imported Entity"); + end + + +// Rule unit: X.9 - removes an AI signature image that an ALGORITHM signature image mostly contains (0.8 threshold argument) and adds LayoutEngine.AI to the ALGORITHM signature +rule "X.9.0: Merge mostly contained signatures" + when + $aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI) + $signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8)) + then + $aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature"); + $signature.addEngine(LayoutEngine.AI); + end + + +// Rule unit: X.10 - removes AI signature images not mostly contained by an ALGORITHM image. NOTE(review): this matches per ALGORITHM image, so one non-containing ALGORITHM image is enough to remove the AI signature even if another one contains it - confirm intent +rule "X.10.0: remove false positives of ai" + when + $anyImage: Image(engines contains LayoutEngine.ALGORITHM) + $aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8)) + then + $aiSignature.remove("X.10.0", "Removed because false positive"); + end + + +// Rule unit: X.11 - removes a non-manual dictionary entity that intersects an active MANUAL entity +rule "X.11.0: Remove dictionary entity which intersects with a manual entity" + salience 64 + when + $intersection: Intersection( + $a: a, + $b: b, + $a.engines contains Engine.MANUAL, + $a.active(), + $b.dictionaryEntry, + $b.engines not contains Engine.MANUAL + ) + then + $b.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); + end + + +//------------------------------------ Dictionary merging rules ------------------------------------ + +// Rule unit: DICT.0 - removes a DICTIONARY-engine ENTITY / HINT without manual changes whose text range and type equal a DOSSIER_DICTIONARY DICTIONARY_REMOVAL, and tags it with Engine.DOSSIER_DICTIONARY +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" + salience 64 + when + $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) + $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || 
entityType == EntityType.HINT), !hasManualChanges()) + then + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); + end + + +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 - retracts a FileAttribute fact when another FileAttribute with the same label and value exists +rule "FA.1.0: Remove duplicate FileAttributes" + salience 64 + when + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) + then + retract($duplicate); + end + + +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 - for every dictionary model with local entries, runs its local search over the document and copies the matched rules of each hit onto the new RECOMMENDATION (via asSkippedIfApplied); the lookup is chained so no raw Collection local is needed +rule "LDS.0.0: Run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + $dictionaryModel.getMatchedRulesForLocalDictionaryEntry(entity.getValue()) + .forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl new file mode 100644 index 00000000..13763953 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl @@ -0,0 +1,493 @@ +package drools + +import static java.lang.String.format; +import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; +import static 
com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; + +import java.util.List; +import java.util.LinkedList; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; + +import com.iqser.red.service.redaction.v1.server.logger.RulesLogger; + +import com.iqser.red.service.redaction.v1.server.model.component.Component; +import com.iqser.red.service.redaction.v1.server.model.component.Entity; +import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService; +import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; + +global ComponentCreationService componentCreationService +global ComponentMappingService componentMappingService +global RulesLogger logger + + +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" // matches every FileAttribute fact in working memory + $fileAttribute: FileAttribute() + end + +query "getComponents" // matches every Component fact in working memory + $component: Component() + end + +//------------------------------------ table rules ------------------------------------ + +rule "TableComponents.900.0: Create components for all table entities." 
+ salience -900 + when + $tables: List() from collect (Entity(type == "Table")) + then + componentCreationService.createComponentForTables("TableComponents.900.0", $tables); + end + +//------------------------------------ Default Components rules ------------------------------------ + +rule "StudyTitle.0.0: First Title found" + when + $titleCandidates: List() from collect (Entity(type == "title")) + then + componentCreationService.firstOrElse("StudyTitle.0.0", "Study_Title", $titleCandidates, ""); + end + + +rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section" + when + $laboratoryName: Entity(type == "laboratory_name", $node: containingNode) + $laboratoryCountry: Entity(type == "laboratory_country", containingNode == $node) + not Entity(type == "laboratory_country", containingNode == $node, Math.abs($laboratoryName.startOffset - startOffset) < Math.abs($laboratoryName.startOffset - $laboratoryCountry.startOffset)) + then + componentCreationService.create("PerformingLaboratory.1.0", "Performing_Laboratory", $laboratoryName.getValue() + ", " + $laboratoryCountry.getValue(), "Laboratory name and country found!", List.of($laboratoryName, $laboratoryCountry)); + end + +rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section" + when + $laboratoryName: Entity(type == "laboratory_name", $node: containingNode) + not Entity(type == "laboratory_country", containingNode == $node) + then + componentCreationService.create("PerformingLaboratory.2.0", "Performing_Laboratory", $laboratoryName.getValue(), "Only laboratory name found!", List.of($laboratoryName)); + end + +rule "PerformingLaboratory.0.2: Performing Laboratory not found" + salience -1 + when + not Component(name == "Performing_Laboratory") + then + componentCreationService.create("PerformingLaboratory.0.2", "Performing_Laboratory", "", "fallback"); + end + + +rule "ReportNumber.0.0: First Report number found" + when + 
$reportNumberCandidates: List() from collect (Entity(type == "report_number")) + then + componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumberCandidates, ""); + end + + +rule "GLPStudy.0.0: GLP Study found" + when + $glpStudyList: List(!isEmpty) from collect(Entity(type == "glp_study")) + then + componentCreationService.create("GLPStudy.0.0", "GLP_Study", "Yes", "Yes if present, No if not", $glpStudyList); + end + +rule "GLPStudy.1.0: GLP Study not found" + when + not Entity(type == "glp_study") + then + componentCreationService.create("GLPStudy.1.0", "GLP_Study", "No", "Yes if present, No if not"); + end + +rule "TestGuideline.0.1: match OECD number and year with guideline mappings" + salience 1 + when + not Component(name == "Test_Guidelines_1") + $guidelineNumber: Entity(type == "oecd_guideline_number", $number: value) + $guidelineYear: Entity(type == "oecd_guideline_year", $year: value) + then + Optional guidelineMatch = componentMappingService.from("GuidelineMapping").where("number = " + $number).where("year = " + $year).select("description").findAny(); + if (guidelineMatch.isEmpty()) { + return; + } + componentCreationService.create( + "TestGuideline.0.1", + "Test_Guidelines_1", + guidelineMatch.get(), + "OECD Number and guideline year mapped!", + List.of($guidelineNumber, $guidelineYear) + ); + end + +rule "TestGuideline.1.0: no guideline mapping found" + when + not Component(name == "Test_Guidelines_1") + $guideLine: Entity(type == "oecd_guideline") + then + componentCreationService.create("TestGuideline.1.0", "Test_Guidelines_1", $guideLine.getValue(), "No Mapping for OECD number and year found, using fallback instead!", List.of($guideLine)); + end + +rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines" + when + $guidelines: List() from collect (Entity(type == "epa_guideline" || type == "ec_guideline")) + then + componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $guidelines); + end
+ + +rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy" + when + $startDates: List() from collect (Entity(type == "experimental_start_date")) + then + componentCreationService.convertDates("StartDate.0.0", "Experimental_Starting_Date", $startDates); + end + + +rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy" + when + $endDates: List() from collect (Entity(type == "experimental_end_date")) + then + componentCreationService.convertDates("CompletionDate.0.0", "Experimental_Completion_Date", $endDates); + end + + +rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification" + when + $batchNumbers: List() from collect (Entity(type == "batch_number")) + then + componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Certificate_of_Analysis_Batch_Identification", $batchNumbers); + end + +rule "StudyConclusion.0.0: Study conclusion in first found section" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $studyConclusions: List() from collect(Entity(type == "study_conclusion")) + then + componentCreationService.joiningFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $studyConclusions, " "); + end + +rule "GuidelineDeviation.0.0: Guideline deviation as sentences" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $guidelineDeviations: List() from collect (Entity(type == "guideline_deviation")) + then + componentCreationService.joining("GuidelineDeviation.0.0", "Deviation_from_the_Guideline", $guidelineDeviations, "\n"); + end + +rule "Species.0.0: First found species" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $species: List() 
from collect (Entity(type == "species")) + then + componentCreationService.firstOrElse("Species.0.0", "Species", $species, ""); + end + +rule "Strain.0.0: First found strain" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $strain: List() from collect (Entity(type == "strain")) + then + componentCreationService.firstOrElse("Strain.0.0", "Strain", $strain, ""); + end + +rule "Conclusion.0.0: Unique values of Conclusion LD50" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List() from collect (Entity(type == "ld50_value")) + then + componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $conclusions); + end + +rule "Conclusion.1.0: Greater than found" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List(!isEmpty()) from collect (Entity(type == "ld50_greater")) + then + componentCreationService.create("Conclusion.1.0", "Conclusion_LD50_Greater_than", "Greater than", "Entity of type 'ld50_greater' found", $conclusions); + end + +rule "Conclusion.1.1: Greater than not found" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + not Entity(type == "ld50_greater") + then + componentCreationService.create("Conclusion.1.1", "Conclusion_LD50_Greater_than", "", "No entity of type 'ld50_greater' found"); + end + +rule "Conclusion.2.0: Minimum confidence as unique values" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List() from collect (Entity(type == "confidence_minimal")) + then + componentCreationService.joiningUnique("Conclusion.2.0",
"Conclusion_Minimum_Confidence", $conclusions); + end + +rule "Conclusion.3.0: Maximum confidence as unique values" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List() from collect (Entity(type == "confidence_maximal")) + then + componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $conclusions); + end + +rule "Necropsy.0.0: Necropsy findings from longest section" + when + FileAttribute(label == "OECD Number", value == "402") + $necropsies: List() from collect (Entity(type == "necropsy_findings")) + then + componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $necropsies, " "); + end + +rule "Necropsy.0.1: Necropsy findings joined with \n" + when + FileAttribute(label == "OECD Number", value == "403" || value == "436") + $necropsies: List() from collect (Entity(type == "necropsy_findings")) + then + componentCreationService.joining("Necropsy.0.1", "Necropsy_Findings", $necropsies, "\n"); + end + +rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block" + when + FileAttribute(label == "OECD Number", value == "402") + $dosages: List() from collect (Entity(type == "doses_(mg_kg_bw)")) + then + componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $dosages, " "); + end + +rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block" + when + $oecdNumber: String() from List.of("403", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $exposures: List() from collect (Entity(type == "4h_exposure")) + then + componentCreationService.joining("Necropsy.2.0", "Conducted_with_4_Hours_of_Exposure", $exposures, " "); + end + +rule "StudyDesign.0.0: Study design as one block" + when + $oecdNumber: String() from List.of("404", "405", "406", "428", "429", "438", "439", "474", "487") + FileAttribute(label == "OECD Number", value == $oecdNumber) +
$studyDesigns: List() from collect (Entity(type == "study_design")) + then + componentCreationService.joining("StudyDesign.0.0", "Study_Design", $studyDesigns, " "); + end + +rule "Results.0.0: Results and conclusions as joined values" + when + $oecdNumber: String() from List.of("406", "428", "438", "439", "474", "487") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "results_and_conclusion")) + then + componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $results, " "); + end + +rule "WeightBehavior.0.0: Weight change behavior as sentences" + when + FileAttribute(label == "OECD Number", value == "402") + $weightChanges: List() from collect (Entity(type == "weight_behavior_changes")) + then + componentCreationService.joining("WeightBehavior.0.0", "Weight_Behavior_Changes", $weightChanges, "\n"); + end + +rule "MortalityStatement.0.0: Mortality statements as one block" + when + FileAttribute(label == "OECD Number", value == "402") + $mortalityStatements: List() from collect (Entity(type == "mortality_statement")) + then + componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $mortalityStatements, " "); + end + +rule "ClinicalObservations.0.0: Clinical observations as sentences" + when + FileAttribute(label == "OECD Number", value == "403") + $observations: List() from collect (Entity(type == "clinical_observations")) + then + componentCreationService.joining("ClinicalObservations.0.0", "Clinical_Observations", $observations, "\n"); + end + +rule "BodyWeight.0.0: Bodyweight changes as sentences" + when + FileAttribute(label == "OECD Number", value == "403") + $weightChanges: List() from collect (Entity(type == "bodyweight_changes")) + then + componentCreationService.joining("BodyWeight.0.0", "Body_Weight_Changes", $weightChanges, "\n"); + end + +rule "Detailing.0.0: Detailing of reported changes as one block" + when + $oecdNumber: String() from List.of("404",
"405") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $detailings: List() from collect (Entity(type == "detailing")) + then + componentCreationService.joining("Detailing.0.0", "Detailing_of_Reported_Changes", $detailings, " "); + end + +rule "Sex.0.0: Male sex found" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $males: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males"))) + then + componentCreationService.create("Sex.0.0", "Sex", "male", "male sex found", $males); + end + +rule "Sex.1.0: Female sex found" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females"))) + then + componentCreationService.create("Sex.1.0", "Sex", "female", "female sex found", $females); + end + +rule "NumberOfAnimals.0.0: Number of animals found" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $numberOfAnimals: Entity(type == "number_of_animals") + then + componentCreationService.create("NumberOfAnimals.0.0", "Number_of_Animals", $numberOfAnimals.getValue(), "Number of animals found directly", $numberOfAnimals); + end + +rule "NumberOfAnimals.1.0: Count unique occurrences of animals" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + not Entity(type == "number_of_animals") + $animals: List() from collect (Entity(type == "animal_number")) + then + componentCreationService.uniqueValueCount("NumberOfAnimals.1.0", "Number_of_Animals", $animals); + end + +rule "ClinicalSigns.0.0: Clinical signs as sentences" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value ==
$oecdNumber) + $clinicalSigns: List() from collect (Entity(type == "clinical_signs")) + then + componentCreationService.joining("ClinicalSigns.0.0", "Clinical_Signs", $clinicalSigns, "\n"); + end + +rule "DoseMortality.0.0: Dose mortality joined with dose from same table row" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $doseMortalities: List() from collect (Entity(type == "dose_mortality" || type == "dose_mortality_dose")) + then + componentCreationService.joiningFromSameTableRow("DoseMortality.0.0", "Dose_Mortality", $doseMortalities); + end + +rule "Mortality.0.0: Mortality as one block" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $mortalities: List() from collect (Entity(type == "mortality")) + then + componentCreationService.joining("Mortality.0.0", "Mortality", $mortalities, " "); + end + +rule "Dosages.0.0: First found value of Dosages" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $mortalities: List() from collect (Entity(type == "dosages")) + then + componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $mortalities, ""); + end + +rule "PrelimResults.0.0: Preliminary test results as sentences" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "preliminary_test_results")) + then + componentCreationService.joining("PrelimResults.0.0", "Preliminary_Test_Results", $results, "\n"); + end + +rule "TestResults.0.0: Test results as one block" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "test_results")) + then + componentCreationService.joining("TestResults.0.0", "Test_Results", $results, " "); + end + +rule 
"PositiveControl.0.0: Was the definitive study conducted with positive control" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "positive_control")) + then + componentCreationService.joining("PositiveControl.0.0", "Was_the_definitive_study_conducted_with_positive_control", $results, " "); + end + +rule "MainResults.0.0: Results from main study as one block" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "results_(main_study)")) + then + componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $results, " "); + end + +rule "UsedApproach.0.0: Used approach found and mapped to 'Group'" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List(!isEmpty()) from collect (Entity(type == "approach_used")) + then + componentCreationService.create("UsedApproach.0.0", "What_was_the_approach_used", "Group", "'Group' when approach used is present, else 'Individual'", $results); + end + +rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + not Entity(type == "approach_used") + then + componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'"); + end + +rule "DefaultComponents.999.0: Create components for all unmapped entities." 
+ salience -999 + when + not FileAttribute(label == "OECD Number") + $allEntities: List(!isEmpty()) from collect (Entity()) + then + componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities); + end + + +//------------------------------------ Component merging rules ------------------------------------ +/* +rule "X.0.0: merge duplicate component references" + when + $first: Component() + $duplicate: Component(this != $first, name == $first.name, value == $first.value) + then + $first.getReferences().addAll($duplicate.getReferences()); + retract($duplicate); + end +*/ \ No newline at end of file -- 2.47.2 From a9fff497b52563275097f5f734c9760c7c6561b4 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 4 Feb 2025 13:16:29 +0100 Subject: [PATCH 5/5] RED-10708: Tables as components in DM --- .../v1/server/TableComponentsIntegrationTest.java | 4 ++-- .../test/resources/drools/documine_flora_table_test.drl | 4 ++-- .../drools/documine_flora_table_test_components.drl | 5 ++--- .../src/main/resources/all_rules_documine.drl | 9 +++++++++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java index 26ed5e7c..1fd36058 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/TableComponentsIntegrationTest.java @@ -82,9 +82,9 @@ class TableComponentsIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); boolean tableEntityFound = 
entityLog.getEntityLogEntry() .stream() - .anyMatch(entry -> entry.getMatchedRule() != null && entry.getMatchedRule().contains("DOC.100.0")); + .anyMatch(entry -> entry.getMatchedRule() != null && entry.getMatchedRule().contains("T.0.0")); - assertTrue(tableEntityFound, "Expected table entity creation ('DOC.100.0') to be present in the entity log"); + assertTrue(tableEntityFound, "Expected table entity creation ('T.0.0') to be present in the entity log"); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl index 75df6c3d..965526c7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test.drl @@ -69,12 +69,12 @@ query "getFileAttributes" //------------------------------------ table rules ------------------------------------ -rule "DOC.100.0: Create TableEntities for all Tables" +rule "T.0.0: Create TableEntities for all Tables" when $table: Table() then Optional tableEntity = entityCreationService.bySemanticNode($table, "Table", EntityType.ENTITY); - tableEntity.ifPresent(t -> t.apply("DOC.100.0", "Table found.", "n-a")); + tableEntity.ifPresent(t -> t.apply("T.0.0", "Table found.", "n-a")); end //------------------------------------ Headlines rules ------------------------------------ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl index 13763953..25bbe8ab 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_table_test_components.drl @@ -45,12 +45,11 @@ query "getComponents" //------------------------------------ table rules ------------------------------------ -rule "TableComponents.900.0: Create components for all table entities." - salience -900 +rule "TableComponents.0.0: Create components for all table entities." when $tables: List() from collect (Entity(type == "Table")) then - componentCreationService.createComponentForTables("TableComponents.900.0", $tables); + componentCreationService.createComponentForTables("TableComponents.0.0", $tables); end //------------------------------------ Default Components rules ------------------------------------ diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 4d5359ed..402cf9df 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -68,6 +68,15 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end +//------------------------------------ T rules ------------------------------------ +rule "T.0.0: Create TableEntities for all Tables" + when + $table: Table() + then + Optional tableEntity = entityCreationService.bySemanticNode($table, "Table", EntityType.ENTITY); + tableEntity.ifPresent(t -> t.apply("T.0.0", "Table found.", "n-a")); + end + //------------------------------------ H rules ------------------------------------ // Rule unit: H.0 -- 2.47.2