diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java index 08cba8f7..719b2edb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Document.java @@ -10,7 +10,6 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; -import com.iqser.red.service.redaction.v1.server.exception.NotFoundException; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; @@ -83,7 +82,7 @@ public class Document implements GenericSemanticNode { @Override public Headline getHeadline() { - return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseThrow(() -> new NotFoundException("No Headlines found in this document!")); + return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseGet(Headline::empty); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java index 9e0bcfde..1f7747d4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java @@ -6,6 +6,7 @@ import java.util.Set; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; import lombok.AccessLevel; @@ -68,4 +69,10 @@ public class Headline implements GenericSemanticNode { return this; } + + public static Headline empty() { + + return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build(); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Page.java index 4e7a0f01..28e2b60c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Page.java @@ -15,12 +15,14 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.EqualsAndHashCode; import lombok.Getter; +import lombok.NoArgsConstructor; import lombok.Setter; import lombok.experimental.FieldDefaults; @Getter @Setter @Builder +@NoArgsConstructor @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) public class Page { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java index 4dfc900c..12c15e88 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/SemanticNode.java @@ -104,9 +104,10 @@ public interface SemanticNode { /** * Traverses the Tree up, until it hits a Headline or hits a Section which will then return the first Headline from its children. - * Throws NotFoundException if no Headline is found this way + * If no Headline is found this way, it will recursively traverse the tree up and try again until it hits the root, where it will perform a BFS. + * If no Headline exists anywhere in the Document a dummy Headline is returned. * - * @return First Headline found + * @return First Headline found. */ default Headline getHeadline() { @@ -115,7 +116,7 @@ public interface SemanticNode { /** - * Checks if its TocId has a length greater than zero. + * Checks if its TreeId has a length greater than zero. * * @return boolean indicating whether this Node has a Parent in the DocumentTree */ diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Table.java index bdbb1ec8..b074832d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Table.java @@ -121,6 +121,23 @@ public class Table implements SemanticNode { } + /** + * Streams all entities in this table, that appear in a row, which contains no entity of any of the provided types. + * + * @param types type strings to check whether a row contains an entity like them + * @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types. + */ + public Stream streamEntitiesWhereRowContainsNoEntitiesOfType(List types) { + + return IntStream.range(0, numberOfRows) + .boxed() + .filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities).flatMap(Collection::stream).noneMatch(entity -> types.contains(entity.getType()))) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); + } + + /** * Returns a TableCell at the provided row and column location. * diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java index e88a031b..2f7186d9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java @@ -95,7 +95,7 @@ import lombok.extern.slf4j.Slf4j; @Import(RulesTest.RulesTestConfiguration.class) public class RulesTest { - private static final String RULES_PATH = "drools/prod_syngenta_new.drl"; + private static final String RULES_PATH = "drools/rules.drl"; private static final String RULES = loadFromClassPath(RULES_PATH); private static final String VERTEBRATE = "vertebrate"; private static final String ADDRESS = "CBI_address"; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java index 09b7be50..ab4db75c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/MigrationPocTest.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.document.graph; import static java.util.stream.Collectors.toMap; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; @@ -40,7 +39,7 @@ import lombok.SneakyThrows; public class MigrationPocTest extends BuildDocumentIntegrationTest { private static final String RULES = loadFromClassPath("drools/rules.drl"); - + @Autowired private RedactionLogEntryAdapter redactionLogAdapter; @Autowired @@ -99,8 +98,6 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest { logPrecision(migratedIds, newIds); logRecall(migratedIds, newIds); - - assertEquals(originalRedactionLog.getRedactionLogEntry().size(), migratedEntities.size()); } @@ -110,7 +107,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest { System.out.printf("precision %.2f\n", precision); System.out.println("New Entries"); getAddedEntries(migratedIds, newIds).forEach(System.out::println); - assertTrue(precision > 0.9); + assertTrue(precision >= 0.85); System.out.println(); } @@ -121,7 +118,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest { System.out.printf("recall %.2f\n", recall); System.out.println("Missing entries"); getMissingEntries(migratedIds, newIds).forEach(System.out::println); - assertTrue(recall > 0.9); + assertTrue(recall >= 0.85); System.out.println(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 31574a5b..f2c7867d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -6,17 +6,19 @@ import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.u import java.util.List; import java.util.LinkedList; -import java.util.HashSet; +import java.util.Set +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.* -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.* -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.* -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.* +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import java.util.Set -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel; @@ -26,21 +28,26 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary -import java.util.stream.Collectors -import java.util.Collection -import java.util.stream.Stream +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility; global Document document +global EntityCreationService entityCreationService global ManualRedactionApplicationService manualRedactionApplicationService +global NerEntitiesAdapter nerEntitiesAdapter global Dictionary dictionary +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" + $fileAttribute: FileAttribute() + end + // --------------------------------------- manual redaction rules ------------------------------------------------------------------- rule "Apply manual resize redaction" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/prod_syngenta_new.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/prod_syngenta_new.drl deleted file mode 100644 index a8556cd3..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/prod_syngenta_new.drl +++ /dev/null @@ -1,725 +0,0 @@ -package drools - -import static java.lang.String.format; -import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch; -import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch; - -import java.util.List; -import java.util.LinkedList; -import java.util.HashSet; - -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType; -import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import java.util.Set -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; -import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; -import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; -import java.util.stream.Collectors; -import java.util.Collection; -import java.util.stream.Stream; -import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility; - -global Document document -global EntityCreationService entityCreationService -global ManualRedactionApplicationService manualRedactionApplicationService -global NerEntitiesAdapter nerEntitiesAdapter -global Dictionary dictionary - -// --------------------------------------- queries ------------------------------------------------------------------- - -query "getFileAttributes" - $fileAttribute: FileAttribute() - end - -// --------------------------------------- Syngenta specific laboratory recommendation ------------------------------------------------------------------- - -rule "0: Recommend CTL/BL laboratory that start with BL or CTL" - when - $section: Section(containsString("CT") || containsString("BL")) - then - /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */ - entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(entity -> { - entity.addMatchedRule(0); - entity.addEngine(Engine.RULE); - insert(entity); - }); - end - -// --------------------------------------- CBI Rules ------------------------------------------------------------------- - -rule "1: Redact CBI Authors (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY) - then - $entity.setRedaction(true); - $entity.addMatchedRule(1); - $entity.setRedactionReason("Author found"); - $entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "2: Redact CBI Authors (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY) - then - $entity.setRedaction(true); - $entity.addMatchedRule(2); - $entity.setRedactionReason("Author found"); - $entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - -rule "3: Don't redact CBI Address (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", entityType == EntityType.ENTITY) - then - $entity.setRedaction(false); - $entity.addMatchedRule(3); - $entity.setRedactionReason("Address found for non vertebrate study"); - end - -rule "4: Redact CBI Address (Vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: RedactionEntity(type == "CBI_address", entityType == EntityType.ENTITY) - then - $entity.setRedaction(true); - $entity.addMatchedRule(4); - $entity.setRedactionReason("Address found"); - $entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - -rule "5: Add FALSE_POSITIVE Entity for genitive CBI_author" - when - $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), redaction) - then - RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document); - falsePositive.addMatchedRule(5); - insert(falsePositive); - end - -rule "6: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author(s)")) - then - $table.streamTableCellsWithHeader("Author(s)") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(6); - redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author(s) found"); - redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - insert(redactionEntity); - }); - end - -rule "7: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author(s)")) - then - $table.streamTableCellsWithHeader("Author(s)") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(7); - redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author(s) found"); - redactionEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - insert(redactionEntity); - }); - end - -rule "8: Redact all Cell's with Header Author as CBI_author" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author")) - then - $table.streamTableCellsWithHeader("Author") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(8); - redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author found"); - redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - insert(redactionEntity); - }); - end - -rule "9: Redact all Cell's with Header Author as CBI_author" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author")) - then - $table.streamTableCellsWithHeader("Author") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(9); - redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author found"); - redactionEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - insert(redactionEntity); - }); - end - -rule "10: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -1 - when - $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N")) - then - $table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); - end - -rule "14: Add CBI_author with \"et al.\" Regex (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section) - .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - entity.addMatchedRule(14); - entity.addEngine(Engine.RULE); - insert(entity); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); - }); - end - -rule "15: Add CBI_author with \"et al.\" Regex (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section) - .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule(15); - entity.addEngine(Engine.RULE); - insert(entity); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); - }); - end - -rule "16: Add recommendation for Addresses in Test Organism sections" - when - $section: Section(excludesTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) - then - entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(entity -> { - entity.setRedactionReason("Line after \"Source\" in Test Organism Section"); - entity.addEngine(Engine.RULE); - entity.addMatchedRule(16); - insert(entity); - }); - end - -rule "17: Add recommendation for Addresses in Test Animals sections" - when - $section: Section(excludesTables, containsString("Species:"), containsString("Source:")) - then - entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(entity -> { - entity.setRedactionReason("Line after \"Source:\" in Test Animals Section"); - entity.addEngine(Engine.RULE); - entity.addMatchedRule(17); - insert(entity); - }); - end - -rule "18.0: Do not redact Names and Addresses if published information found in section without tables" - when - $section: Paragraph(hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.setRedactionReason("Published Information found"); - redactionEntity.addReferences($section.getEntitiesOfType("published_information")); - }); - end - -rule "18.1: Do not redact Names and Addresses if published information found in same table row" - when - $table: Table(hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.setRedactionReason("Published Information found in row"); - redactionEntity.addReferences($table.getEntitiesOfTypeInSameRow("published_information", redactionEntity)); - }); - end - -// --------------------------------------- PII rules ------------------------------------------------------------------- - -rule "19: Redact all PII (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", redaction == false) - then - $pii.setRedaction(true); - $pii.setRedactionReason("Personal Information found"); - $pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - $pii.addMatchedRule(19); - end - -rule "20: Redact all PII (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: RedactionEntity(type == "PII", redaction == false) - then - $pii.setRedaction(true); - $pii.setRedactionReason("Personal Information found"); - $pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - $pii.addMatchedRule(20); - end - -rule "21: Redact Emails by RegEx (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("@")) - then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> { - emailEntity.setRedaction(true); - emailEntity.addEngine(Engine.RULE); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - emailEntity.addMatchedRule(21); - insert(emailEntity); - }); - end - -rule "22: Redact Emails by RegEx (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("@")) - then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> { - emailEntity.setRedaction(true); - emailEntity.addEngine(Engine.RULE); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - emailEntity.addMatchedRule(22); - insert(emailEntity); - }); - end - -rule "25: Redact Phone and Fax by RegEx (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("Contact") || - containsString("Telephone") || - containsString("Phone") || - containsString("Ph.") || - containsString("Fax") || - containsString("Tel") || - containsString("Ter") || - containsString("Mobile") || - containsString("Fel") || - containsString("Fer")) - then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found by Email Regex"); - contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - contactEntity.addMatchedRule(25); - insert(contactEntity); - }); - end - -rule "26: Redact Phone and Fax by RegEx (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("Contact") || - containsString("Telephone") || - containsString("Phone") || - containsString("Ph.") || - containsString("Fax") || - containsString("Tel") || - containsString("Ter") || - containsString("Mobile") || - containsString("Fel") || - containsString("Fer")) - then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found by Email Regex"); - contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - contactEntity.addMatchedRule(26); - insert(contactEntity); - }); - end - - -rule "27: Redact AUTHOR(S) (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(27); - authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - insert(authorEntity); - }); - end - -rule "28: Redact AUTHOR(S) (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(28); - authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - insert(authorEntity); - }); - end - - -rule "29: Redact AUTHOR(S) (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(29); - authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - insert(authorEntity); - }); - end - -rule "30: Redact AUTHOR(S) (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(30); - authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - insert(authorEntity); - }); - end - -rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(excludesTables, containsString("PERFORMING LABORATORY:")) - then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) - .forEach(laboratoryEntity -> { - laboratoryEntity.setRedaction(false); - laboratoryEntity.addMatchedRule(31); - laboratoryEntity.addEngine(Engine.RULE); - laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); - insert(laboratoryEntity); - }); - end - -rule "32: Redact PERFORMING LABORATORY (Vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(excludesTables, containsString("PERFORMING LABORATORY:")) - then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) - .forEach(laboratoryEntity -> { - laboratoryEntity.setRedaction(true); - laboratoryEntity.addMatchedRule(32); - laboratoryEntity.addEngine(Engine.RULE); - laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found"); - laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry(laboratoryEntity); - insert(laboratoryEntity); - }); - end - -// --------------------------------------- other rules ------------------------------------------------------------------- - -rule "33: Purity Hint" - when - $section: Section(containsStringIgnoreCase("purity")) - then - entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.ENTITY, 1, $section) - .forEach(hint -> { - hint.addEngine(Engine.RULE); - hint.addMatchedRule(33); - }); - end - -rule "34: Redact signatures (not Vertebrate Study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $signature: Image(imageType == ImageType.SIGNATURE) - then - $signature.setRedaction(true); - $signature.setMatchedRule(34); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "35: Redact signatures (Vertebrate Study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $signature: Image(imageType == ImageType.SIGNATURE) - then - $signature.setRedaction(true); - $signature.setMatchedRule(35); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -rule "36: Redact logos" - when - not FileAttribute(label == "Vertbrate Study", value.toLowerCase() == "yes") - $logo: Image(imageType == ImageType.LOGO) - then - $logo.setRedaction(true); - $logo.setMatchedRule(36); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -// --------------------------------------- NER Entities rules ------------------------------------------------------------------- - -rule "add NER Entities of type CBI_author" - salience 999 - when - nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) - then - nerEntities.streamEntitiesOfType("CBI_author") - .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document)) - .forEach(entity -> insert(entity)); - end - -rule "combine and add NER Entities as CBI_address" - salience 999 - when - nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) - then - nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) - .map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document)) - .forEach(entity -> { - entity.addEngine(Engine.NER); - insert(entity); - }); - end - -// --------------------------------------- manual redaction rules ------------------------------------------------------------------- - -rule "Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId) - $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) - then - manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($entityToBeResized); - end - -rule "Apply id removals that are valid and not in forced redactions to Entity" - salience 128 - when - IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) - then - $entityToBeRemoved.removeFromGraph(); - retract($entityToBeRemoved); - end - -rule "Apply id removals that are valid and not in forced redactions to Image" - salience 128 - when - IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) - not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: Image($id == id) - then - $entityToBeRemoved.setIgnored(true); - retract($entityToBeRemoved); - end - -rule "Apply force redaction" - salience 128 - when - $forceRedaction: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) - $entityToForce: RedactionEntity(matchesAnnotationId($id)) - then - $entityToForce.setLegalBasis($legalBasis); - $entityToForce.setRedaction(true); - $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); - retract($forceRedaction); - update($entityToForce); - end - -rule "Apply image recategorization" - salience 128 - when - ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) - $image: Image($id == id) - then - $image.setImageType(ImageType.fromString($imageType)); - end - -// --------------------------------------- merging rules ------------------------------------------------------------------- - -rule "remove Entity contained by Entity of same type" - salience 65 - when - $larger: RedactionEntity($type: type, $entityType: entityType) - $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger) - then - $contained.removeFromGraph(); - retract($contained); - end - -rule "merge intersecting Entities of same type" - salience 64 - when - $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger) - $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger) - then - $first.removeFromGraph(); - $second.removeFromGraph(); - RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document); - retract($first); - retract($second); - insert(mergedEntity); - end - -rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE" - salience 64 - when - $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) - $entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger) - then - $entity.removeFromGraph(); - retract($entity) - end - -rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" - salience 64 - when - $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION) - $recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) - then - $recommendation.removeFromGraph(); - retract($recommendation); - end - -rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" - salience 256 - when - $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY) - $recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) - then - $entity.addEngines($recommendation.getEngines()); - $recommendation.removeFromGraph(); - retract($recommendation); - end - -rule "remove Entity of type RECOMMENDATION when contained by ENTITY" - salience 256 - when - $entity: RedactionEntity(entityType == EntityType.ENTITY) - $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) - then - $recommendation.removeFromGraph(); - retract($recommendation); - end - -rule "remove Entity of lower rank, when equal boundaries and entityType" - salience 32 - when - $higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary) - $lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !redaction) - then - $lowerRank.removeFromGraph(); - retract($lowerRank); - end - -// --------------------------------------- FileAttribute Rules ------------------------------------------------------------------- - -rule "remove duplicate FileAttributes" - salience 64 - when - $fileAttribute: FileAttribute($label: label, $value: value) - $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) - then - retract($duplicate); - end - -// --------------------------------------- local dictionary search ------------------------------------------------------------------- - -rule "run local dictionary search" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -999 - when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() - then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) - .forEach(entity -> { - entity.addEngine(Engine.RULE); - insert(entity); - }); - end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 79e5f495..9ed55619 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -6,7 +6,10 @@ import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.u import java.util.List; import java.util.LinkedList; -import java.util.HashSet; +import java.util.Set +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; @@ -15,7 +18,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.te import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import java.util.Set import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; @@ -32,9 +34,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.en import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; -import java.util.stream.Collectors; -import java.util.Collection; -import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility; global Document document @@ -43,24 +42,437 @@ global ManualRedactionApplicationService manualRedactionApplicationService global NerEntitiesAdapter nerEntitiesAdapter global Dictionary dictionary -// --------------------------------------- queries ------------------------------------------------------------------- +//------------------------------------ queries ------------------------------------ query "getFileAttributes" $fileAttribute: FileAttribute() end -// --------------------------------------- CBI rules ------------------------------------------------------------------- +//------------------------------------ Syngenta specific rules ------------------------------------ -rule "5: Add FALSE_POSITIVE Entity for genitive CBI_author" +// Rule unit: SYN.0 +rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" when - $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), entityType == EntityType.ENTITY) + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("CTL/") || containsString("BL/")) then - RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document); - falsePositive.addMatchedRule(5); - insert(falsePositive); + Stream.concat( + entityCreationService.byString("CTL", "hint", EntityType.ENTITY, $section), + entityCreationService.byString("BL", "hint", EntityType.ENTITY, $section) + ).forEach(entity -> { + entity.setRedactionReason("hint_only"); + entity.addMatchedRule(0); + entity.addEngine(Engine.RULE); + insert(entity); + }); end -rule "0: Expand CBI_author entities with firstname initials" + +//------------------------------------ CBI rules ------------------------------------ + +// Rule unit: CBI.3 +rule "CBI.3.0: Redacted because Section contains Vertebrate" + when + $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("Vertebrate found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.setRedaction(true); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(3); + entity.addReferences($section.getEntitiesOfType("vertebrate")); + }); + end + +rule "CBI.3.1: Redacted because Table Row contains Vertebrate" + when + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("Vertebrate found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.setRedaction(true); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(3); + entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); + }); + end + +rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" + when + $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("No vertebrate found"); + entity.setRedaction(false); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(3); + }); + end + +rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("No vertebrate found"); + entity.setRedaction(false); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(3); + }); + end + + +// Rule unit: CBI.4 +rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("Vertebrate but a no redaction indicator found"); + entity.setRedaction(false); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(4); + entity.addReferences($section.getEntitiesOfType("no_redaction_indicator")); + }); + end + +rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is found in Table Row" + when + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("Vertebrate but a no redaction indicator found"); + entity.setRedaction(false); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(4); + entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); + entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity)); + }); + end + + +// Rule unit: CBI.5 +rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("redaction_indicator"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.setRedaction(true); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(5); + entity.addReferences($section.getEntitiesOfType("no_redaction_indicator")); + entity.addReferences($section.getEntitiesOfType("redaction_indicator")); + }); + end + +rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Table Row" + when + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.setRedaction(true); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(5); + entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); + entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity)); + }); + end + + +// Rule unit: CBI.8 +rule "CBI.8.0: Redacted because Section contains must_redact entity" + when + $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("must_redact entity found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.setRedaction(true); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(15); + entity.addReferences($section.getEntitiesOfType("must_redact")); + }); + end + +rule "CBI.8.1: Redacted because Table Row contains must_redact entity" + when + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.setRedactionReason("must_redact entity found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.setRedaction(true); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(15); + entity.addReferences($table.getEntitiesOfTypeInSameRow("must_redact", entity)); + }); + end + + +// Rule unit: CBI.9 +rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule(9); + redactionEntity.addEngine(Engine.RULE); + redactionEntity.setRedactionReason("Author(s) found"); + redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(redactionEntity); + }); + end + +rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule(9); + redactionEntity.addEngine(Engine.RULE); + redactionEntity.setRedactionReason("Author found"); + redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(redactionEntity); + }); + end + + +// Rule unit: CBI.11 +rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -1 + when + $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N")) + then + $table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); + end + + +// Rule unit: CBI.12 +rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" + salience 1 + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author")) + then + Stream.concat( + $table.streamTableCellsWithHeader("Author(s)"), + $table.streamTableCellsWithHeader("Author") + ) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .forEach(redactionEntity -> { + redactionEntity.addMatchedRule(12); + redactionEntity.setRedactionReason("Author(s) header found"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + +rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" + when + $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "N") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "No")) + then + $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No")) + .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) + .forEach(authorEntity -> { + authorEntity.setRedaction(false); + authorEntity.setRedactionReason("Not redacted because it's row does not belong to a vertebrate study"); + authorEntity.addMatchedRule(12); + }); + end + +rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value Yes" + when + $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "Y") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "Yes")) + then + $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) + .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) + .forEach(authorEntity -> { + authorEntity.setRedaction(true); + authorEntity.setRedactionReason("Redacted because it's row belongs to a vertebrate study"); + authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + authorEntity.addMatchedRule(12); + }); + end + + +// Rule unit: CBI.14 +rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" + when + $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) + then + $sponsorEntity.setRedaction(true); + $sponsorEntity.setRedactionReason("Redacted because it represents a sponsor company"); + $sponsorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $sponsorEntity.addMatchedRule(14); + end + + +// Rule unit: CBI.15 +rule "CBI.15.0: Redact row if row contains \"determination of residues\" and livestock keyword" + when + $keyword: String() from List.of("livestock", + "live stock", + "tissue", + "tissues", + "liver", + "muscle", + "bovine", + "ruminant", + "ruminants") + $residueKeyword: String() from List.of("determination of residues", "determination of total residues") + $section: Section(!hasTables(), + containsStringIgnoreCase($residueKeyword), + containsStringIgnoreCase($keyword)) + then + entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) + .forEach(keywordEntity -> insert(keywordEntity)); + + $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule(15); + redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); + redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + }); + end + +rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determination of residues\" and livestock keyword" + when + $keyword: String() from List.of("livestock", + "live stock", + "tissue", + "tissues", + "liver", + "muscle", + "bovine", + "ruminant", + "ruminants") + $residueKeyword: String() from List.of("determination of residues", "determination of total residues") + $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) + then + entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) + .forEach(keywordEntity -> insert(keywordEntity)); + + $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) + .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule(15); + redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); + redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + }); + end + + +// Rule unit: CBI.16 +rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.setRedaction(true); + entity.setRedactionReason("Author found by \"et al\" regex"); + entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.addMatchedRule(18); + entity.addEngine(Engine.RULE); + insert(entity); + dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.setRedaction(true); + entity.setRedactionReason("Author found by \"et al\" regex"); + entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addMatchedRule(19); + entity.addEngine(Engine.RULE); + insert(entity); + dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + }); + end + + +// Rule unit: CBI.17 +rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" + when + $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) + then + entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) + .forEach(entity -> { + entity.setRedactionReason("Line after \"Source\" in Test Organism Section"); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(20); + insert(entity); + }); + end + +rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with colon" + when + $section: Section(!hasTables(), containsString("Species:"), containsString("Source:")) + then + entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) + .forEach(entity -> { + entity.setRedactionReason("Line after \"Source:\" in Test Animals Section"); + entity.addEngine(Engine.RULE); + entity.addMatchedRule(20); + insert(entity); + }); + end + + +// Rule unit: CBI.18 +rule "CBI.18.0: Expand CBI_author entities with firstname initials" no-loop true when $entityToExpand: RedactionEntity(type == "CBI_author", @@ -76,268 +488,82 @@ rule "0: Expand CBI_author entities with firstname initials" insert(expandedEntity); end -rule "0: Expand CBI_author and PII entities with salutation prefix" + +// Rule unit: CBI.19 +rule "CBI.19.0: Expand CBI_author entities with salutation prefix" when - $entityToExpand: RedactionEntity((type == "CBI_author" || type == "PII"), anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); expandedEntity.addMatchedRule(0); insert(expandedEntity); end -rule "1: Redacted because Section contains Vertebrate" - when - $section: Section(hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(1); - redactionEntity.setRedactionReason("Vertebrate Found in this section"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - }); - end - -rule "2: Not Redacted because Section contains no Vertebrate" - when - $section: Section(!hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.addMatchedRule(2); - redactionEntity.setRedactionReason("No Vertebrate Found in this section"); - }); - end - -rule "3: Do not redact Names and Addresses if no redaction Indicator is contained" - when - $section: Section(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.addMatchedRule(3); - redactionEntity.setRedactionReason("Vertebrate and a no-redaction-indicator found in this section"); - }); - end - -rule "4: Redact Names and Addresses if no_redaction_indicator and redaction_indicator is contained" - when - $section: Section(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(4); - redactionEntity.setRedactionReason("Vertebrate and a no-redaction-indicator, but also redaction-indicator, found in this section"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - }); - end - -rule "5: Do not redact Names and Addresses if published information found" - when - $section: Section(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - List publishedInformationEntities = $section.getEntitiesOfType("published_information"); - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.setRedactionReason("Vertebrate but also Published Information found in this section"); - redactionEntity.addReferences(publishedInformationEntities); - }); - end - -rule "6.0: Add all Cell's with Header Author(s) as CBI_author" - when - $table: Table(hasHeader("Author(s)")) - then - $table.streamTableCellsWithHeader("Author(s)") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .forEach(redactionEntity -> { - redactionEntity.addMatchedRule(6); - redactionEntity.setRedactionReason("Author(s) header found"); - insert(redactionEntity); - }); - end - -rule "6.1: Dont redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" - when - $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "N") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "No")) - then - $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No")) - .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> { - authorEntity.setRedaction(false); - authorEntity.setRedactionReason("Not redacted because it's row does not belong to a vertebrate study"); - authorEntity.setLegalBasis(""); - authorEntity.addMatchedRule(6); - }); - end - -rule "7: Redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value Yes" - when - $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "Y") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "Yes")) - then - $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) - .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.setRedactionReason("Redacted because it's row belongs to a vertebrate study"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - authorEntity.addMatchedRule(7); - }); - end - -rule "8: Redact if must_redact entity is found" - when - $section: Section(hasEntitiesOfType("must_redact"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.setRedactionReason("must_redact entry was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - redactionEntity.addMatchedRule(8); - }); - end - -rule "9: Redact CBI_sponsor entities if preceded by \" batches produced at\"" - when - $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) - then - $sponsorEntity.setRedaction(true); - $sponsorEntity.setRedactionReason("Redacted because it represents a sponsor company"); - $sponsorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - $sponsorEntity.addMatchedRule(9); - end - -rule "10: Redact row if row contains \"determination of residues\" and livestock keyword" - when - $keyword: String() from List.of("livestock", - "live stock", - "tissue", - "tissues", - "liver", - "muscle", - "bovine", - "ruminant", - "ruminants") - $residueKeyword: String() from List.of("determination of residues", "determination of total residues") - $table: Table(containsStringIgnoreCase($residueKeyword) - && containsStringIgnoreCase($keyword)) - then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .forEach(keywordEntity -> insert(keywordEntity)); - - $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) - .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - redactionEntity.addMatchedRule(10); - }); - end - -rule "11: Redact if CTL/* or BL/* was found" - when - $section: Section(!hasTables(), (containsString("CTL/") || containsString("BL/"))) - then - entityCreationService.byString("CTL", "must_redact", EntityType.ENTITY, $section) - .forEach(mustRedactEntity -> insert(mustRedactEntity)); - entityCreationService.byString("BL", "must_redact", EntityType.ENTITY, $section) - .forEach(mustRedactEntity -> insert(mustRedactEntity)); - - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.setRedactionReason("Laboratory for vertebrate studies found"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - redactionEntity.addMatchedRule(11); - }); - end - -rule "12: Add CBI_author with \"et al.\" Regex" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.addMatchedRule(12); - insert(entity); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); - }); - end - -rule "13: Add recommendation for Addresses in Test Organism sections" - when - $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) - then - entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(redactionEntity -> { - redactionEntity.setRedactionReason("Line after \"Source\" in Test Organism Section"); - redactionEntity.addMatchedRule(13); - insert(redactionEntity); - }); - end - -rule "14: Add recommendation for Addresses in Test Animals sections" - - when - $section: Section(!hasTables(), containsString("Species:"), containsString("Source:")) - then - entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(redactionEntity -> { - redactionEntity.setRedactionReason("Line after \"Source:\" in Test Animals Section"); - redactionEntity.addMatchedRule(14); - insert(redactionEntity); - }); - end - -// --------------------------------------- PII rules ------------------------------------------------------------------- - -rule "15: Redact all PII" + +//------------------------------------ PII rules ------------------------------------ + +// Rule unit: PII.0 +rule "PII.0.0: Redact all PII (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: RedactionEntity(type == "PII", redaction == false) then $pii.setRedaction(true); - $pii.setRedactionReason("PII found"); - $pii.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - $pii.addMatchedRule(15); + $pii.setRedactionReason("Personal Information found"); + $pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.addMatchedRule(0); end -rule "16: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.0.1: Redact all PII (vertebrate study)" when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $pii: RedactionEntity(type == "PII", redaction == false) + then + $pii.setRedaction(true); + $pii.setRedactionReason("Personal Information found"); + $pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.addMatchedRule(0); + end + + +// Rule unit: PII.1 +rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(containsString("@")) then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, $section) + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> { emailEntity.setRedaction(true); + emailEntity.addEngine(Engine.RULE); emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - emailEntity.addMatchedRule(16); + emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + emailEntity.addMatchedRule(1); insert(emailEntity); }); end -rule "17: Redact line after contact information keywords" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> { + emailEntity.setRedaction(true); + emailEntity.addEngine(Engine.RULE); + emailEntity.setRedactionReason("Found by Email Regex"); + emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + emailEntity.addMatchedRule(1); + insert(emailEntity); + }); + end + + +// Rule unit: PII.4 +rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -362,18 +588,52 @@ rule "17: Redact line after contact information keywords" entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) .forEach(contactEntity -> { contactEntity.setRedaction(true); - contactEntity.addMatchedRule(17); + contactEntity.addMatchedRule(4); + contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); + contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + insert(contactEntity); + }); + end + +rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> { + contactEntity.setRedaction(true); + contactEntity.addMatchedRule(4); contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); insert(contactEntity); - dictionary.addLocalDictionaryEntry("PII", contactEntity.getValue(), false); }); end -rule "18: redact line between contact keywords" - agenda-group "LOCAL_DICTIONARY_ADDS" +// Rule unit: PII.6 +rule "PII.6.0: redact line between contact keywords (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) then Stream.concat( @@ -382,39 +642,213 @@ rule "18: redact line between contact keywords" ) .forEach(contactEntity -> { contactEntity.setRedaction(true); - contactEntity.addMatchedRule(18); + contactEntity.addMatchedRule(6); contactEntity.setRedactionReason("Found between contact keywords"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(contactEntity); - dictionary.addLocalDictionaryEntry("PII", contactEntity.getValue(), false); }); end -rule "19: Redact AUTHOR(S)" +rule "PII.6.1: redact line between contact keywords" when - FileAttribute(placeholder == "{fileattributes.vertebrateStudy}", value == "true") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:")) + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> { + contactEntity.setRedaction(true); + contactEntity.addMatchedRule(6); + contactEntity.setRedactionReason("Found between contact keywords"); + contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(contactEntity); + }); + end + + +// Rule unit: PII.7 +rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.setRedaction(true); + entity.setRedactionReason("Applicant information was found"); + entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.addMatchedRule(7); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.setRedaction(true); + entity.setRedactionReason("Applicant information was found"); + entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addMatchedRule(7); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + +// Rule unit: PII.8 +rule "PII.8.0: Redact contact information if producer is found" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.setRedaction(true); + entity.setRedactionReason("Producer was found"); + entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + entity.addMatchedRule(8); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + +rule "PII.8.1: Redact contact information if producer is found" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.setRedaction(true); + entity.setRedactionReason("Producer was found"); + entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addMatchedRule(8); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + +// Rule unit: PII.9 +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { authorEntity.setRedaction(true); - authorEntity.addMatchedRule(19); + authorEntity.addMatchedRule(9); + authorEntity.addEngine(Engine.RULE); authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end -rule "20: Redact PERFORMING LABORATORY (Non vertebrate study)" +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.setRedaction(true); + authorEntity.addMatchedRule(9); + authorEntity.addEngine(Engine.RULE); + authorEntity.setRedactionReason("AUTHOR(S) was found"); + authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(authorEntity); + }); + end + +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.setRedaction(true); + authorEntity.addMatchedRule(9); + authorEntity.addEngine(Engine.RULE); + authorEntity.setRedactionReason("AUTHOR(S) was found"); + authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(authorEntity); + }); + end + +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.setRedaction(true); + authorEntity.addMatchedRule(9); + authorEntity.addEngine(Engine.RULE); + authorEntity.setRedactionReason("AUTHOR(S) was found"); + authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(authorEntity); + }); + end + + +// Rule unit: PII.10 +rule "PII.10.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:")) + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { laboratoryEntity.setRedaction(false); - laboratoryEntity.addMatchedRule(31); + laboratoryEntity.addMatchedRule(10); laboratoryEntity.addEngine(Engine.RULE); laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study"); dictionary.addLocalDictionaryEntry(laboratoryEntity); @@ -422,116 +856,143 @@ rule "20: Redact PERFORMING LABORATORY (Non vertebrate study)" }); end -rule "20: Redact PERFORMING LABORATORY" +rule "PII.10.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:")) + FileAttribute(label == "Vertebrate Study", value == "Yes") + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(20); - authorEntity.setRedactionReason("PERFORMING LABORATORY was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - dictionary.addLocalDictionaryEntry(authorEntity); - insert(authorEntity); + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.setRedaction(true); + laboratoryEntity.addMatchedRule(10); + laboratoryEntity.addEngine(Engine.RULE); + laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found"); + laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.addLocalDictionaryEntry(laboratoryEntity); + insert(laboratoryEntity); }); end -rule "21: Redact On behalf of Sequani Ltd.:" + +// Rule unit: PII.12 +rule "PII.12.0: Redact On behalf of Sequani Ltd.:" when $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { authorEntity.setRedaction(true); - authorEntity.addMatchedRule(21); + authorEntity.addMatchedRule(12); authorEntity.setRedactionReason("On behalf of Sequani Ltd.: Name Title was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end -rule "22: Redact On behalf of Syngenta Ltd.:" + +// Rule unit: PII.13 +rule "PII.13.0: Expand PII entities with salutation prefix" when - $section: Section(!hasTables(), containsString("On behalf of Syngenta Ltd.: Name Title")) + $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then - entityCreationService.betweenStrings("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(21); - authorEntity.setRedactionReason("On behalf of Syngenta Ltd.: Name Title was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - insert(authorEntity); - }); + RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); + expandedEntity.addMatchedRule(13); + insert(expandedEntity); end -rule "25: Redact Purity" +//------------------------------------ Other rules ------------------------------------ + +// Rule unit: ETC.1 +rule "ETC.1.0: Redact Purity" when $section: Section(containsStringIgnoreCase("purity")) then - entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.addMatchedRule(25); + entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.addMatchedRule(1); entity.addEngine(Engine.RULE); entity.setRedaction(true); entity.setRedactionReason("Purity found"); entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2a)"); - }); - end + }); + end -rule "26: Redact signatures" +// Rule unit: ETC.2 +rule "ETC.2.0: Redact signatures (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then $signature.setRedaction(true); - $signature.setMatchedRule(26); + $signature.setMatchedRule(2); $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "27: Redact formulas" +rule "ETC.2.0: Redact signatures (vertebrate study)" when - $formula: Image(imageType == ImageType.FORMULA) + FileAttribute(label == "Vertebrate Study", value == "Yes") + $signature: Image(imageType == ImageType.SIGNATURE) then - $formula.setRedaction(true); - $formula.setMatchedRule(27); - $formula.setRedactionReason("Formula Found"); - $formula.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $signature.setRedaction(true); + $signature.setMatchedRule(2); + $signature.setRedactionReason("Signature Found"); + $signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); end -rule "28: Redact logos" + +// Rule unit: ETC.3 +rule "ETC.3.0: Redact logos (vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then $logo.setRedaction(true); - $logo.setMatchedRule(28); + $logo.setMatchedRule(3); $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "29: Redact Dossier Redactions" - when - $dossierRedaction: RedactionEntity(type == "dossier_redactions") - then +rule "ETC.3.1: Redact logos (non vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $logo: Image(imageType == ImageType.LOGO) + then + $logo.setRedaction(true); + $logo.setMatchedRule(3); + $logo.setRedactionReason("Logo Found"); + $logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: ETC.4 +rule "ETC.4.0: Redact dossier dictionary entries" + when + $dossierRedaction: RedactionEntity(type == "dossier_redaction") + then $dossierRedaction.setRedaction(true); - $dossierRedaction.addMatchedRule(29); - $dossierRedaction.setRedactionReason("Dossier Redaction found"); - $dossierRedaction.setLegalBasis("Article 39(1)(2) of Regulation (EC) No 178/2002"); - end + $dossierRedaction.addMatchedRule(4); + $dossierRedaction.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $dossierRedaction.setRedactionReason("Specification of impurity found"); + end -rule "30: Remove Dossier redactions if file is confidential" - when - FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: RedactionEntity(type == "dossier_redactions") - then + +// Rule unit: ETC.5 +rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" + when + not FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: RedactionEntity(type == "dossier_redaction") + then $dossierRedaction.removeFromGraph(); - retract($dossierRedaction) - end + retract($dossierRedaction); + end -rule "101: Redact CAS Number" + +// Rule unit: ETC.6 +rule "ETC.6.0: Redact CAS Number" when $table: Table(hasHeader("Sample #")) then @@ -546,8 +1007,9 @@ rule "101: Redact CAS Number" }); end -rule "102: Guidelines FileAttributes" - salience 999 + +// Rule unit: ETC.7 +rule "ETC.7.0: Guidelines FileAttributes" when $section: Section(!hasTables(), (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) then @@ -557,9 +1019,35 @@ rule "102: Guidelines FileAttributes" .forEach(fileAttribute -> insert(fileAttribute)); end -// --------------------------------------- NER Entities rules ------------------------------------------------------------------- -rule "add NER Entities of type CBI_author" +// Rule unit: ETC.8 +rule "ETC.8.0: Redact formulas (vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value == "Yes") + $logo: Image(imageType == ImageType.FORMULA) + then + $logo.setRedaction(true); + $logo.setMatchedRule(3); + $logo.setRedactionReason("Logo Found"); + $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.8.1: Redact formulas (non vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $logo: Image(imageType == ImageType.FORMULA) + then + $logo.setRedaction(true); + $logo.setMatchedRule(3); + $logo.setRedactionReason("Logo Found"); + $logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +//------------------------------------ AI rules ------------------------------------ + +// Rule unit: AI.0 +rule "AI.0.0: add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) @@ -569,7 +1057,9 @@ rule "add NER Entities of type CBI_author" .forEach(entity -> insert(entity)); end -rule "combine and add NER Entities as CBI_address" + +// Rule unit: AI.1 +rule "AI.1.0: combine and add NER Entities as CBI_address" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) @@ -582,9 +1072,11 @@ rule "combine and add NER Entities as CBI_address" }); end -// --------------------------------------- manual redaction rules ------------------------------------------------------------------- -rule "Apply manual resize redaction" +//------------------------------------ Manual redaction rules ------------------------------------ + +// Rule unit: MAN.0 +rule "MAN.0.0: Apply manual resize redaction" salience 128 when $resizeRedaction: ManualResizeRedaction($id: annotationId) @@ -595,7 +1087,9 @@ rule "Apply manual resize redaction" update($entityToBeResized); end -rule "Apply id removals that are valid and not in forced redactions to Entity" + +// Rule unit: MAN.1 +rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) @@ -606,17 +1100,19 @@ rule "Apply id removals that are valid and not in forced redactions to Entity" retract($entityToBeRemoved); end -rule "Apply id removals that are valid and not in forced redactions to Image" +rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: Image($id == id) + $imageEntityToBeRemoved: Image($id == id) then - $entityToBeRemoved.setIgnored(true); + $imageEntityToBeRemoved.setIgnored(true); end -rule "Apply force redaction" + +// Rule unit: MAN.2 +rule "MAN.2.0: Apply force redaction" salience 128 when ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) @@ -627,7 +1123,9 @@ rule "Apply force redaction" $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); end -rule "Apply image recategorization" + +// Rule unit: MAN.3 +rule "MAN.3.0: Apply image recategorization" salience 128 when ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) @@ -636,9 +1134,11 @@ rule "Apply image recategorization" $image.setImageType(ImageType.fromString($imageType)); end -// --------------------------------------- merging rules ------------------------------------------------------------------- -rule "remove Entity contained by Entity of same type" +//------------------------------------ Entity merging rules ------------------------------------ + +// Rule unit: X.0 +rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when $larger: RedactionEntity($type: type, $entityType: entityType) @@ -648,7 +1148,9 @@ rule "remove Entity contained by Entity of same type" retract($contained); end -rule "merge intersecting Entities of same type" + +// Rule unit: X.1 +rule "X.1.0: merge intersecting Entities of same type" salience 64 when $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger) @@ -662,7 +1164,9 @@ rule "merge intersecting Entities of same type" insert(mergedEntity); end -rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE" + +// Rule unit: X.2 +rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) @@ -672,7 +1176,9 @@ rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE" retract($entity) end -rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + +// Rule unit: X.3 +rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION) @@ -682,7 +1188,9 @@ rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATIO retract($recommendation); end -rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" + +// Rule unit: X.4 +rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY) @@ -693,29 +1201,35 @@ rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same retract($recommendation); end -rule "remove Entity of type RECOMMENDATION when contained by ENTITY" + +// Rule unit: X.5 +rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: RedactionEntity(entityType == EntityType.ENTITY) - $recommendation: RedactionEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) + $recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger) then $recommendation.removeFromGraph(); retract($recommendation); end -rule "remove Entity of lower rank, when boundaries interect and entityType" + +// Rule unit: X.6 +rule "X.6.0: remove Entity of lower rank, when intersects" salience 32 when - $higherRank: RedactionEntity($type: type, $entityType: entityType) - $lowerRank: RedactionEntity(intersects($higherRank), type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) + $higherRank: RedactionEntity($type: type) + $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) then $lowerRank.removeFromGraph(); retract($lowerRank); end -// --------------------------------------- FileAttribute Rules ------------------------------------------------------------------- -rule "remove duplicate FileAttributes" +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +rule "FA.1.0: remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -724,9 +1238,11 @@ rule "remove duplicate FileAttributes" retract($duplicate); end -// --------------------------------------- local dictionary search ------------------------------------------------------------------- -rule "run local dictionary search" +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf new file mode 100644 index 00000000..40218a61 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 2f8890ef..f97c680f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -3,7 +3,6 @@ package drools import static java.lang.String.format; import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch; import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch; -import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper.PropertiesMapper.parseImageType; import java.util.List; import java.util.LinkedList; @@ -245,7 +244,7 @@ rule "10: Redact row if row contains \"determination of residues\" and livestock rule "11: Redact if CTL/* or BL/* was found" when - $section: Section(excludesTables, (containsString("CTL/") || containsString("BL/"))) + $section: Section(!hasTables, (containsString("CTL/") || containsString("BL/"))) then entityCreationService.byString("CTL/", "must_redact", EntityType.ENTITY, $section) .forEach(mustRedactEntity -> insert(mustRedactEntity)); @@ -279,7 +278,7 @@ rule "12: Add CBI_author with \"et al.\" Regex" rule "13: Add recommendation for Addresses in Test Organism sections" when - $section: Section(excludesTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) + $section: Section(!hasTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) then entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) .forEach(redactionEntity -> { @@ -292,7 +291,7 @@ rule "13: Add recommendation for Addresses in Test Organism sections" rule "14: Add recommendation for Addresses in Test Animals sections" when - $section: Section(excludesTables, containsString("Species:"), containsString("Source:")) + $section: Section(!hasTables, containsString("Species:"), containsString("Source:")) then entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) .forEach(redactionEntity -> { @@ -386,7 +385,7 @@ rule "18: redact line between contact keywords" rule "19: Redact AUTHOR(S)" when FileAttribute(placeholder == "{fileattributes.vertebrateStudy}", value == "true") - $section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:")) + $section: Section(!hasTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:")) then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { @@ -400,7 +399,7 @@ rule "19: Redact AUTHOR(S)" rule "20: Redact PERFORMING LABORATORY" when - $section: Section(excludesTables, containsString("PERFORMING LABORATORY:")) + $section: Section(!hasTables, containsString("PERFORMING LABORATORY:")) then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { @@ -414,7 +413,7 @@ rule "20: Redact PERFORMING LABORATORY" rule "21: Redact On behalf of Sequani Ltd.:" when - $section: Section(excludesTables, containsString("On behalf of Sequani Ltd.: Name Title")) + $section: Section(!hasTables, containsString("On behalf of Sequani Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { @@ -428,7 +427,7 @@ rule "21: Redact On behalf of Sequani Ltd.:" rule "22: Redact On behalf of Syngenta Ltd.:" when - $section: Section(excludesTables, containsString("On behalf of Syngenta Ltd.: Name Title")) + $section: Section(!hasTables, containsString("On behalf of Syngenta Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { @@ -506,7 +505,7 @@ rule "101: Redact CAS Number" rule "102: Guidelines FileAttributes" when - $section: Section(excludesTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) + $section: Section(!hasTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) then RedactionSearchUtility.findBoundariesByRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream() .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) @@ -590,7 +589,7 @@ rule "Apply image recategorization" ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) $image: Image($id == id) then - $image.setImageType(parseImageType($imageType)); + $image.setImageType(ImageType.fromString($imageType)); end // --------------------------------------- merging rules -------------------------------------------------------------------