From 5625d1ff0103d6ff03e0843ccbbb84dca8ca293c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Mon, 3 Jul 2023 17:10:08 +0200 Subject: [PATCH] RED-6929: Fix Acceptance Tests/Rules --- .../adapter/RedactionLogEntryAdapter.java | 11 +- .../document/graph/entity/MatchedRule.java | 33 + .../graph/entity/RedactionEntity.java | 95 +- .../document/graph/entity/RuleIdentifier.java | 41 + .../document/graph/nodes/Image.java | 79 +- .../services/EntityCreationService.java | 33 +- .../service/RedactionLogCreatorService.java | 20 +- .../v1/server/DocumineFloraTest.java | 9 +- .../v1/server/RedactionIntegrationTest.java | 3 +- .../document/entity/RedactionEntityTest.java | 51 +- .../server/document/graph/BoundaryTest.java | 10 + .../ManualResizeRedactionIntegrationTest.java | 12 +- .../DocumentPerformanceIntegrationTest.java | 4 +- .../src/test/resources/drools/all_rules.drl | 649 +++++++---- .../drools/manual_redaction_rules.drl | 39 +- .../src/test/resources/drools/rules.drl | 316 ++--- .../src/test/resources/drools/rules_v2.drl | 12 +- .../EFSA_sanitisation_GFL_v1/rules.drl | 1034 ++++++++++++----- 18 files changed, 1651 insertions(+), 800 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RuleIdentifier.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java index ac256585..d1846ae0 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/adapter/RedactionLogEntryAdapter.java @@ -101,10 +101,13 @@ public class RedactionLogEntryAdapter { redactionLogEntry.getType(), redactionLogEntry.isRecommendation() ? EntityType.RECOMMENDATION : EntityType.ENTITY, node); - correctEntity.setLegalBasis(redactionLogEntry.getLegalBasis()); - correctEntity.setRedactionReason(redactionLogEntry.getReason()); - correctEntity.addMatchedRule(redactionLogEntry.getMatchedRule()); - correctEntity.setRedaction(redactionLogEntry.isRedacted()); + + String ruleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0"; + if (redactionLogEntry.isRedacted()) { + correctEntity.apply(ruleIdentifier, redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis()); + } else { + correctEntity.skip(ruleIdentifier, redactionLogEntry.getReason()); + } correctEntity.setDictionaryEntry(redactionLogEntry.isDictionaryEntry()); correctEntity.setDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry()); return correctEntity; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java new file mode 100644 index 00000000..4a44e00a --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/MatchedRule.java @@ -0,0 +1,33 @@ +package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity; + +import 
java.util.Collections; +import java.util.Objects; +import java.util.Set; + +public record MatchedRule(RuleIdentifier ruleIdentifier, String reason, String legalBasis, boolean applied, Set references) implements Comparable { + + public static MatchedRule empty() { + + return new MatchedRule(RuleIdentifier.empty(), "", "", false, Collections.emptySet()); + } + + + @Override + public int compareTo(MatchedRule matchedRule) { + + RuleIdentifier otherRuleIdentifier = matchedRule.ruleIdentifier(); + if (!Objects.equals(ruleIdentifier.type(), otherRuleIdentifier.type())) { + if (Objects.equals(otherRuleIdentifier.type(), "MAN")) { + return 1; + } + if (Objects.equals(ruleIdentifier.type(), "MAN")) { + return -1; + } + } + if (!Objects.equals(otherRuleIdentifier.unit(), ruleIdentifier().unit())) { + return otherRuleIdentifier.unit() - ruleIdentifier.unit(); + } + return otherRuleIdentifier.id() - ruleIdentifier.id(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java index a6c6db6d..4fa0f78f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RedactionEntity.java @@ -2,12 +2,13 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.e import java.awt.geom.Rectangle2D; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; -import java.util.Deque; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import 
java.util.PriorityQueue; import java.util.Set; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; @@ -21,6 +22,7 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NonNull; import lombok.experimental.FieldDefaults; @Data @@ -39,19 +41,18 @@ public class RedactionEntity { final EntityType entityType; // empty defaults - boolean redaction; boolean removed; boolean ignored; + boolean resized; boolean skipRemoveEntitiesContainedInLarger; boolean dictionaryEntry; boolean dossierDictionaryEntry; - Set engines; - Set references; @Builder.Default - Deque matchedRules = new LinkedList<>(); - String redactionReason; - String legalBasis; + Set engines = new HashSet<>(); + + @Builder.Default + PriorityQueue matchedRuleList = new PriorityQueue<>(); // inferred on graph insertion @EqualsAndHashCode.Include @@ -68,7 +69,19 @@ public class RedactionEntity { public static RedactionEntity initialEntityNode(Boundary boundary, String type, EntityType entityType) { - return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).engines(new HashSet<>()).references(new HashSet<>()).build(); + return RedactionEntity.builder().type(type).entityType(entityType).boundary(boundary).build(); + } + + + public boolean isApplied() { + + return getMatchedRule().applied(); + } + + + public Set getReferences() { + + return getMatchedRule().references(); } @@ -120,28 +133,60 @@ public class RedactionEntity { } - public void addMatchedRule(String ruleIdentifier) { + public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) { - matchedRules.add(ruleIdentifier); + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, 
Collections.emptySet())); + } + + + public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection references) { + + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references))); + } + + + public void skip(@NonNull String ruleIdentifier, String comment) { + + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet())); + } + + + public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection references) { + + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references))); + } + + + public void addMatchedRule(MatchedRule matchedRule) { + + matchedRuleList.add(matchedRule); + } + + + public void addMatchedRules(Collection matchedRules) { + + matchedRuleList.addAll(matchedRules); } public int getMatchedRuleUnit() { - String[] values = getMatchedRule().split("\\."); - if (values.length < 2) { - return -1; - } - return Integer.parseInt(values[1]); + return getMatchedRule().ruleIdentifier().unit(); } - public String getMatchedRule() { + public MatchedRule getMatchedRule() { - if (matchedRules.isEmpty()) { - return ""; + if (matchedRuleList.isEmpty()) { + return MatchedRule.empty(); } - return matchedRules.getLast(); + return matchedRuleList.peek(); } @@ -202,18 +247,6 @@ public class RedactionEntity { } - public void addReference(RedactionEntity reference) { - - references.add(reference); - } - - - public void addReferences(List references) { - - this.references.addAll(references); - } - - public boolean matchesAnnotationId(String manualRedactionId) { return getRedactionPositionsPerPage().stream().anyMatch(entityPosition -> 
entityPosition.getId().equals(manualRedactionId)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RuleIdentifier.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RuleIdentifier.java new file mode 100644 index 00000000..137bcf37 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/entity/RuleIdentifier.java @@ -0,0 +1,41 @@ +package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity; + +import java.util.Objects; + +public record RuleIdentifier(String type, Integer unit, Integer id) { + + public static RuleIdentifier fromString(String identifier) { + + String[] values = identifier.split("\\."); + if (values.length != 3) { + throw new IllegalArgumentException("Illegal rule identifier provided: " + identifier); + } + String type = values[0]; + Integer group = Integer.parseInt(values[1]); + Integer id = Integer.parseInt(values[2]); + return new RuleIdentifier(type, group, id); + } + + + public static RuleIdentifier empty() { + + return new RuleIdentifier("", null, null); + } + + + @Override + public String toString() { + + StringBuilder sb = new StringBuilder(); + sb.append(type()); + if (Objects.nonNull(unit()) && Objects.nonNull(id())) { + sb.append(".").append(unit()).append(".").append(id()); + } else if (Objects.nonNull(id())) { + sb.append(".*.").append(id()); + } else if (Objects.nonNull(unit())) { + sb.append(".").append(unit()).append(".*"); + } + return sb.toString(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java index 826c2e25..505c1a15 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Image.java @@ -1,15 +1,19 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes; import java.awt.geom.Rectangle2D; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.PriorityQueue; import java.util.Set; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RuleIdentifier; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector; @@ -19,6 +23,7 @@ import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; +import lombok.NonNull; import lombok.experimental.FieldDefaults; @Data @@ -35,14 +40,10 @@ public class Image implements GenericSemanticNode { boolean transparent; Rectangle2D position; - boolean redaction; boolean ignored; + @Builder.Default - String redactionReason = ""; - @Builder.Default - String legalBasis = ""; - @Builder.Default - String matchedRule = ""; + PriorityQueue matchedRuleList = new 
PriorityQueue<>(); @EqualsAndHashCode.Exclude Page page; @@ -55,6 +56,72 @@ public class Image implements GenericSemanticNode { Set entities = new HashSet<>(); + public void apply(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis) { + + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, Collections.emptySet())); + } + + + public void applyWithReferences(@NonNull String ruleIdentifier, String comment, @NonNull String legalBasis, Collection references) { + + if (legalBasis.isBlank() || legalBasis.isEmpty()) { + throw new IllegalArgumentException("legal basis cannot be empty when redacting an entity"); + } + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, legalBasis, true, new HashSet<>(references))); + } + + + public void skip(@NonNull String ruleIdentifier, String comment) { + + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, Collections.emptySet())); + } + + + public void skipWithReferences(@NonNull String ruleIdentifier, String comment, Collection references) { + + matchedRuleList.add(new MatchedRule(RuleIdentifier.fromString(ruleIdentifier), comment, "", false, new HashSet<>(references))); + } + + + public void addMatchedRule(MatchedRule matchedRule) { + + matchedRuleList.add(matchedRule); + } + + + public void addMatchedRules(Collection matchedRules) { + + matchedRuleList.addAll(matchedRules); + } + + + public boolean isApplied() { + + return getMatchedRule().applied(); + } + + + public Set getReferences() { + + return getMatchedRule().references(); + } + + + public int getMatchedRuleUnit() { + + return getMatchedRule().ruleIdentifier().unit(); + } + + + public MatchedRule getMatchedRule() { + + return matchedRuleList.peek(); + } + + @Override 
public NodeType getType() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java index b8bc4fd1..021e9de1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java @@ -1,5 +1,22 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex; +import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators; + +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.springframework.stereotype.Service; + import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree; @@ -14,17 +31,9 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.Re import 
com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Service; - -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex; -import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex; -import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators; @Slf4j @Service @@ -259,13 +268,9 @@ public class EntityCreationService { } RedactionEntity mergedEntity = RedactionEntity.initialEntityNode(Boundary.merge(entitiesToMerge.stream().map(RedactionEntity::getBoundary).toList()), type, entityType); - mergedEntity.setRedaction(entitiesToMerge.stream().anyMatch(RedactionEntity::isRedaction)); mergedEntity.addEngines(entitiesToMerge.stream().flatMap(entityNode -> entityNode.getEngines().stream()).collect(Collectors.toSet())); - entitiesToMerge.stream().map(RedactionEntity::getMatchedRules).flatMap(Collection::stream).forEach(mergedEntity::addMatchedRule); + entitiesToMerge.stream().map(RedactionEntity::getMatchedRuleList).flatMap(Collection::stream).forEach(matchedRule -> mergedEntity.getMatchedRuleList().add(matchedRule)); - RedactionEntity entityWithHigherRuleNumber = entitiesToMerge.stream().max(Comparator.comparingInt(RedactionEntity::getMatchedRuleUnit)).orElse(entitiesToMerge.get(0)); - mergedEntity.setRedactionReason(entityWithHigherRuleNumber.getRedactionReason()); - mergedEntity.setLegalBasis(entityWithHigherRuleNumber.getLegalBasis()); 
mergedEntity.setDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDictionaryEntry)); mergedEntity.setDossierDictionaryEntry(entitiesToMerge.stream().anyMatch(RedactionEntity::isDossierDictionaryEntry)); mergedEntity.setIgnored(entitiesToMerge.stream().allMatch(RedactionEntity::isIgnored)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 37ae9db6..961e3bc9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -87,18 +87,18 @@ public class RedactionLogCreatorService { int sectionNumber = entity.getDeepestFullyContainingNode().getTreeId().isEmpty() ? 
0 : entity.getDeepestFullyContainingNode().getTreeId().get(0); return RedactionLogEntry.builder() - .color(getColor(entity.getType(), dossierTemplateId, entity.isRedaction())) - .reason(entity.getRedactionReason()) - .legalBasis(entity.getLegalBasis()) + .color(getColor(entity.getType(), dossierTemplateId, entity.isApplied())) + .reason(entity.getMatchedRule().reason()) + .legalBasis(entity.getMatchedRule().legalBasis()) .value(entity.getValue()) .type(entity.getType()) - .redacted(entity.isRedaction()) + .redacted(entity.isApplied()) .isHint(isHint(entity.getType(), dossierTemplateId)) .isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION)) .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) .section(entity.getDeepestFullyContainingNode().toString()) .sectionNumber(sectionNumber) - .matchedRule(entity.getMatchedRule()) + .matchedRule(entity.getMatchedRule().ruleIdentifier().toString()) .isDictionaryEntry(entity.isDictionaryEntry()) .textAfter(entity.getTextAfter()) .textBefore(entity.getTextBefore()) @@ -116,13 +116,13 @@ public class RedactionLogCreatorService { String imageType = image.getImageType().equals(ImageType.OTHER) ? 
"image" : image.getImageType().toString().toLowerCase(); return RedactionLogEntry.builder() .id(image.getId()) - .color(getColor(image.getImageType().toString(), dossierTemplateId, image.isRedaction())) + .color(getColor(image.getImageType().toString(), dossierTemplateId, image.isApplied())) .isImage(true) .type(imageType) - .redacted(image.isRedaction()) - .reason(image.getRedactionReason()) - .legalBasis(image.getLegalBasis()) - .matchedRule(image.getMatchedRule()) + .redacted(image.isApplied()) + .reason(image.getMatchedRule().reason()) + .legalBasis(image.getMatchedRule().legalBasis()) + .matchedRule(image.getMatchedRule().ruleIdentifier().toString()) .isHint(dictionaryService.isHint(image.getImageType().toString(), dossierTemplateId)) .isDictionaryEntry(false) .isRecommendation(false) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index ed84f828..402f8e25 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.util.List; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; @@ -41,6 +42,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @Test + @Disabled public void titleExtraction() throws IOException { AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A8591B/15-Curacron_ToxicidadeAgudaOral.pdf"); @@ -63,12 +65,13 @@ public class 
DocumineFloraTest extends AbstractRedactionIntegrationTest { } - @Test + @Disabled public void tableWithEmptyCols() throws IOException { // FIXME TableNodeFactory: 36, why has table no rows/cols here. - AnalyzeRequest request = prepareStorage("files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.pdf","files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json" ); + AnalyzeRequest request = prepareStorage("files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.pdf", + "files/Documine/Flora/A8591B/2009773e2e05919bb9e46aeedcc8b924.ORIGIN.TABLES.json"); System.out.println("Start Full integration test"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); @@ -88,8 +91,6 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { } - - @Configuration @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)}) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index eec86bb3..91b9577a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -222,7 +222,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { @Test public void titleExtraction() throws IOException { - AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + AnalyzeRequest request = 
uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); System.out.println("Start Full integration test"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); System.out.println("Finished structure analysis"); @@ -627,6 +627,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .annotationId("0b56ea1a87c83f351df177315af94f0d") .fileId(TEST_FILE_ID) .status(AnnotationStatus.APPROVED) + .legalBasis("Something") .requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC)) .processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC)) .build()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java index 18b7c3d8..24be60ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/RedactionEntityTest.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server.document.entity; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; import org.junit.jupiter.api.Test; @@ -14,12 +15,52 @@ public class RedactionEntityTest { public void testMatchedRule() { RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY); - entity.addMatchedRule("CBI.1.0"); - entity.addMatchedRule("CBI.2.0"); - entity.addMatchedRule("CBI.3.0"); - entity.addMatchedRule("CBI.4.0"); - assertThat(entity.getMatchedRule()).isEqualTo("CBI.4.0"); + 
entity.skip("CBI.1.0", ""); + entity.skip("CBI.2.0", ""); + entity.skip("CBI.3.0", ""); + entity.skip("CBI.4.1", ""); + entity.skip("CBI.4.0", ""); + assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.4.1"); assertThat(entity.getMatchedRuleUnit()).isEqualTo(4); } + + @Test + public void testMatchedRuleWithManualRedaction() { + + RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY); + entity.skip("MAN.2.0", ""); + entity.skip("CBI.2.0", ""); + entity.skip("CBI.3.0", ""); + entity.skip("CBI.4.1", ""); + entity.skip("CBI.4.0", ""); + assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("MAN.2.0"); + assertThat(entity.getMatchedRuleUnit()).isEqualTo(2); + } + + + @Test + public void testMatchedRuleWithNonsense() { + + RedactionEntity entity = RedactionEntity.initialEntityNode(new Boundary(1, 100), "PII", EntityType.ENTITY); + assertThrows(IllegalArgumentException.class, () -> { + entity.skip("", ""); + }); + assertThrows(IllegalArgumentException.class, () -> { + entity.skip("CBI.1.*", ""); + }); + assertThrows(IllegalArgumentException.class, () -> { + entity.skip("CBI", ""); + }); + assertThrows(IllegalArgumentException.class, () -> { + entity.skip("aaaaaaaaaa", ""); + }); + assertThrows(IllegalArgumentException.class, () -> { + entity.apply("CBI.0.0", "", ""); + }); + entity.skip("CBI.2.0", ""); + assertThat(entity.getMatchedRule().ruleIdentifier().toString()).isEqualTo("CBI.2.0"); + assertThat(entity.getMatchedRuleUnit()).isEqualTo(2); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java index 2633738c..488cf027 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/BoundaryTest.java @@ -74,4 +74,14 @@ class BoundaryTest { assertThrows(IndexOutOfBoundsException.class, () -> startBoundary.split(List.of(12, 40, 100))); } + + @Test + void testCompareTo() { + + Boundary beforeBoundary = new Boundary(1, 8); + Boundary afterBoundary = new Boundary(101, 102); + assertEquals(-1, beforeBoundary.compareTo(startBoundary)); + assertEquals(1, afterBoundary.compareTo(startBoundary)); + } + } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java index 98272601..443cf69a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/ManualResizeRedactionIntegrationTest.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.document.graph; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.wildfly.common.Assert.assertFalse; @@ -124,6 +123,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() .annotationId(initialId) .status(AnnotationStatus.APPROVED) + .legalBasis("Something") 
.requestDate(OffsetDateTime.now()) .build(); @@ -138,10 +138,11 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); + assertEquals("Something", entity.getMatchedRule().legalBasis()); assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); assertFalse(entity.isRemoved()); assertTrue(entity.isSkipRemoveEntitiesContainedInLarger()); - assertTrue(entity.isRedaction()); + assertTrue(entity.isApplied()); } @@ -163,12 +164,9 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati kieSession.fireAllRules(); kieSession.dispose(); - assertNull(entity.getDeepestFullyContainingNode()); - assertTrue(entity.getIntersectingNodes().isEmpty()); - assertTrue(entity.getPages().isEmpty()); assertEquals("David Ksenia", entity.getValue()); assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); - assertTrue(entity.isRemoved()); + assertTrue(entity.isIgnored()); } @@ -185,6 +183,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() .annotationId(initialId) .status(AnnotationStatus.APPROVED) + .legalBasis("Something") .requestDate(OffsetDateTime.now()) .build(); @@ -202,6 +201,7 @@ public class ManualResizeRedactionIntegrationTest extends BuildDocumentIntegrati assertEquals("David Ksenia", entity.getValue()); assertEquals(initialId, entity.getRedactionPositionsPerPage().get(0).getId()); assertFalse(entity.isRemoved()); + assertFalse(entity.isIgnored()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/services/DocumentPerformanceIntegrationTest.java 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/services/DocumentPerformanceIntegrationTest.java index ce65e99a..cd912a75 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/services/DocumentPerformanceIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/services/DocumentPerformanceIntegrationTest.java @@ -255,7 +255,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration List entityPositionsOnPage = page.getEntities() .stream() .filter(entityNode -> !entityNode.isRemoved()) - .filter(RedactionEntity::isRedaction) + .filter(RedactionEntity::isApplied) .flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream()) .filter(entityPosition -> entityPosition.getPage().equals(page)) .flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream()) @@ -269,7 +269,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration List entityPositionsOnPage = page.getEntities() .stream() .filter(entityNode -> !entityNode.isRemoved()) - .filter(entityNode -> !entityNode.isRedaction()) + .filter(entityNode -> !entityNode.isApplied()) .flatMap(entityNode -> entityNode.getRedactionPositionsPerPage().stream()) .filter(entityPosition -> entityPosition.getPage().equals(page)) .flatMap(entityPosition -> entityPosition.getRectanglePerLine().stream()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl index 973c03c3..d35404d1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl @@ -14,6 +14,12 @@ import 
java.util.Optional; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; @@ -61,16 +67,77 @@ rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" entityCreationService.byString("CTL", "must_redact", EntityType.ENTITY, $section), entityCreationService.byString("BL", "must_redact", EntityType.ENTITY, $section) ).forEach(entity -> { - entity.setRedactionReason("hint_only"); - entity.addMatchedRule("SYN.0.0"); + entity.skip("SYN.0.0", "hint_only"); entity.addEngine(Engine.RULE); insert(entity); }); end +// Rule unit: SYN.1 +rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" + when + $section: Section(containsString("CT") || containsString("BL")) + then + /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */ + entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. 
]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section) + .forEach(entity -> { + entity.skip("SYN.1.0", ""); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + //------------------------------------ CBI rules ------------------------------------ +// Rule unit: CBI.0 +rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + then + $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_author", dictionaryEntry) + then + $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: CBI.1 +rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + then + $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); + end + +rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_address", dictionaryEntry) + then + $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: CBI.2 +rule "CBI.2.0: Don't redact genitive CBI_author" + when + $entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), getIsApplied()) + then + RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", 
EntityType.FALSE_POSITIVE, document); + falsePositive.skip("CBI.2.0", "Genitive Author found"); + insert(falsePositive); + end + + // Rule unit: CBI.3 rule "CBI.3.0: Redacted because Section contains Vertebrate" when @@ -78,12 +145,13 @@ rule "CBI.3.0: Redacted because Section contains Vertebrate" then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.0"); - entity.addReferences($section.getEntitiesOfType("vertebrate")); + entity.applyWithReferences( + "CBI.3.0", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $section.getEntitiesOfType("vertebrate") + ); }); end @@ -94,12 +162,13 @@ rule "CBI.3.1: Redacted because Table Row contains Vertebrate" $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); + entity.applyWithReferences( + "CBI.3.1", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + ); }); end @@ -109,10 +178,8 @@ rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("No vertebrate found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.2"); + entity.skip("CBI.3.2", "No vertebrate found"); }); end @@ -123,10 +190,8 @@ rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("No vertebrate found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.3"); + entity.skip("CBI.3.3", "No vertebrate found"); }); end @@ -141,11 +206,12 @@ rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is foun then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate but a no redaction indicator found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.4.0"); - entity.addReferences($section.getEntitiesOfType("no_redaction_indicator")); + entity.skipWithReferences( + "CBI.4.0", + "Vertebrate but a no redaction indicator found", + $section.getEntitiesOfType("no_redaction_indicator") + ); }); end @@ -158,12 +224,14 @@ rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is foun $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate but a no redaction indicator found"); - 
entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.4.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); - entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity)); + entity.skipWithReferences( + "CBI.4.1", + "Vertebrate but a no redaction indicator found", + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + ); }); end @@ -178,13 +246,15 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.5.0"); - entity.addReferences($section.getEntitiesOfType("no_redaction_indicator")); - entity.addReferences($section.getEntitiesOfType("redaction_indicator")); + entity.applyWithReferences( + "CBI.5.0", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( /* reference the same two indicator types the replaced addReferences calls used */ + $section.getEntitiesOfType("no_redaction_indicator").stream(), + $section.getEntitiesOfType("redaction_indicator").stream()).toList() + ); }); end @@ -197,13 +267,90 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art.
63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.5.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); - entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity)); + entity.applyWithReferences( + "CBI.5.1", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( /* reference the two indicator types named in the reason, matching CBI.5.0's intent */ + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + ); + }); + end + + +// Rule unit: CBI.6 +rule "CBI.6.0: Don't redact Names and Addresses if vertebrate but also published_information is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.skipWithReferences( + "CBI.6.0", + "vertebrate but also published_information found", + Stream.concat( + $section.getEntitiesOfType("vertebrate").stream(), + $section.getEntitiesOfType("published_information").stream()).toList() + ); + }); + end + +rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published_information is found in Table Row" + when + $table: Table(hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "published_information")) /* select rows by the types this rule is about, not CBI.5.1's indicator types */ + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.skipWithReferences( + "CBI.6.1", + "vertebrate but also published_information
found", + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("published_information", entity).stream()).toList() + ); + }); + end + + +// Rule unit: CBI.7 +rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.1", + "Published Information found in row", + $table.getEntitiesOfTypeInSameRow("published_information", redactionEntity) + ); }); end @@ -215,12 +362,13 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("must_redact entity found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.8.0"); - entity.addReferences($section.getEntitiesOfType("must_redact")); + entity.applyWithReferences( + "CBI.8.0", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $section.getEntitiesOfType("must_redact") + ); }); end @@ -231,12 +379,13 @@ rule "CBI.8.1: Redacted because Table Row contains must_redact entity" $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("must_redact entity found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.8.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("must_redact", entity)); + entity.applyWithReferences( + "CBI.8.1", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); }); end @@ -253,11 +402,8 @@ rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non verteb .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.9.0"); + redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author(s) found"); - redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(redactionEntity); }); end @@ -273,11 +419,44 @@ rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrat .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.9.1"); + redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + + +// Rule unit: CBI.10 +rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)" + 
agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + +rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author found"); - redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(redactionEntity); }); end @@ -308,8 +487,7 @@ rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.addMatchedRule("CBI.12.0"); - redactionEntity.setRedactionReason("Author(s) header found"); + redactionEntity.skip("CBI.12.0", "Author(s) header found"); redactionEntity.addEngine(Engine.RULE); insert(redactionEntity); }); @@ -322,9 +500,7 @@ rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \ $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No")) 
.filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(authorEntity -> { - authorEntity.setRedaction(false); - authorEntity.setRedactionReason("Not redacted because it's row does not belong to a vertebrate study"); - authorEntity.addMatchedRule("CBI.12.1"); + authorEntity.skip("CBI.12.1", "Not redacted because it's row does not belong to a vertebrate study"); }); end @@ -335,23 +511,28 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.setRedactionReason("Redacted because it's row belongs to a vertebrate study"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - authorEntity.addMatchedRule("CBI.12.2"); + authorEntity.apply("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end +// Rule unit: CBI.13 +rule "CBI.13.0: Ignore CBI Address Recommendations" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: RedactionEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION) + then + $entity.removeFromGraph(); + retract($entity) + end + + // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then - $sponsorEntity.setRedaction(true); - $sponsorEntity.setRedactionReason("Redacted because it represents a sponsor company"); - $sponsorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - $sponsorEntity.addMatchedRule("CBI.14.0"); + $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); end @@ -377,10 +558,7 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.15.0"); - redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.apply("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end @@ -404,10 +582,7 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.15.1"); - redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.apply("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end @@ -421,13 +596,10 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("CBI.16.0"); + entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); - insert(entity); dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + insert(entity); }); end @@ -439,10 +611,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("CBI.16.1"); + entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); @@ -457,9 +626,8 @@ rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, with then entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) .forEach(entity -> { - entity.setRedactionReason("Line after \"Source\" in Test Organism Section"); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.17.0"); + entity.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section"); insert(entity); }); end @@ -470,9 +638,8 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with then entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) .forEach(entity -> { - entity.setRedactionReason("Line 
after \"Source:\" in Test Animals Section"); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.17.1"); + entity.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section"); insert(entity); }); end @@ -489,7 +656,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" ) then RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"); - expandedEntity.addMatchedRule("CBI.18.0"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); $entityToExpand.removeFromGraph(); retract($entityToExpand); insert(expandedEntity); @@ -502,7 +669,9 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRule("CBI.19.0"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.removeFromGraph(); + retract($entityToExpand); insert(expandedEntity); end @@ -516,10 +685,8 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.setRedaction(false); - laboratoryEntity.addMatchedRule("CBI.20.0"); + laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); laboratoryEntity.addEngine(Engine.RULE); - laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study"); dictionary.addLocalDictionaryEntry(laboratoryEntity); insert(laboratoryEntity); }); @@ -533,11 +700,8 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and 
\"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.setRedaction(true); - laboratoryEntity.addMatchedRule("CBI.20.1"); + laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); laboratoryEntity.addEngine(Engine.RULE); - laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found"); - laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addLocalDictionaryEntry(laboratoryEntity); insert(laboratoryEntity); }); @@ -552,10 +716,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: RedactionEntity(type == "PII", dictionaryEntry) then - $pii.setRedaction(true); - $pii.setRedactionReason("Personal Information found"); - $pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - $pii.addMatchedRule("PII.0.0"); + $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "PII.0.1: Redact all PII (vertebrate study)" @@ -563,10 +724,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: RedactionEntity(type == "PII", dictionaryEntry) then - $pii.setRedaction(true); - $pii.setRedactionReason("Personal Information found"); - $pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - $pii.addMatchedRule("PII.0.1"); + $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -578,11 +736,8 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) 
.forEach(emailEntity -> { - emailEntity.setRedaction(true); emailEntity.addEngine(Engine.RULE); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - emailEntity.addMatchedRule("PII.1.0"); + emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(emailEntity); }); end @@ -594,16 +749,87 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> { - emailEntity.setRedaction(true); emailEntity.addEngine(Engine.RULE); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - emailEntity.addMatchedRule("PII.1.1"); + emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); /* vertebrate-study variants use Article 39(e)(2), as the other vertebrate rules in this file do */ insert(emailEntity); }); end +// Rule unit: PII.2 +rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> { + contactEntity.addEngine(Engine.RULE); + contactEntity.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(contactEntity); + }); + end + +rule
"PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> { + contactEntity.addEngine(Engine.RULE); + contactEntity.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(contactEntity); + }); + end + + +// Rule unit: PII.3 +rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.apply("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", 
EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.apply("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" when @@ -631,11 +857,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.4.0"); + contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); insert(contactEntity); }); end @@ -666,11 +889,44 @@ rule "PII.4.1: Redact line after contact information keywords (non vertebrate st then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.4.1"); + contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); + contactEntity.addEngine(Engine.RULE); + insert(contactEntity); + }); + end + + +// Rule unit: PII.5 +rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> { + contactEntity.apply("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + contactEntity.addEngine(Engine.RULE); + insert(contactEntity); + }); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> { + contactEntity.apply("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2e)"); insert(contactEntity); }); end @@ -687,11 +943,8 @@ rule "PII.6.0: redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.6.0"); + contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found between contact keywords"); - contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(contactEntity); }); end @@ -706,11 +959,8 @@ rule "PII.6.1: redact line between contact keywords" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.6.1"); + contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found between contact keywords"); - contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); insert(contactEntity); }); end @@ -733,10 +983,7 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Applicant information was found"); - entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("PII.7.0"); + entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -758,10 +1005,7 @@ rule "PII.7.1: Redact contact information if applicant is found (non vertebrate 
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Applicant information was found"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("PII.7.1"); + entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -785,10 +1029,7 @@ rule "PII.8.0: Redact contact information if producer is found" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Producer was found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - entity.addMatchedRule("PII.8.0"); + entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -810,10 +1051,7 @@ rule "PII.8.1: Redact contact information if producer is found" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Producer was found"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("PII.8.1"); + entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -828,11 +1066,8 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebr then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.0"); + authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); 
authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -844,11 +1079,8 @@ rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non v then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.1"); + authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -860,11 +1092,8 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebr then entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.2"); + authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -876,16 +1105,26 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (verte then entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.3"); + authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was 
found"); - authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end +// Rule unit: PII.10 +rule "PII.10.0: Redact study director abbreviation" + when + $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) + then + entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.apply("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + insert(entity); + }); + end + + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" when @@ -893,10 +1132,7 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.11.0"); - authorEntity.setRedactionReason("On behalf of Sequani Ltd.: Name Title was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -908,13 +1144,27 @@ rule "PII.12.0: Expand PII entities with salutation prefix" $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRule("PII.12.0"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + expandedEntity.addEngine(Engine.RULE); insert(expandedEntity); end //------------------------------------ Other rules ------------------------------------ +// Rule unit: ETC.0 +rule "ETC.0.0: Purity Hint" + when + 
$section: Section(containsStringIgnoreCase("purity")) + then + entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.ENTITY, 1, $section) + .forEach(hint -> { + hint.addEngine(Engine.RULE); + hint.skip("ETC.0.0", ""); + }); + end + + // Rule unit: ETC.1 rule "ETC.1.0: Redact Purity" when @@ -922,11 +1172,8 @@ rule "ETC.1.0: Redact Purity" then entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.addMatchedRule("ETC.1.0"); + entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)"); entity.addEngine(Engine.RULE); - entity.setRedaction(true); - entity.setRedactionReason("Purity found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2a)"); }); end @@ -937,10 +1184,7 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.setRedaction(true); - $signature.setMatchedRule("ETC.2.0"); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.2.0: Redact signatures (vertebrate study)" @@ -948,10 +1192,7 @@ rule "ETC.2.0: Redact signatures (vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.setRedaction(true); - $signature.setMatchedRule("ETC.2.0"); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -961,10 +1202,7 @@ rule "ETC.3.0: Redact logos (vertebrate study)" 
not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.3.0"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.3.1: Redact logos (non vertebrate study)" @@ -972,10 +1210,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.3.1"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -984,10 +1219,7 @@ rule "ETC.4.0: Redact dossier dictionary entries" when $dossierRedaction: RedactionEntity(type == "dossier_redaction") then - $dossierRedaction.setRedaction(true); - $dossierRedaction.addMatchedRule("ETC.4.0"); - $dossierRedaction.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - $dossierRedaction.setRedactionReason("Specification of impurity found"); + $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -1012,10 +1244,7 @@ rule "ETC.6.0: Redact CAS Number" .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("ETC.6.0"); - redactionEntity.setRedactionReason("Sample # found in Header"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.apply("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); insert(redactionEntity); }); end @@ -1039,10 +1268,7 @@ rule "ETC.8.0: Redact formulas (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.8.0"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.8.1: Redact formulas (non vertebrate study)" @@ -1050,10 +1276,7 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.8.1"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -1086,6 +1309,19 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" end +// Rule unit: AI.2 +rule "AI.2.0: add all NER Entities of any type except CBI_author" + salience 999 + when + nerEntities: NerEntities() + then + nerEntities.getNerEntityList().stream() + .filter(nerEntity -> !nerEntity.type().equals("CBI_author")) + .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, nerEntity.type().toLowerCase(), EntityType.RECOMMENDATION, document)) + .forEach(entity -> insert(entity)); + end + + //------------------------------------ Manual redaction rules ------------------------------------ // Rule unit: MAN.0 @@ -1130,8 +1366,7 @@ rule "MAN.2.0: Apply force redaction" ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then - $entityToForce.setLegalBasis($legalBasis); - $entityToForce.setRedaction(true); + 
$entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index f2c7867d..afbfd462 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -48,9 +48,8 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end -// --------------------------------------- manual redaction rules ------------------------------------------------------------------- - -rule "Apply manual resize redaction" +// Rule unit: MAN.0 +rule "MAN.0.0: Apply manual resize redaction" salience 128 when $resizeRedaction: ManualResizeRedaction($id: annotationId) @@ -61,18 +60,19 @@ rule "Apply manual resize redaction" update($entityToBeResized); end -rule "Apply id removals that are valid and not in forced redactions to Entity" + +// Rule unit: MAN.1 +rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.removeFromGraph(); - retract($entityToBeRemoved); + $entityToBeRemoved.setIgnored(true); end -rule "Apply id removals that are valid and not in forced redactions to Image" +rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) @@ -82,18 
+82,21 @@ rule "Apply id removals that are valid and not in forced redactions to Image" $imageEntityToBeRemoved.setIgnored(true); end -rule "Apply force redaction" + +// Rule unit: MAN.2 +rule "MAN.2.0: Apply force redaction" salience 128 when ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then - $entityToForce.setLegalBasis($legalBasis); - $entityToForce.setRedaction(true); + $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); end -rule "Apply image recategorization" + +// Rule unit: MAN.3 +rule "MAN.3.0: Apply image recategorization" salience 128 when ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) @@ -102,4 +105,18 @@ rule "Apply image recategorization" $image.setImageType(ImageType.fromString($imageType)); end +//------------------------------------ Local dictionary search rules ------------------------------------ +// Rule unit: LDS.0 +rule "LDS.0.0: run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + insert(entity); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 973c03c3..57c5e799 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -14,6 +14,12 @@ import 
java.util.Optional; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; @@ -61,8 +67,7 @@ rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" entityCreationService.byString("CTL", "must_redact", EntityType.ENTITY, $section), entityCreationService.byString("BL", "must_redact", EntityType.ENTITY, $section) ).forEach(entity -> { - entity.setRedactionReason("hint_only"); - entity.addMatchedRule("SYN.0.0"); + entity.skip("SYN.0.0", "hint_only"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -78,12 +83,13 @@ rule "CBI.3.0: Redacted because Section contains Vertebrate" then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.0"); - entity.addReferences($section.getEntitiesOfType("vertebrate")); + entity.applyWithReferences( + "CBI.3.0", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $section.getEntitiesOfType("vertebrate") + ); }); end @@ -94,12 +100,13 @@ rule "CBI.3.1: Redacted because Table Row contains Vertebrate" $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); + entity.applyWithReferences( + "CBI.3.1", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + ); }); end @@ -109,10 +116,8 @@ rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("No vertebrate found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.2"); + entity.skip("CBI.3.2", "No vertebrate found"); }); end @@ -123,10 +128,8 @@ rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("No vertebrate found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.3.3"); + entity.skip("CBI.3.3", "No vertebrate found"); }); end @@ -141,11 +144,12 @@ rule 
"CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is foun then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate but a no redaction indicator found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.4.0"); - entity.addReferences($section.getEntitiesOfType("no_redaction_indicator")); + entity.skipWithReferences( + "CBI.4.0", + "Vertebrate but a no redaction indicator found", + $section.getEntitiesOfType("no_redaction_indicator") + ); }); end @@ -158,12 +162,14 @@ rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is foun $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("Vertebrate but a no redaction indicator found"); - entity.setRedaction(false); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.4.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); - entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity)); + entity.skipWithReferences( + "CBI.4.1", + "Vertebrate but a no redaction indicator found", + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + ); }); end @@ -178,13 +184,15 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.5.0"); - entity.addReferences($section.getEntitiesOfType("no_redaction_indicator")); - entity.addReferences($section.getEntitiesOfType("redaction_indicator")); + entity.applyWithReferences( + "CBI.5.0", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( + $section.getEntitiesOfType("no_redaction_indicator").stream(), + $section.getEntitiesOfType("redaction_indicator").stream()).toList() + ); }); end @@ -197,13 +205,15 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no-redaction_indicator")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("no_redaction_indicator but also redaction_indicator found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.5.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("vertebrate", entity)); - entity.addReferences($table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity)); + entity.applyWithReferences( + "CBI.5.1", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + ); }); end @@ -215,12 +225,13 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(entity -> { - entity.setRedactionReason("must_redact entity found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.8.0"); - entity.addReferences($section.getEntitiesOfType("must_redact")); + entity.applyWithReferences( + "CBI.8.0", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $section.getEntitiesOfType("must_redact") + ); }); end @@ -231,12 +242,13 @@ rule "CBI.8.1: Redacted because Table Row contains must_redact entity" $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { - entity.setRedactionReason("must_redact entity found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - entity.setRedaction(true); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.8.1"); - entity.addReferences($table.getEntitiesOfTypeInSameRow("must_redact", entity)); + entity.applyWithReferences( + "CBI.8.1", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); }); end @@ -253,11 +265,8 @@ rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non verteb .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.9.0"); + redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author(s) found"); - redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(redactionEntity); }); end @@ -273,11 +282,8 @@ rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrat .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.9.1"); + redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); redactionEntity.addEngine(Engine.RULE); - redactionEntity.setRedactionReason("Author found"); - redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(redactionEntity); }); end @@ -308,8 +314,7 @@ rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.addMatchedRule("CBI.12.0"); - redactionEntity.setRedactionReason("Author(s) header found"); + redactionEntity.skip("CBI.12.0", "Author(s) header found"); redactionEntity.addEngine(Engine.RULE); insert(redactionEntity); }); @@ -322,9 +327,7 @@ rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \ $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(authorEntity -> { - 
authorEntity.setRedaction(false); - authorEntity.setRedactionReason("Not redacted because it's row does not belong to a vertebrate study"); - authorEntity.addMatchedRule("CBI.12.1"); + authorEntity.skip("CBI.12.1", "Not redacted because it's row does not belong to a vertebrate study"); }); end @@ -335,10 +338,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.setRedactionReason("Redacted because it's row belongs to a vertebrate study"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - authorEntity.addMatchedRule("CBI.12.2"); + authorEntity.apply("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end @@ -348,10 +348,7 @@ rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at when $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then - $sponsorEntity.setRedaction(true); - $sponsorEntity.setRedactionReason("Redacted because it represents a sponsor company"); - $sponsorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - $sponsorEntity.addMatchedRule("CBI.14.0"); + $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); end @@ -377,10 +374,7 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.15.0"); - redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.apply("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end @@ -404,10 +398,7 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("CBI.15.1"); - redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.apply("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end @@ -421,13 +412,10 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("CBI.16.0"); + entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); - insert(entity); dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + insert(entity); }); end @@ -439,10 +427,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("CBI.16.1"); + entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); @@ -457,9 +442,8 @@ rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, with then entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) .forEach(entity -> { - entity.setRedactionReason("Line after \"Source\" in Test Organism Section"); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.17.0"); + entity.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section"); insert(entity); }); end @@ -470,9 +454,8 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with then entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) .forEach(entity -> { - entity.setRedactionReason("Line 
after \"Source:\" in Test Animals Section"); entity.addEngine(Engine.RULE); - entity.addMatchedRule("CBI.17.1"); + entity.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section"); insert(entity); }); end @@ -489,7 +472,7 @@ rule "CBI.18.0: Expand CBI_author entities with firstname initials" ) then RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"); - expandedEntity.addMatchedRule("CBI.18.0"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); $entityToExpand.removeFromGraph(); retract($entityToExpand); insert(expandedEntity); @@ -502,7 +485,9 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRule("CBI.19.0"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.removeFromGraph(); + retract($entityToExpand); insert(expandedEntity); end @@ -516,10 +501,8 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.setRedaction(false); - laboratoryEntity.addMatchedRule("CBI.20.0"); + laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); laboratoryEntity.addEngine(Engine.RULE); - laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study"); dictionary.addLocalDictionaryEntry(laboratoryEntity); insert(laboratoryEntity); }); @@ -533,11 +516,8 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and 
\"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.setRedaction(true); - laboratoryEntity.addMatchedRule("CBI.20.1"); + laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); laboratoryEntity.addEngine(Engine.RULE); - laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found"); - laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addLocalDictionaryEntry(laboratoryEntity); insert(laboratoryEntity); }); @@ -552,10 +532,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: RedactionEntity(type == "PII", dictionaryEntry) then - $pii.setRedaction(true); - $pii.setRedactionReason("Personal Information found"); - $pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - $pii.addMatchedRule("PII.0.0"); + $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "PII.0.1: Redact all PII (vertebrate study)" @@ -563,10 +540,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: RedactionEntity(type == "PII", dictionaryEntry) then - $pii.setRedaction(true); - $pii.setRedactionReason("Personal Information found"); - $pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - $pii.addMatchedRule("PII.0.1"); + $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -578,11 +552,8 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) 
.forEach(emailEntity -> { - emailEntity.setRedaction(true); emailEntity.addEngine(Engine.RULE); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - emailEntity.addMatchedRule("PII.1.0"); + emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(emailEntity); }); end @@ -594,11 +565,8 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> { - emailEntity.setRedaction(true); emailEntity.addEngine(Engine.RULE); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - emailEntity.addMatchedRule("PII.1.1"); + emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(emailEntity); }); end @@ -631,11 +599,8 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.4.0"); + contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2e)"); insert(contactEntity); }); end @@ -666,11 +631,8 @@ rule "PII.4.1: Redact line after contact information keywords (non vertebrate st then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.4.1"); + contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); insert(contactEntity); }); end @@ -687,11 +649,8 @@ rule "PII.6.0: redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.6.0"); + contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found between contact keywords"); - contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(contactEntity); }); end @@ -706,11 +665,8 @@ rule "PII.6.1: redact line between contact keywords" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule("PII.6.1"); + contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002"); contactEntity.addEngine(Engine.RULE); - contactEntity.setRedactionReason("Found between contact keywords"); - contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); insert(contactEntity); }); end @@ -733,10 +689,7 @@ rule "PII.7.0: 
Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Applicant information was found"); - entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("PII.7.0"); + entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -758,10 +711,7 @@ rule "PII.7.1: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Applicant information was found"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("PII.7.1"); + entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -785,10 +735,7 @@ rule "PII.8.0: Redact contact information if producer is found" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Producer was found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - entity.addMatchedRule("PII.8.0"); + entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -810,10 +757,7 @@ rule "PII.8.1: Redact contact information if producer is found" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Producer was found"); - entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); - entity.addMatchedRule("PII.8.1"); + entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); entity.addEngine(Engine.RULE); insert(entity); }); @@ -828,11 +772,8 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebr then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.0"); + authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -844,11 +785,8 @@ rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non v then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.1"); + authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -860,11 +798,8 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebr then 
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.2"); + authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -876,11 +811,8 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (verte then entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.9.3"); + authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); authorEntity.addEngine(Engine.RULE); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -893,10 +825,7 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule("PII.11.0"); - authorEntity.setRedactionReason("On behalf of Sequani Ltd.: Name Title was found"); - authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end @@ -908,7 +837,8 @@ rule "PII.12.0: Expand PII entities with salutation prefix" $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, 
"\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRule("PII.12.0"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + expandedEntity.addEngine(Engine.RULE); insert(expandedEntity); end @@ -922,11 +852,8 @@ rule "ETC.1.0: Redact Purity" then entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.addMatchedRule("ETC.1.0"); + entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)"); entity.addEngine(Engine.RULE); - entity.setRedaction(true); - entity.setRedactionReason("Purity found"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2a)"); }); end @@ -937,10 +864,7 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.setRedaction(true); - $signature.setMatchedRule("ETC.2.0"); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.2.0: Redact signatures (vertebrate study)" @@ -948,10 +872,7 @@ rule "ETC.2.0: Redact signatures (vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.setRedaction(true); - $signature.setMatchedRule("ETC.2.0"); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -961,10 +882,7 @@ rule "ETC.3.0: Redact logos 
(vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.3.0"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.3.1: Redact logos (non vertebrate study)" @@ -972,10 +890,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.3.1"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -984,10 +899,7 @@ rule "ETC.4.0: Redact dossier dictionary entries" when $dossierRedaction: RedactionEntity(type == "dossier_redaction") then - $dossierRedaction.setRedaction(true); - $dossierRedaction.addMatchedRule("ETC.4.0"); - $dossierRedaction.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); - $dossierRedaction.setRedactionReason("Specification of impurity found"); + $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end @@ -1012,10 +924,7 @@ rule "ETC.6.0: Redact CAS Number" .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule("ETC.6.0"); - redactionEntity.setRedactionReason("Sample # found in Header"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.apply("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); insert(redactionEntity); }); end @@ -1039,10 +948,7 @@ rule "ETC.8.0: Redact formulas (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.8.0"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.8.1: Redact formulas (non vertebrate study)" @@ -1050,10 +956,7 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.setRedaction(true); - $logo.setMatchedRule("ETC.8.1"); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.apply("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -1130,8 +1033,7 @@ rule "MAN.2.0: Apply force redaction" ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then - $entityToForce.setLegalBasis($legalBasis); - $entityToForce.setRedaction(true); + $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index 8be4a838..34382527 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -68,10 +68,7 @@ rule "Always redact CBI_author" when $cbiAuthor: RedactionEntity(type == "CBI_author", 
entityType == EntityType.ENTITY) then - $cbiAuthor.addMatchedRule("0"); - $cbiAuthor.setRedaction(true); - $cbiAuthor.setRedactionReason("Author found"); - $cbiAuthor.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $cbiAuthor.apply("CBI.0.0", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // --------------------------------------- PII rules ------------------------------------------------------------------- @@ -81,10 +78,7 @@ rule "Always redact PII" when $cbiAuthor: RedactionEntity(type == "PII", entityType == EntityType.ENTITY) then - $cbiAuthor.addMatchedRule("1"); - $cbiAuthor.setRedaction(true); - $cbiAuthor.setRedactionReason("PII found"); - $cbiAuthor.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002"); + $cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // --------------------------------------- merging rules ------------------------------------------------------------------- @@ -147,7 +141,7 @@ rule "remove Entity of lower rank, when equal boundaries and entityType" salience 32 when $higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary) - $lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !redaction) + $lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !applied) then $lowerRank.removeFromGraph(); retract($lowerRank); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index f97c680f..c003b9d5 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -6,7 +6,11 @@ import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.u import java.util.List; import java.util.LinkedList; -import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; @@ -15,7 +19,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.te import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import java.util.Set import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; @@ -32,9 +35,6 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.en import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; -import java.util.stream.Collectors; -import java.util.Collection; -import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility; 
global Document document @@ -43,178 +43,336 @@ global ManualRedactionApplicationService manualRedactionApplicationService global NerEntitiesAdapter nerEntitiesAdapter global Dictionary dictionary -// --------------------------------------- queries ------------------------------------------------------------------- +//------------------------------------ queries ------------------------------------ query "getFileAttributes" $fileAttribute: FileAttribute() end -// --------------------------------------- CBI rules ------------------------------------------------------------------- +//------------------------------------ Syngenta specific rules ------------------------------------ -rule "0: Expand CBI_author entities with firstname initials" - no-loop true +// Rule unit: SYN.0 +rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" when - $entityToExpand: RedactionEntity(type == "CBI_author", - value.matches("[^\\s]+"), - textAfter.startsWith(" "), - anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") - ) + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("CTL/") || containsString("BL/")) then - RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"); - expandedEntity.addMatchedRule(0); - $entityToExpand.removeFromGraph(); - retract($entityToExpand); - insert(expandedEntity); - end - -rule "0: Expand CBI_author and PII entities with salutation prefix" - when - $entityToExpand: RedactionEntity((type == "CBI_author" || type == "PII"), anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) - then - RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); - expandedEntity.addMatchedRule(0); - insert(expandedEntity); - end - -rule "1: Redacted because Section contains Vertebrate" - when - $section: Section(hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(1); - redactionEntity.setRedactionReason("Vertebrate Found in this section"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); - }); - end - -rule "2: Not Redacted because Section contains no Vertebrate" - when - $section: Section(!hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.addMatchedRule(2); - redactionEntity.setRedactionReason("No Vertebrate Found in this section"); + Stream.concat( + entityCreationService.byString("CTL", "must_redact", EntityType.ENTITY, $section), + entityCreationService.byString("BL", "must_redact", EntityType.ENTITY, $section) + ).forEach(entity -> { + entity.skip("SYN.0.0", "hint_only"); + entity.addEngine(Engine.RULE); + insert(entity); }); end -rule "3: Do not redact Names and Addresses if no redaction Indicator is contained" + +//------------------------------------ CBI rules ------------------------------------ + +// Rule unit: CBI.3 +rule "CBI.3.0: Redacted because Section contains Vertebrate" when - $section: Section(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.addMatchedRule(3); - redactionEntity.setRedactionReason("Vertebrate and a no-redaction-indicator found in this section"); + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndRedactAndReference( + "CBI.3.0", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $section.getEntitiesOfType("vertebrate") + ); }); end -rule "4: Redact Names and Addresses if no_redaction_indicator and redaction_indicator is contained" +rule "CBI.3.1: Redacted because Table Row contains Vertebrate" when - $section: Section(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(4); - redactionEntity.setRedactionReason("Vertebrate and a no-redaction-indicator, but also redaction-indicator, found in this section"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndRedactAndReference( + "CBI.3.1", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + ); }); end -rule "5: Do not redact Names and Addresses if published information found" - +rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" when - $section: Section(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - List publishedInformationEntities = $section.getEntitiesOfType("published_information"); $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(false); - redactionEntity.setRedactionReason("Vertebrate but also Published Information found in this section"); - redactionEntity.addReferences(publishedInformationEntities); + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRule("CBI.3.2", "No vertebrate found"); }); end -rule "6.0: Add all Cell's with Header Author(s) as CBI_author" +rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRule("CBI.3.3", "No vertebrate found"); + }); + end + + +// Rule unit: CBI.4 +rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", 
"CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndReference( + "CBI.4.0", + "Vertebrate but a no redaction indicator found", + $section.getEntitiesOfType("no_redaction_indicator") + ); + }); + end + +rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is found in Table Row" + when + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndReference( + "CBI.4.1", + "Vertebrate but a no redaction indicator found", + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + ); + }); + end + + +// Rule unit: CBI.5 +rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("redaction_indicator"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndRedactAndReference( + "CBI.5.0", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + Stream.concat( + $section.getEntitiesOfType("redaction_indicator").stream(), + $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() + ); + }); + end + +rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Table Row" + when + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndRedactAndReference( + "CBI.5.1", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + ); + }); + end + + +// Rule unit: CBI.8 +rule "CBI.8.0: Redacted because Section contains must_redact entity" + when + $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndRedactAndReference( + "CBI.8.0", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $section.getEntitiesOfType("must_redact") + ); + }); + end + +rule "CBI.8.1: Redacted because Table Row contains must_redact entity" + when + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.addMatchedRuleAndRedactAndReference( + "CBI.8.1", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); + end + + +// Rule unit: CBI.9 +rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $table: Table(hasHeader("Author(s)")) then $table.streamTableCellsWithHeader("Author(s)") .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(redactionEntity -> { - redactionEntity.addMatchedRule(6); - redactionEntity.setRedactionReason("Author(s) header found"); + redactionEntity.addMatchedRuleAndRedact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); insert(redactionEntity); }); end -rule "6.1: Dont redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" +rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> 
entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.addMatchedRuleAndRedact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + + +// Rule unit: CBI.11 +rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -1 + when + $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N")) + then + $table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); + end + + +// Rule unit: CBI.12 +rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" + salience 1 + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author")) + then + Stream.concat( + $table.streamTableCellsWithHeader("Author(s)"), + $table.streamTableCellsWithHeader("Author") + ) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.skip("CBI.12.0", "Author(s) header found"); + redactionEntity.addEngine(Engine.RULE); + insert(redactionEntity); + }); + end + +rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" when $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "N") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "No")) then $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(authorEntity -> { - authorEntity.setRedaction(false); - authorEntity.setRedactionReason("Not redacted because it's row does not belong to a vertebrate study"); 
- authorEntity.setLegalBasis(""); - authorEntity.addMatchedRule(6); + authorEntity.addMatchedRule("CBI.12.1", "Not redacted because it's row does not belong to a vertebrate study"); }); end -rule "7: Redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value Yes" +rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value Yes" when $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "Y") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "Yes")) then $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.setRedactionReason("Redacted because it's row belongs to a vertebrate study"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - authorEntity.addMatchedRule(7); + authorEntity.addMatchedRuleAndRedact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); }); end -rule "8: Redact if must_redact entity is found" - when - $section: Section(hasEntitiesOfType("must_redact"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.setRedactionReason("must_redact entry was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); - redactionEntity.addMatchedRule(8); - }); - end -rule "9: Redact CBI_sponsor entities if preceded by \" batches produced at\"" +// Rule unit: CBI.14 +rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when $sponsorEntity: RedactionEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then - $sponsorEntity.setRedaction(true); - $sponsorEntity.setRedactionReason("Redacted because it represents a sponsor company"); - $sponsorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - $sponsorEntity.addMatchedRule(9); + $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); end -rule "10: Redact row if row contains \"determination of residues\" and livestock keyword" + +// Rule unit: CBI.15 +rule "CBI.15.0: Redact row if row contains \"determination of residues\" and livestock keyword" + when + $keyword: String() from List.of("livestock", + "live stock", + "tissue", + "tissues", + "liver", + "muscle", + "bovine", + "ruminant", + "ruminants") + $residueKeyword: String() from List.of("determination of residues", "determination of total residues") + $section: Section(!hasTables(), + containsStringIgnoreCase($residueKeyword), + containsStringIgnoreCase($keyword)) + then + entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) + .forEach(keywordEntity -> insert(keywordEntity)); + + $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + .forEach(redactionEntity -> { + redactionEntity.addMatchedRuleAndRedact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + }); + end + +rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determination of residues\" and livestock keyword" when $keyword: String() from List.of("livestock", "live stock", @@ -226,8 +384,7 @@ rule "10: Redact row if row contains \"determination of residues\" and livestock "ruminant", "ruminants") $residueKeyword: String() from List.of("determination of residues", "determination of total residues") - $table: Table(containsStringIgnoreCase($residueKeyword) - && containsStringIgnoreCase($keyword)) + $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) .forEach(keywordEntity -> insert(keywordEntity)); @@ -235,101 +392,184 @@ rule "10: Redact row if row contains \"determination of residues\" and livestock $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.setRedactionReason("Determination of residues and keyword \"" + $keyword + "\" was found."); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - redactionEntity.addMatchedRule(10); + redactionEntity.addMatchedRuleAndRedact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); }); end -rule "11: Redact if CTL/* or BL/* was found" - when - $section: Section(!hasTables, (containsString("CTL/") || containsString("BL/"))) - then - entityCreationService.byString("CTL/", "must_redact", EntityType.ENTITY, $section) - .forEach(mustRedactEntity -> insert(mustRedactEntity)); - entityCreationService.byString("BL/", "must_redact", EntityType.ENTITY, $section) - .forEach(mustRedactEntity -> insert(mustRedactEntity)); - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.setRedaction(true); - redactionEntity.setRedactionReason("Laboratory for vertebrate studies found"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); - redactionEntity.addMatchedRule(11); - }); - end - -rule "12: Add CBI_author with \"et al.\" Regex" +// Rule unit: CBI.16 +rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - $section: Section(containsString("et al.")) + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("et al.")) then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section) + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.setRedaction(true); - entity.setRedactionReason("Author found by \"et al\" regex"); - entity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); - entity.addMatchedRule(12); + entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + insert(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); insert(entity); dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); }); end -rule "13: Add recommendation for Addresses in Test Organism sections" + +// Rule unit: CBI.17 +rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" when - $section: Section(!hasTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) + $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) then entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(redactionEntity -> { - redactionEntity.setRedactionReason("Line after \"Source\" in Test Organism Section"); - redactionEntity.addMatchedRule(13); - insert(redactionEntity); + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section"); + insert(entity); }); end -rule "14: Add recommendation for Addresses in Test Animals sections" - +rule "CBI.17.1: 
Add recommendation for Addresses in Test Organism sections, with colon" when - $section: Section(!hasTables, containsString("Species:"), containsString("Source:")) + $section: Section(!hasTables(), containsString("Species:"), containsString("Source:")) then entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(redactionEntity -> { - redactionEntity.setRedactionReason("Line after \"Source:\" in Test Animals Section"); - redactionEntity.addMatchedRule(14); - insert(redactionEntity); + .forEach(entity -> { + entity.addEngine(Engine.RULE); + entity.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section"); + insert(entity); }); end -// --------------------------------------- PII rules ------------------------------------------------------------------- -rule "15: Redact all PII" +// Rule unit: CBI.18 +rule "CBI.18.0: Expand CBI_author entities with firstname initials" + no-loop true when - $pii: RedactionEntity(type == "PII", redaction == false) + $entityToExpand: RedactionEntity(type == "CBI_author", + value.matches("[^\\s]+"), + textAfter.startsWith(" "), + anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") + ) then - $pii.setRedaction(true); - $pii.setRedactionReason("PII found"); - $pii.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - $pii.addMatchedRule(15); + RedactionEntity expandedEntity = entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.removeFromGraph(); + retract($entityToExpand); + insert(expandedEntity); end -rule "16: Redact Emails by RegEx (Non vertebrate study)" + +// Rule unit: CBI.19 +rule "CBI.19.0: Expand CBI_author entities with salutation prefix" when + $entityToExpand: RedactionEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + then + RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + $entityToExpand.removeFromGraph(); + retract($entityToExpand); + insert(expandedEntity); + end + + +// Rule unit: CBI.20 +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value == "Yes") + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.addEngine(Engine.RULE); + dictionary.addLocalDictionaryEntry(laboratoryEntity); + insert(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING 
LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.addEngine(Engine.RULE); + dictionary.addLocalDictionaryEntry(laboratoryEntity); + insert(laboratoryEntity); + }); + end + + +//------------------------------------ PII rules ------------------------------------ + +// Rule unit: PII.0 +rule "PII.0.0: Redact all PII (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $pii: RedactionEntity(type == "PII", dictionaryEntry) + then + $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "PII.0.1: Redact all PII (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $pii: RedactionEntity(type == "PII", dictionaryEntry) + then + $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: PII.1 +rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(containsString("@")) then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, $section) + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> { - emailEntity.setRedaction(true); - emailEntity.setRedactionReason("Found by Email Regex"); - emailEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2e)"); - emailEntity.addMatchedRule(16); + emailEntity.addEngine(Engine.RULE); + emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(emailEntity); }); end -rule "17: Redact line after contact information keywords" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> { + emailEntity.addEngine(Engine.RULE); + emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + insert(emailEntity); + }); + end + + +// Rule unit: PII.4 +rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -353,19 +593,49 @@ rule "17: Redact line after contact information keywords" then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule(17); - contactEntity.setRedactionReason("Found after \"" + $contactKeyword + "\" contact keyword"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); + contactEntity.addEngine(Engine.RULE); + insert(contactEntity); + }); + end + +rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> { + contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + contactEntity.addEngine(Engine.RULE); insert(contactEntity); - dictionary.addLocalDictionaryEntry("PII", contactEntity.getValue(), false); }); end -rule "18: redact line between contact keywords" - agenda-group "LOCAL_DICTIONARY_ADDS" +// Rule unit: PII.6 +rule "PII.6.0: redact line between contact keywords (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) then Stream.concat( @@ -373,139 +643,291 @@ rule "18: redact line between contact keywords" entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) .forEach(contactEntity -> { - contactEntity.setRedaction(true); - contactEntity.addMatchedRule(18); - contactEntity.setRedactionReason("Found between contact keywords"); - contactEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2e)"); + contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + contactEntity.addEngine(Engine.RULE); insert(contactEntity); - dictionary.addLocalDictionaryEntry("PII", contactEntity.getValue(), false); }); end -rule "19: Redact AUTHOR(S)" +rule "PII.6.1: redact line between contact keywords" when - FileAttribute(placeholder == "{fileattributes.vertebrateStudy}", value == "true") - $section: Section(!hasTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:")) + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> { + contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + contactEntity.addEngine(Engine.RULE); + insert(contactEntity); + }); + end + + +// Rule unit: PII.7 +rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, 
$section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + +// Rule unit: PII.8 +rule "PII.8.0: Redact contact information if producer is found" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active 
substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + +rule "PII.8.1: Redact contact information if producer is found" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> { + 
entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.addEngine(Engine.RULE); + insert(entity); + }); + end + + +// Rule unit: PII.9 +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) then entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(19); - authorEntity.setRedactionReason("AUTHOR(S) was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); insert(authorEntity); }); end -rule "20: Redact PERFORMING LABORATORY" +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (vertebrate study)" when - $section: Section(!hasTables, containsString("PERFORMING LABORATORY:")) + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(20); - authorEntity.setRedactionReason("PERFORMING LABORATORY was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2e)"); + authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); insert(authorEntity); }); end -rule "21: Redact On behalf of Sequani Ltd.:" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" when - $section: Section(!hasTables, containsString("On behalf of Sequani Ltd.: Name Title")) + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); + insert(authorEntity); + }); + end + +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) + then + entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> { + authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.addEngine(Engine.RULE); + insert(authorEntity); + }); + end + + +// Rule unit: PII.11 +rule "PII.11.0: Redact On behalf of Sequani Ltd.:" + when + $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(21); - 
authorEntity.setRedactionReason("On behalf of Sequani Ltd.: Name Title was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); + authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); insert(authorEntity); }); end -rule "22: Redact On behalf of Syngenta Ltd.:" + +// Rule unit: PII.12 +rule "PII.12.0: Expand PII entities with salutation prefix" when - $section: Section(!hasTables, containsString("On behalf of Syngenta Ltd.: Name Title")) + $entityToExpand: RedactionEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then - entityCreationService.betweenStrings("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> { - authorEntity.setRedaction(true); - authorEntity.addMatchedRule(21); - authorEntity.setRedactionReason("On behalf of Syngenta Ltd.: Name Title was found"); - authorEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2e)"); - insert(authorEntity); - }); + RedactionEntity expandedEntity = entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*"); + expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); + expandedEntity.addEngine(Engine.RULE); + insert(expandedEntity); end -rule "26: Redact signatures" + +//------------------------------------ Other rules ------------------------------------ + +// Rule unit: ETC.1 +rule "ETC.1.0: Redact Purity" when + $section: Section(containsStringIgnoreCase("purity")) + then + entityCreationService.byRegex("\\bPurity:\\s*([><]?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 
63 (2a)"); + entity.addEngine(Engine.RULE); + }); + end + + +// Rule unit: ETC.2 +rule "ETC.2.0: Redact signatures (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.setRedaction(true); - $signature.setMatchedRule(26); - $signature.setRedactionReason("Signature Found"); - $signature.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $signature.addMatchedRuleAndRedact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "27: Redact formulas" +rule "ETC.2.1: Redact signatures (vertebrate study)" when - $formula: Image(imageType == ImageType.FORMULA) + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $signature: Image(imageType == ImageType.SIGNATURE) then - $formula.setRedaction(true); - $formula.setMatchedRule(27); - $formula.setRedactionReason("Formula Found"); - $formula.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + $signature.addMatchedRuleAndRedact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end -rule "28: Redact logos" + +// Rule unit: ETC.3 +rule "ETC.3.0: Redact logos (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.setRedaction(true); - $logo.setMatchedRule(28); - $logo.setRedactionReason("Logo Found"); - $logo.setLegalBasis("Reg (EC) No 1107/2009 Art. 
63 (2g)"); + $logo.addMatchedRuleAndRedact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "29: Redact Dossier Redactions" - when - $dossierRedaction: RedactionEntity(type == "dossier_redactions") - then - $dossierRedaction.setRedaction(true); - $dossierRedaction.addMatchedRule(29); - $dossierRedaction.setRedactionReason("Dossier Redaction found"); - $dossierRedaction.setLegalBasis("Article 39(1)(2) of Regulation (EC) No 178/2002"); - end +rule "ETC.3.1: Redact logos (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $logo: Image(imageType == ImageType.LOGO) + then + $logo.addMatchedRuleAndRedact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end -rule "30: Remove Dossier redactions if file is confidential" - when - FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: RedactionEntity(type == "dossier_redactions") - then + +// Rule unit: ETC.4 +rule "ETC.4.0: Redact dossier dictionary entries" + when + $dossierRedaction: RedactionEntity(type == "dossier_redaction") + then + $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + + +// Rule unit: ETC.5 +rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" + when + not FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: RedactionEntity(type == "dossier_redaction") + then $dossierRedaction.removeFromGraph(); - retract($dossierRedaction) - end + retract($dossierRedaction); + end -rule "101: Redact CAS Number" + +// Rule unit: ETC.6 +rule "ETC.6.0: Redact CAS Number" when $table: Table(hasHeader("Sample #")) then $table.streamTableCellsWithHeader("Sample #") .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "PII", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) .forEach(redactionEntity -> { 
- redactionEntity.setRedaction(true); - redactionEntity.addMatchedRule(101); - redactionEntity.setRedactionReason("Sample # found in Header"); - redactionEntity.setLegalBasis("Reg (EC) No 1107/2009 Art. 63 (2g)"); + redactionEntity.addMatchedRuleAndRedact("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)"); insert(redactionEntity); }); end -rule "102: Guidelines FileAttributes" + +// Rule unit: ETC.7 +rule "ETC.7.0: Guidelines FileAttributes" when - $section: Section(!hasTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) + $section: Section(!hasTables(), (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) then RedactionSearchUtility.findBoundariesByRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream() .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) @@ -513,9 +935,29 @@ rule "102: Guidelines FileAttributes" .forEach(fileAttribute -> insert(fileAttribute)); end -// --------------------------------------- NER Entities rules ------------------------------------------------------------------- -rule "add NER Entities of type CBI_author" +// Rule unit: ETC.8 +rule "ETC.8.0: Redact formulas (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $formula: Image(imageType == ImageType.FORMULA) + then + $formula.addMatchedRuleAndRedact("ETC.8.0", "Formula Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.8.1: Redact formulas (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $formula: Image(imageType == ImageType.FORMULA) + then + $formula.addMatchedRuleAndRedact("ETC.8.1", "Formula Found", "Article 39(e)(2) of Regulation (EC) No 
178/2002"); + end + + 
+//------------------------------------ AI rules ------------------------------------ + +// Rule unit: AI.0 +rule "AI.0.0: add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) @@ -525,7 +967,9 @@ rule "add NER Entities of type CBI_author" .forEach(entity -> insert(entity)); end -rule "combine and add NER Entities as CBI_address" + +// Rule unit: AI.1 +rule "AI.1.0: combine and add NER Entities as CBI_address" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) @@ -538,9 +982,11 @@ rule "combine and add NER Entities as CBI_address" }); end -// --------------------------------------- manual redaction rules ------------------------------------------------------------------- -rule "Apply manual resize redaction" +//------------------------------------ Manual redaction rules ------------------------------------ + +// Rule unit: MAN.0 +rule "MAN.0.0: Apply manual resize redaction" salience 128 when $resizeRedaction: ManualResizeRedaction($id: annotationId) @@ -551,39 +997,43 @@ rule "Apply manual resize redaction" update($entityToBeResized); end -rule "Apply id removals that are valid and not in forced redactions to Entity" + +// Rule unit: MAN.1 +rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) $entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) then - $entityToBeRemoved.removeFromGraph(); - retract($entityToBeRemoved); + $entityToBeRemoved.setIgnored(true); end -rule "Apply id removals that are valid and not in forced redactions to Image" +rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128 when 
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) - $entityToBeRemoved: Image($id == id) + $imageEntityToBeRemoved: Image($id == id) then - $entityToBeRemoved.setIgnored(true); + $imageEntityToBeRemoved.setIgnored(true); end -rule "Apply force redaction" + +// Rule unit: MAN.2 +rule "MAN.2.0: Apply force redaction" salience 128 when ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) $entityToForce: RedactionEntity(matchesAnnotationId($id)) then - $entityToForce.setLegalBasis($legalBasis); - $entityToForce.setRedaction(true); + $entityToForce.apply("MAN.2.0", "Forced redaction", $legalBasis); $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); end -rule "Apply image recategorization" + +// Rule unit: MAN.3 +rule "MAN.3.0: Apply image recategorization" salience 128 when ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) @@ -592,9 +1042,11 @@ rule "Apply image recategorization" $image.setImageType(ImageType.fromString($imageType)); end -// --------------------------------------- merging rules ------------------------------------------------------------------- -rule "remove Entity contained by Entity of same type" +//------------------------------------ Entity merging rules ------------------------------------ + +// Rule unit: X.0 +rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when $larger: RedactionEntity($type: type, $entityType: entityType) @@ -604,7 +1056,9 @@ rule "remove Entity contained by Entity of same type" retract($contained); end -rule "merge intersecting Entities of same type" + +// Rule unit: X.1 +rule "X.1.0: merge intersecting Entities of same type" salience 64 when $first: RedactionEntity($type: type, $entityType: entityType, !resized, 
!skipRemoveEntitiesContainedInLarger) @@ -618,7 +1072,9 @@ rule "merge intersecting Entities of same type" insert(mergedEntity); end -rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE" + +// Rule unit: X.2 +rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE) @@ -628,7 +1084,9 @@ rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE" retract($entity) end -rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + +// Rule unit: X.3 +rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION) @@ -638,7 +1096,9 @@ rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATIO retract($recommendation); end -rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" + +// Rule unit: X.4 +rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: RedactionEntity($type: type, entityType == EntityType.ENTITY) @@ -649,7 +1109,9 @@ rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same retract($recommendation); end -rule "remove Entity of type RECOMMENDATION when contained by ENTITY" + +// Rule unit: X.5 +rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: RedactionEntity(entityType == EntityType.ENTITY) @@ -659,19 +1121,23 @@ rule "remove Entity of type RECOMMENDATION when contained by ENTITY" retract($recommendation); end -rule "remove Entity of lower rank, when equal boundaries and entityType" + +// Rule unit: X.6 +rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" salience 32 when - $higherRank: RedactionEntity($type: type, 
$entityType: entityType, $boundary: boundary) - $lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !redaction) + $higherRank: RedactionEntity($type: type, entityType == EntityType.ENTITY) + $lowerRank: RedactionEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized, !skipRemoveEntitiesContainedInLarger) then $lowerRank.removeFromGraph(); retract($lowerRank); end -// --------------------------------------- FileAttribute Rules ------------------------------------------------------------------- -rule "remove duplicate FileAttributes" +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +rule "FA.1.0: remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -680,9 +1146,11 @@ rule "remove duplicate FileAttributes" retract($duplicate); end -// --------------------------------------- local dictionary search ------------------------------------------------------------------- -rule "run local dictionary search" +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when