diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 8df0cc47..1d3c3b43 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -51,6 +51,7 @@ dependencies { implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4") implementation("org.springframework.boot:spring-boot-starter-amqp:3.1.4") + testImplementation(project(":rules-management")) testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}") testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}") diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java index fe5e4ad8..7a04467e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java @@ -31,11 +31,11 @@ public final class MatchedRule implements Comparable { String reason = ""; @Builder.Default String legalBasis = ""; - boolean applied; boolean writeValueWithLineBreaks; + boolean applied; boolean removed; boolean ignored; - boolean resized; + @Builder.Default Set references = Collections.emptySet(); @@ -46,6 +46,20 @@ public final class MatchedRule implements Comparable { } + public MatchedRule asSkippedIfApplied() { + + if (!this.isApplied()) { + return this; + } + return MatchedRule.builder().ruleIdentifier(getRuleIdentifier()) + .writeValueWithLineBreaks(this.isWriteValueWithLineBreaks()) + .legalBasis(this.getLegalBasis()) + 
.reason(this.getReason()) + .references(this.getReferences()) + .build(); + } + + @Override public int compareTo(MatchedRule matchedRule) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/drools/BasicRule.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/drools/BasicRule.java index d6f60188..1a2460c6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/drools/BasicRule.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/drools/BasicRule.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.model.drools; +import org.drools.drl.ast.descr.AttributeDescr; import org.drools.drl.ast.descr.RuleDescr; import lombok.AccessLevel; @@ -18,6 +19,7 @@ public class BasicRule { RuleIdentifier identifier; String name; String code; + String agendaGroup; int line; @@ -26,7 +28,8 @@ public class BasicRule { RuleIdentifier identifier = RuleIdentifier.fromName(rule.getName()); String nameWithoutIdentifier = rule.getName().replace(identifier + ":", ""); String code = rulesString.substring(rule.getStartCharacter(), rule.getEndCharacter()); - return new BasicRule(identifier, nameWithoutIdentifier, code, rule.getLine()); + String agendaGroup = rule.getAttributes().getOrDefault("agenda-group", new AttributeDescr("agenda-group", "DEFAULT")).getValue(); + return new BasicRule(identifier, nameWithoutIdentifier, code, agendaGroup, rule.getLine()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsSyntaxValidationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsSyntaxValidationService.java index d0a7f734..9bb160f4 100644 
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsSyntaxValidationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/DroolsSyntaxValidationService.java @@ -5,6 +5,7 @@ import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; +import org.drools.drl.parser.DroolsParserException; import org.kie.api.builder.KieBuilder; import org.kie.api.builder.Message; import org.springframework.stereotype.Service; @@ -32,14 +33,20 @@ public class DroolsSyntaxValidationService { @SneakyThrows public DroolsSyntaxValidation testRules(RuleValidationModel rules) { - DroolsSyntaxValidation customDroolsSyntaxValidation = buildCustomDroolsSyntaxValidation(rules.getRulesString(), RuleFileType.valueOf(rules.getRuleFileType())); + DroolsSyntaxValidation customDroolsSyntaxValidation; + try { + customDroolsSyntaxValidation = buildCustomDroolsSyntaxValidation(rules.getRulesString(), RuleFileType.valueOf(rules.getRuleFileType())); + } catch (DroolsParserException e) { + // this means the parser could not parse the file at all. In this case use drools compiler only as it will return useful error messages. 
+ customDroolsSyntaxValidation = new DroolsSyntaxValidation(); + } DroolsSyntaxValidation droolsCompilerSyntaxValidation = buildDroolsCompilerSyntaxValidation(rules); droolsCompilerSyntaxValidation.getDroolsSyntaxErrorMessages().addAll(customDroolsSyntaxValidation.getDroolsSyntaxErrorMessages()); return droolsCompilerSyntaxValidation; } - private DroolsSyntaxValidation buildCustomDroolsSyntaxValidation(String ruleString, RuleFileType ruleFileType) { + private DroolsSyntaxValidation buildCustomDroolsSyntaxValidation(String ruleString, RuleFileType ruleFileType) throws DroolsParserException { RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(ruleString); DroolsSyntaxValidation customSyntaxValidation = ruleFileBluePrint.getDroolsSyntaxValidation(); @@ -75,19 +82,24 @@ public class DroolsSyntaxValidationService { .build()); } }); - baseRuleFileBluePrint.streamAllRules().forEach(basicRule -> { - if (!validateRuleIsPresent(basicRule, ruleFileBluePrint)) { - int line = ruleFileBluePrint.findRulesByIdentifier(basicRule.getIdentifier()).stream().findFirst().map(BasicRule::getLine).orElse(basicRule.getLine()); - customSyntaxValidation.getDroolsSyntaxErrorMessages().add(DroolsSyntaxErrorMessage.builder().line(line) - .column(0) - .message(String.format("Changing or removing the rule %s is not allowed! 
Must be: %n%s", basicRule.getName(), basicRule.getCode())) + if (ruleFileType.equals(RuleFileType.ENTITY)) { + String requiredAgendaGroup = "LOCAL_DICTIONARY_ADDS"; + if (!validateAgendaGroupIsPresent(ruleFileBluePrint, requiredAgendaGroup)) { + customSyntaxValidation.getDroolsSyntaxErrorMessages().add(DroolsSyntaxErrorMessage.builder().line(0) + .column(0).message(String.format("At least one rule with Agenda-Group '%s' required!", requiredAgendaGroup)) .build()); } - }); + } return customSyntaxValidation; } + private boolean validateAgendaGroupIsPresent(RuleFileBluePrint ruleFileBluePrint, String agendaGroupName) { + + return ruleFileBluePrint.streamAllRules().anyMatch(basicRule -> basicRule.getAgendaGroup().equals(agendaGroupName)); + } + + private boolean importsAreValid(RuleFileBluePrint baseRuleFileBluePrint, RuleFileBluePrint ruleFileBluePrint) { // imports may shrink, but not add anything new! diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java index 0bc2ca5b..90c80dab 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java @@ -4,8 +4,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; +import java.io.FileInputStream; import java.io.FileOutputStream; import java.nio.file.Path; +import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -25,6 +27,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; @@ -51,6 +54,22 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { ObjectMapper mapper; + @Test + @SneakyThrows + public void testSave() { + + MigratedIds ids = new MigratedIds(new LinkedList<>()); + ids.addMapping("123", "321"); + ids.addMapping("123", "321"); + ids.addMapping("123", "321"); + ids.addMapping("123", "321"); + ids.addMapping("123", "321"); + + mapper.writeValue(new FileOutputStream("/tmp/testIds.json"), ids); + var ids2 = mapper.readValue(new FileInputStream("/tmp/testIds.json"), MigratedIds.class); + assert ids2.getMappings().size() == 5; + } + @Test @SneakyThrows public void testMigration() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java new file mode 100644 index 00000000..6e7af29f --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java @@ -0,0 +1,53 @@ +package 
com.iqser.red.service.redaction.v1.server.drools.files.management.services; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.wildfly.common.Assert.assertTrue; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.springframework.core.io.ClassPathResource; + +import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser; +import com.knecon.fforesight.utility.rules.management.models.BasicRule; +import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint; +import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO; + +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class DroolsUpToDateTest { + + @Test + @SneakyThrows + public void assertAllRuleFilesAreUpToDate() { + + Path droolsPath = new ClassPathResource("drools").getFile().toPath(); + Files.walk(droolsPath).filter(DroolsUpToDateTest::isEntityRuleFile).forEach(this::validateFile); + } + + + private static boolean isEntityRuleFile(Path droolsFile) { + + String fileName = droolsFile.getFileName().toString(); + return fileName.endsWith(".drl") && !fileName.endsWith("_components.drl") && !fileName.endsWith("_OLD.drl"); + } + + + private void validateFile(Path path) { + + log.info(path.toFile().getAbsolutePath()); + RuleFileBluePrint allRules = RuleFileParser.buildBluePrintFromAllRuleFiles(); + RuleFileBluePrint thisRules = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(path.toFile().getAbsolutePath())); + assertTrue(allRules.getAllRuleIdentifiers().containsAll(thisRules.getAllRuleIdentifiers())); + for (BasicRule rule : thisRules.getAllRules()) { + List updatedRule = allRules.findRuleByIdentifier(rule.identifier()); + assert updatedRule.size() == 1; + assertEquals(updatedRule.get(0), rule); + } + } + +} diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 2500757a..9c767083 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -82,25 +82,25 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" +rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" +rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: CBI.1 -rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" +rule "CBI.1.0: Do not redact CBI Address (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_address", dictionaryEntry) @@ -108,19 +108,19 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); end -rule "CBI.1.1: 
Redact CBI Address (Vertebrate Study)" +rule "CBI.1.1: Redact CBI Address (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_address", dictionaryEntry) then - $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: CBI.2 -rule "CBI.2.0: Don't redact genitive CBI_author" +rule "CBI.2.0: Do not redact genitive CBI Author" when - $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) + $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s")) then entityCreationService.byTextRange($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); @@ -128,7 +128,7 @@ rule "CBI.2.0: Don't redact genitive CBI_author" // Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables" +rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" when $section: Section(!hasTables(), hasEntitiesOfType("published_information"), @@ -156,7 +156,7 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found // Rule unit: CBI.9 -rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" +rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -166,10 +166,10 @@ rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non verteb .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) 
.map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" +rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -179,12 +179,12 @@ rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrat .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: CBI.10 -rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)" +rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -194,10 +194,10 @@ rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrat .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "CBI.10.1: Redact all Cell's 
with Header Author as CBI_author (vertebrate study)" +rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -207,7 +207,7 @@ rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate s .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -223,7 +223,7 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -231,12 +231,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -244,7 +244,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -290,7 +290,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -304,7 +304,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.0", "Personal Information 
found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "PII.0.1: Redact all PII (vertebrate study)" @@ -312,7 +312,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -323,7 +323,7 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" @@ -332,7 +332,7 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -352,7 +352,7 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then 
entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" @@ -370,45 +370,27 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of 
Regulation (EC) No 178/2002")); + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), 
containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -430,25 +412,33 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.1: Redact signatures (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 -rule "ETC.3.0: Redact logos (vertebrate study)" +rule "ETC.3.0: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.skip("ETC.3.0", "Logo Found"); end -rule "ETC.3.1: Redact logos (non vertebrate study)" +rule "ETC.3.1: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.1", "Logo Found", "Article 
39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -466,7 +456,7 @@ rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confi //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.0 -rule "AI.0.0: add all NER Entities of type CBI_author" +rule "AI.0.0: Add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) @@ -477,7 +467,7 @@ rule "AI.0.0: add all NER Entities of type CBI_author" // Rule unit: AI.1 -rule "AI.1.0: combine and add NER Entities as CBI_address" +rule "AI.1.0: Combine and add NER Entities as CBI_address" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) @@ -486,7 +476,7 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" end -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -631,7 +621,7 @@ rule "MAN.4.1: Apply legal basis change" //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) @@ -643,7 +633,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" // Rule unit: X.1 -rule "X.1.0: merge intersecting Entities of same type" +rule "X.1.0: Merge intersecting Entities of same type" salience 64 when $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) @@ -659,7 +649,7 @@ rule "X.1.0: merge intersecting Entities of same type" // Rule 
unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) @@ -672,7 +662,7 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) @@ -684,7 +674,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -697,7 +687,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit // Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -709,7 +699,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" // Rule unit: X.6 -rule "X.6.0: remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" salience 32 when $higherRank: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -724,7 +714,7 @@ rule "X.6.1: remove Entity of higher rank, 
when intersected by entity of type EN salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -735,7 +725,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -757,6 +747,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl deleted file mode 100644 index aaa19b6a..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl +++ /dev/null @@ -1,916 +0,0 @@ -package drools - -import static java.lang.String.format; -import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; -import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; - -import java.util.List; -import java.util.LinkedList; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.Collection; -import java.util.stream.Stream; -import java.util.Optional; - -import com.iqser.red.service.redaction.v1.server.model.document.*; -import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.*; -import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity -import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule -import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; -import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; -import com.iqser.red.service.redaction.v1.server.model.NerEntities; -import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; - -import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; -import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; - -global Document document -global EntityCreationService entityCreationService -global ManualChangesApplicationService manualChangesApplicationService -global Dictionary dictionary - -//------------------------------------ queries ------------------------------------ - -query "getFileAttributes" - $fileAttribute: FileAttribute() - end - -//--------------------------------------------------------------------------- - -rule "H.0.0 retract table of contents page" - when - $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) - $node: SemanticNode(onPage($page.getNumber()), !onPage($page.getNumber() -1), getType() != NodeType.IMAGE) - then - retract($node); - end - - -rule "H.0.0: Ignore Table of Contents" - salience 10 - when - $tocHeadline: Headline(containsString("CONTENTS")) - $page: Page() from $tocHeadline.getParent().getPages() - $node: SemanticNode(this != $tocHeadline, getType() != NodeType.IMAGE, onPage($page.getNumber()), !onPage($page.getNumber() -1)) - then - retract($node); - end - -//---------------------- CUSTOMER RULES ----------------------------------------------------- - -rule "DOC.1.0: Adama number" - when - $paragraph: Paragraph( - containsString("R-") - && onPage(1) - ) - then - entityCreationService - .byRegex( - "0?0?R\\-[l\\d]{3,5}\\p{Lu}?", - "adama_number", - EntityType.ENTITY, - $paragraph) - .findFirst() - .ifPresent(entity -> { - entity.apply("DOC.1.0", "Adama number found"); - insert(FileAttribute.builder().label("Adama").value(entity.getValue()).build()); - }); - end 
- -rule "DOC.2.0: Study number by keyword" - when - $studyNumberKeyword: String() from List.of( - "BioChem project number:", - "DTI Report.", - "Final Report N°", - "LPT Report No.", - "Project identity", - "Project No.:", - "Protocol No.", - "PTRL Report No.", - "SLI Report #", - "SLI Study #", - "Specht Analytical Study Plan", - "SPL PROJECT NUMBER:", - "Study code:", - "Study N°", - "Study-No.", - "Study No:", - "Study number:", - "Study Number" - ) - $excludeKeyWords: String() from List.of( - "Sponsors study number" - ) - $paragraph: Paragraph( - containsStringIgnoreCase($studyNumberKeyword) - && !containsStringIgnoreCase($excludeKeyWords) - && onPage(1) - ) - then - entityCreationService - .lineAfterStringIgnoreCase($studyNumberKeyword,"study_number",EntityType.ENTITY,$paragraph) - .findFirst() - .ifPresent(entity -> - { - entity.apply("DOC.2.0", "Study number found"); - insert(FileAttribute.builder().label("Study").value(entity.getValue()).build()); - } - ); - end - -rule "DOC.2.1: Study number in header" - when - $studyNumberKeyword: String() from List.of( - "Final Report" - ) - $header: Header( - containsStringIgnoreCase($studyNumberKeyword) - && onPage(2) - ) - then - entityCreationService - .lineAfterStringIgnoreCase($studyNumberKeyword,"study_number",EntityType.ENTITY,$header) - .findFirst() - .ifPresent(entity -> - { - entity.apply("DOC.2.1", "Study number found"); - insert(FileAttribute.builder().label("Study").value(entity.getValue()).build()); - } - ); - end - -rule "DOC.2.2: Project Number with Section Title" - when - $title: String() from List.of( - "LABORATORY PROJECT ID." 
- ) - $paragraph: Paragraph( - containsStringIgnoreCase($title) - && onPage(1) - ) - then - entityCreationService.semanticNodeAfterString( - $paragraph, - "LABORATORY PROJECT ID.", - "study_number", - EntityType.ENTITY - ) - .ifPresent( - entity -> entity.apply("DOC.2.2","Study number by title found.") - ); - end - - -rule "DOC.3.0: Batch Material Number" - when - $hlKeyword: String() from List.of( - "formulation", - "formulation:", - "test item", - "test substance", - "test material" - ) - $hlNoKeyword: String() from List.of( - "Preparation" - ) - $headline: Headline( - containsStringIgnoreCase($hlKeyword) - && !containsStringIgnoreCase($hlNoKeyword) - ) - $batchKeyword: String() from List.of( - "Batch number", - "Batch number:", - "Batch number* :", - "batch no", - "Batch no.", - "Lot.n.:" - ) - $section: Section( - containsStringIgnoreCase($batchKeyword) - && ( - getHeadline() == $headline - || ( - containsStringIgnoreCase($hlKeyword) - && !containsStringIgnoreCase($hlNoKeyword) - ) - ) - ) - then - entityCreationService - .lineAfterStringIgnoreCase($batchKeyword,"batch_material_number",EntityType.ENTITY,$section) - .forEach( - entity -> { - entity.apply("DOC.3.0","Batch number found."); - insert(FileAttribute.builder().label("Batch No").value(entity.getValue()).build()); - } - ); - end - -rule "DOC.4.0: study title as headline" - when - $hlKeyword: String() from List.of( - "Analyitical Method", - "Residues", - "Acute", - "Process", - "Application", - "Review", - "Examination", - "Toxicity", - "Method Validation", - "Explosion", - "Sensitisation", - "Determination", - "Test" - ) - $hlNotKeyword: String() from List.of( - "Laboratoire", - "Laboratorien" - ) - $species: TextEntity(type=="species") - $headline: Headline( - onPage(1) - && ( - containsStringIgnoreCase($hlKeyword) - || entities contains $species - ) - && !containsStringIgnoreCase($hlNotKeyword) - ) - then - entityCreationService.bySemanticNode($headline, "title", EntityType.ENTITY).ifPresent(entity -> 
{ - entity.apply("DOC.4.0", "Title found", "n-a"); - insert(FileAttribute.builder().label("Title").value(entity.getValue()).build()); - }); - end - -rule "DOC.4.1: study title on cover page between sections" - when - not TextEntity(type=="title", applied()) - $page: Page( - getNumber() == 1, - getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Study Title".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Title".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Report 92 50 12 136".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Final Report".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Final Study Report".toLowerCase()), - getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Guideline".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Study Identification".toLowerCase()) - // too many false positives due to term in header and cover page || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Final Report".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Data Requirement".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Submitted".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Test Guideline".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Study Director".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Author".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Including:".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Laboratory Investigations".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Test Article".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() 
(contains "HLS".toLowerCase()) - || getMainBodyTextBlock().getSearchText().toLowerCase() (contains "Official Journal".toLowerCase()) - ) - then - List startStrings = List.of("Study Title", "Study Title:", "Title", "Final Report", "Final Study Report", "Report 92 50 12 136"); - List stopStrings = List.of("Guideline", "Guidelines", "Study Identification", "Data Requirement", "Submitted", "Test Guideline", - "Study Director", "Author", "Including:", "Laboratory Investigations", "Test Article", "HLS", "Official Journal"); - // too many false positives due to term in header and cover page stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Final Report", $page.getMainBodyTextBlock())); - - entityCreationService.shortestBetweenAnyString(startStrings, stopStrings, "title", EntityType.ENTITY, document).forEach(entity -> { - entity.apply("DOC.4.1", "Study title found", "n-a"); - insert(FileAttribute.builder().label("Title").value(entity.getValue()).build()); - }); - end - -rule "DOC.4.2: Study title by paragraph including species on cover page" - when - not TextEntity(type=="title", applied()) - $species: TextEntity(type=="species") - $paragraph: Paragraph( - onPage(1) - && entities contains $species - ) - then - entityCreationService.bySemanticNode( - $paragraph, - "title", - EntityType.ENTITY - ) - .ifPresent(entity -> { - entity.apply("DOC.4.2","Title with species found on cover page"); - insert(FileAttribute.builder().label("Title").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.0: Study Director on cover page in a paragraph on line" - when - not TextEntity(type=="author", applied()) - $sectionTitle: String() from List.of( - "Study Director/Analyst:", - "Study Director:", - "Study Director :", - "Author:", - "Author " - ) - $paragraph: Paragraph( - onPage(1) - && containsStringIgnoreCase($sectionTitle) - ) - then - entityCreationService - .lineAfterStringIgnoreCase( - $sectionTitle, - "author", - EntityType.ENTITY, - $paragraph 
- ) - .forEach(entity -> { - entity.apply("DOC.5.0","Study Director on cover page found"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.1: Author/Study Director listing on cover page" - when - not TextEntity(type=="author", applied()) - $page: Page( - getNumber() == 1 - && getMainBodyTextBlock().getSearchText().toLowerCase() ( - contains "Author(s)".toLowerCase() - || contains "Authors".toLowerCase() - || contains "Author".toLowerCase() - || contains "Study Director".toLowerCase() - ) - && getMainBodyTextBlock().getSearchText().toLowerCase() ( - contains "Test Facility".toLowerCase() - || contains "Testing Facility".toLowerCase() - || contains "Study Initiation".toLowerCase() - || contains "Study Initiated".toLowerCase() - || contains "Study completed on".toLowerCase() - || contains "Study completion".toLowerCase() - || contains "Study amended".toLowerCase() - || contains "Report Issue Date".toLowerCase() - || contains "Document Date".toLowerCase() - || contains "Date".toLowerCase() - || contains "Defitrace".toLowerCase() // this should be solved with Organisation Entity as a TextRange - ) - ) - then - List startBoundaries = new LinkedList<>(); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Author(s)", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Authors", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Author", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Study Director", $page.getMainBodyTextBlock())); - - List stopBoundaries = new LinkedList<>(); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Test Facility", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Testing 
Facility", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Study Initiation", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Study Initiated", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Study completed on", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Study completion", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Study amended", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Report Issue Date", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Document Date", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Date", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Defitrace", $page.getMainBodyTextBlock())); - - entityCreationService.betweenTextRanges(startBoundaries, stopBoundaries, "author", EntityType.ENTITY, document).forEach(entity -> { - entity.apply("DOC.5.1", "Author list found", "n-a"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.2: Study Director on cover page in a paragraph" - when - not TextEntity(type=="author", applied()) - $sectionTitle: String() from List.of( - "Study Director" - ) - $node: Paragraph( - onPage(1) - && containsStringIgnoreCase($sectionTitle) - ) - then - entityCreationService - .byRegexWithLineBreaksIgnoreCase( - "\\n([\\w\\(\\) .]{5,30})\\n", - "author", - EntityType.ENTITY, - 1, - $node - ) - .filter( - entity -> !entity.getValue().toLowerCase().contains($sectionTitle.toLowerCase()) - 
) - .forEach(entity -> { - entity.apply("DOC.5.2","Study Director on cover page found"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.3: Study Director on cover page in a section" - when - not TextEntity(type=="author", applied()) - $page: Page(getNumber() == 1) - $sectionTitle: String() from List.of( - "Study Director:", - "Study Director", - "Study Director/Author" - ) - $node: Section( - onPage(1) - && containsStringIgnoreCase($sectionTitle) - ) - then - entityCreationService - .byRegexWithLineBreaksIgnoreCase( - $sectionTitle+"\\n([\\w\\(\\) .]{5,30})\\n", - "author", - EntityType.ENTITY, - 1, - $node - ) - .filter( - entity -> entity.getPages().contains($page) - ) - .forEach(entity -> { - entity.apply("DOC.5.3","Study Director on cover page found"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - - entityCreationService - .lineAfterStringIgnoreCase($sectionTitle,"author",EntityType.ENTITY,$node) - .forEach(entity -> { - entity.apply("DOC.5.2","Study Director on cover page found"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.4: Author in document section with relevant headline" - when - not TextEntity(type == "author", applied()) - $hlKeyword: String() from List.of( - "sponsor", - "personnel", - "staff involved", - "study details", - "management of study" - ) - $keyword: String() from List.of( - "Study Director", - "Studv Director", - "Study Director:", - "Study Director :", - "stray Birector:", - "Author:", - "Author ", - "Author(s)" - ) - $section: Section( - containsStringIgnoreCase($keyword) - && getHeadline().containsStringIgnoreCase($hlKeyword) - ) - then - entityCreationService - .lineAfterStringIgnoreCase( - $keyword, - "author", - EntityType.ENTITY, - $section - ) - .findFirst() - .ifPresent( - entity -> { - entity.apply("DOC.5.4","Author found."); - 
insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - } - ); - end - -rule "DOC.5.5: Study Director from signature on compliance statement" - when - not TextEntity(type=="author", applied()) - $sectionTitle: String() from List.of( - "Compliance Statement", - "Study Compliance", - "Statement of Compliance", - "Statement of Study Compliance", - "Contributing Scientist", - "Compliance with Good Laboratory Practice", - "Signatures and Approval" - ) - $role: String() from List.of("Study Director") - $node: Section( - containsStringIgnoreCase($sectionTitle) - && containsStringIgnoreCase($role) - && getFirstPage().getNumber() > 4 - ) - then - entityCreationService - .byRegexWithLineBreaksIgnoreCase( - "([\\w\\(\\) .,]{5,35})[\\w\\/ ]{0,30}\\n"+$role, - "author", - EntityType.ENTITY, - 1, - $node - ) - .forEach(entity -> { - entity.apply("DOC.5.5","Study Director on compliance page found"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.6: Study Director from section with relevant headlining responsibility" - when - not TextEntity(type=="author", applied()) - $sectionTitle: String() from List.of("Responsible personnel") - $role: String() from List.of("Study Director") - $node: Section( - getHeadline().containsStringIgnoreCase($sectionTitle) - && containsStringIgnoreCase($role) - ) - $paragraph: Paragraph( - containsStringIgnoreCase($role) - ) - then - entityCreationService - .lineAfterStringIgnoreCase( - $role, - "author", - EntityType.ENTITY, - $paragraph - ) - .forEach(entity -> { - entity.apply("DOC.5.6","Study Director on compliance page found"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.7: Study Director from section with study director in headline" - when - not TextEntity(type=="author", applied()) - $sectionTitle: String() from List.of("Study Director") - $section: Section( - 
getHeadline().containsStringIgnoreCase($sectionTitle) - ) - then - entityCreationService - .bySemanticNodeParagraphsOnly( - $section, - "author", - EntityType.ENTITY - ) - .forEach(entity -> { - entity.apply("DOC.5.7","Study Director found by headline"); - insert(FileAttribute.builder().label("Author").value(entity.getValue()).build()); - }); - end - -rule "DOC.5.9: Remove skipped authors" - salience 9999 - when - $author: TextEntity(type=="author", !applied()) - then - $author.removeFromGraph(); - end - -rule "DOC.14.0: GLP Study" - when - $hlKeywords: String() from List.of( - "Good Laboratory Practice", - "GLP" - ) - $headline: Headline( - containsString($hlKeywords) - ) - $compliance: String() from List.of( - "conducted in compliance with", - "conducted in accordance with", - "conducted and reported in compliance with", - "carried out in accordance with", - "meets the requirements", - "performed in compliance with" - ) - $section: Section( - containsStringIgnoreCase($compliance) - && getHeadline() == $headline - ) - then - entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { - entity.apply("DOC.14.0", "GLP Study found", "n-a"); - insert(FileAttribute.builder().label("GLP").value("TRUE").build()); - }); - end - -rule "DOC.14.1: GLP Study" - when - $compliance: String() from List.of( - "conducted in compliance with", - "conducted in accordance with", - "conducted and reported in compliance with", - "carried out in accordance with", - "meets the requirements", - "performed in compliance with", - "conduct" - ) - $principle: String() from List.of( - "40 CFR Part 160", - "40CFR160", - "Good Laboratory Practice", - "FIFRA Good Laboratory Practice Standards" - ) - $paragraph: Paragraph( - containsStringIgnoreCase($compliance) - && containsStringIgnoreCase($principle) - ) - then - entityCreationService.betweenStringsIncludeStartAndEndIgnoreCase( - $compliance, - $principle, - "glp_study", - EntityType.ENTITY, - $paragraph 
- ) - .forEach(entity -> { - entity.apply("DOC.14.1", "GLP Study found", "n-a"); - insert(FileAttribute.builder().label("GLP").value("TRUE").build()); - } - ); - end - -//------------------------------------ Manual redaction rules ------------------------------------ - -// Rule unit: MAN.0 -rule "MAN.0.0: Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) - $entityToBeResized: TextEntity(matchesAnnotationId($id)) - then - manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($entityToBeResized); - $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); - end - -rule "MAN.0.1: Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) - $imageToBeResized: Image(id == $id) - then - manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($imageToBeResized); - update($imageToBeResized.getParent()); - end - - -// Rule unit: MAN.1 -rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" - salience 128 - when - $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) - then - $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); - update($entityToBeRemoved); - retract($idRemoval); - $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); - end - -rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" - salience 128 - when - $idRemoval: 
IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) - $imageEntityToBeRemoved: Image($id == id) - then - $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); - update($imageEntityToBeRemoved); - retract($idRemoval); - update($imageEntityToBeRemoved.getParent()); - end - - -// Rule unit: MAN.2 -rule "MAN.2.0: Apply force redaction" - salience 128 - when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToForce: TextEntity(matchesAnnotationId($id)) - then - $entityToForce.getManualOverwrite().addChange($force); - update($entityToForce); - $entityToForce.getIntersectingNodes().forEach(node -> update(node)); - retract($force); - end - -rule "MAN.2.1: Apply force redaction to images" - salience 128 - when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) - $imageToForce: Image(id == $id) - then - $imageToForce.getManualOverwrite().addChange($force); - update($imageToForce); - update($imageToForce.getParent()); - retract($force); - end - - -// Rule unit: MAN.3 -rule "MAN.3.0: Apply entity recategorization" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type != $type) - then - $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); - manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); - retract($recategorization); - // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
- retract($entityToBeRecategorized); - end - - -rule "MAN.3.1: Apply entity recategorization of same type" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type == $type) - then - $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); - retract($recategorization); - end - - -rule "MAN.3.2: Apply image recategorization" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $imageToBeRecategorized: Image($id == id) - then - manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); - update($imageToBeRecategorized); - update($imageToBeRecategorized.getParent()); - retract($recategorization); - end - - -// Rule unit: MAN.4 -rule "MAN.4.0: Apply legal basis change" - salience 128 - when - $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) - $imageToBeRecategorized: Image($id == id) - then - $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); - end - -rule "MAN.4.1: Apply legal basis change" - salience 128 - when - $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToBeChanged: TextEntity(matchesAnnotationId($id)) - then - $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); - end - - -//------------------------------------ Entity merging rules ------------------------------------ - -// Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" - salience 65 - when - $larger: TextEntity($type: type, $entityType: 
entityType, active()) - $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) - then - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - retract($contained); - end - - -// Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" - salience 64 - when - $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) - then - $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); - retract($entity) - end - - -// Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" - salience 64 - when - $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); - retract($recommendation); - end - - -// Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" - salience 256 - when - $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $entity.addEngines($recommendation.getEngines()); - $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); - retract($recommendation); - end - - -// 
Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" - salience 256 - when - $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); - retract($recommendation); - end - - -// Rule unit: X.7 -rule "X.7.0: remove all images" - salience 512 - when - $image: Image(imageType != ImageType.OCR, !hasManualChanges()) - then - $image.remove("X.7.0", "remove all images"); - retract($image); - end - - -//------------------------------------ File attributes rules ------------------------------------ - -// Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" - - salience 64 - when - $fileAttribute: FileAttribute($label: label, $value: value) - $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) - then - retract($duplicate); - end - - -// Rule unit: LDS.0 -rule "LDS.0.0: Run local dictionary search" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -999 - when - $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() - then - entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) - .forEach(entity -> { - Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); - }); - end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 738187dc..0dd7be25 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -133,7 +133,7 @@ rule "CBI.1.1: Redact CBI Address (vertebrate Study)" // Rule unit: CBI.2 rule "CBI.2.0: Do not redact genitive CBI Author" when - $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) + $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s")) then entityCreationService.byTextRange($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); @@ -646,6 +646,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" when @@ -797,6 +798,7 @@ rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" when @@ -1034,6 +1036,7 @@ rule "PII.12.0: Expand PII entities with salutation prefix" .ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList())); end + // Rule unit: PII.13 rule "PII.13.0: Add recommendation for PII after Contact Person" when @@ -1094,12 +1097,12 @@ rule "ETC.2.1: Redact signatures (vertebrate study)" // Rule unit: ETC.3 -rule "ETC.3.0: Redact logos (non vertebrate study)" +rule "ETC.3.0: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.skip("ETC.3.0", 
"Logo Found"); end rule "ETC.3.1: Redact logos (vertebrate study)" @@ -1187,6 +1190,7 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end + // Rule unit: ETC.9 rule "ETC.9.0: Redact skipped impurities" when @@ -1204,6 +1208,7 @@ rule "ETC.9.1: Redact impurities" $skippedImpurities.redact("ETC.9.1", "Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); end + // Rule unit: ETC.10 rule "ETC.10.0: Redact Product Composition Information" when @@ -1212,6 +1217,7 @@ rule "ETC.10.0: Redact Product Composition Information" $compositionInformation.redact("ETC.10.0", "Product Composition Information found", "Article 63(2)(d) of Regulation (EC) No 1107/2009"); end + // Rule unit: ETC.11 rule "ETC.11.0: Recommend first line in table cell with name and address of owner" when @@ -1223,6 +1229,7 @@ rule "ETC.11.0: Recommend first line in table cell with name and address of owne .ifPresent(redactionEntity -> redactionEntity.redact("ETC.11.0", "Trial Site owner and address found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end + //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.0 @@ -1268,7 +1275,8 @@ rule "AI.3.0: Recommend authors from AI as PII" .forEach(nerEntity -> entityCreationService.byNerEntity(nerEntity, "PII", EntityType.RECOMMENDATION, document)); end -//------------------------------------ Manual redaction rules ------------------------------------ + +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -1502,12 +1510,11 @@ rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type EN retract($lowerRank); end - rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" 
salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -1540,6 +1547,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 902ba12f..f82a4a72 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -66,7 +66,7 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end -//------------------------------------ H rules ------------------------------------ 
+//------------------------------------ Headlines rules ------------------------------------ // Rule unit: H.0 rule "H.0.0: retract table of contents page" @@ -1152,7 +1152,7 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" end -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -1394,6 +1394,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index a976a420..5f1653e0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -313,22 +313,13 @@ rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block" rule "StudyDesign.0.0: Study design as one block" when - $oecdNumber: String() from List.of("404", "405", "406", "428", "438", "439", "474", "487") + $oecdNumber: String() from List.of("404", "405", "406", "428", "429", "438", "439", "474", "487") FileAttribute(label == "OECD Number", value == $oecdNumber) $studyDesigns: List() from collect (Entity(type == "study_design")) then componentCreationService.joining("StudyDesign.0.0", "Study_Design", $studyDesigns, 
" "); end -rule "StudyDesign.0.1: Study design (Main Study) as one block" - when - $oecdNumber: String() from List.of("429") - FileAttribute(label == "OECD Number", value == $oecdNumber) - $studyDesigns: List() from collect (Entity(type == "study_design")) - then - componentCreationService.joining("StudyDesign.0.0", "Study_Design_Main_Study", $studyDesigns, " "); - end - rule "Results.0.0: Results and conclusions as joined values" when $oecdNumber: String() from List.of("406", "428", "438", "439", "474", "487") @@ -510,6 +501,7 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" rule "DefaultComponents.999.0: Create components for all unmapped entities." salience -999 when + not FileAttribute(label == "OECD Number") $allEntities: List(!isEmpty()) from collect (Entity()) then componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 5112137b..b28dd604 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -66,7 +66,7 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -163,7 +163,6 @@ rule "MAN.3.0: Apply entity recategorization" retract($entityToBeRecategorized); end - rule "MAN.3.1: Apply entity recategorization of same type" salience 128 when @@ -175,7 +174,6 @@ rule "MAN.3.1: Apply entity recategorization of same type" 
retract($recategorization); end - rule "MAN.3.2: Apply image recategorization" salience 128 when @@ -208,6 +206,8 @@ rule "MAN.4.1: Apply legal basis change" then $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); end + + //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 @@ -220,6 +220,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index b11b2c4a..35ed717c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -84,7 +84,7 @@ rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.3 -rule "CBI.3.0: Redacted because Section contains Vertebrate" +rule "CBI.3.0: Redacted because Section contains a vertebrate" when $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -99,7 +99,7 @@ rule "CBI.3.0: Redacted because Section contains Vertebrate" }); end -rule "CBI.3.1: Redacted because Table Row contains Vertebrate" +rule "CBI.3.1: Redacted because table row contains a vertebrate" when $table: Table(hasEntitiesOfType("vertebrate"), 
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -115,7 +115,7 @@ rule "CBI.3.1: Redacted because Table Row contains Vertebrate" }); end -rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" +rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" when $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -123,7 +123,7 @@ rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" .forEach(entity -> entity.skip("CBI.3.2", "No vertebrate found")); end -rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" +rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" when $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) then @@ -134,7 +134,7 @@ rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" // Rule unit: CBI.4 -rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is found in Section" +rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" when $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), @@ -151,7 +151,7 @@ rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is foun }); end -rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is found in Table Row" +rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), @@ -172,7 +172,7 @@ rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is foun // Rule unit: CBI.5 -rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in section" +rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in 
Section" when $section: Section(!hasTables(), hasEntitiesOfType("redaction_indicator"), @@ -192,7 +192,7 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red }); end -rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Table Row" +rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), @@ -229,7 +229,7 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" }); end -rule "CBI.8.1: Redacted because Table Row contains must_redact entity" +rule "CBI.8.1: Redacted because table row contains must_redact entity" when $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -247,7 +247,7 @@ rule "CBI.8.1: Redacted because Table Row contains must_redact entity" // Rule unit: CBI.9 -rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" +rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -257,10 +257,10 @@ rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non verteb .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" +rule "CBI.9.1: Redact all cells with Header 
Author as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -270,7 +270,7 @@ rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrat .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -286,7 +286,7 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" +rule "CBI.12.0: Add all cells with Header Author(s) as CBI_author" salience 1 when $table: Table(hasHeader("Author(s)") || hasHeader("Author")) @@ -301,7 +301,7 @@ rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" .forEach(redactionEntity -> redactionEntity.skip("CBI.12.0", "Author(s) header found")); end -rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" +rule "CBI.12.1: Do not redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" when $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "N") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "No")) then @@ -316,7 +316,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert then $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> authorEntity.apply("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 
1107/2009 Art. 63 (2g)")); + .forEach(authorEntity -> authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end @@ -325,7 +325,7 @@ rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at when $sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then - $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); end @@ -350,7 +350,7 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv .toList(); $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) - .forEach(redactionEntity -> redactionEntity.apply("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determination of residues\" and livestock keyword" @@ -372,12 +372,12 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(redactionEntity -> redactionEntity.apply("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -385,12 +385,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -398,7 +398,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -477,7 +477,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -491,7 +491,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "PII.0.1: Redact all PII (vertebrate study)" @@ -499,7 +499,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -510,7 +510,7 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" $section: Section(containsString("@")) then 
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" @@ -519,7 +519,7 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -549,12 +549,12 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -577,12 +577,12 @@ rule "PII.4.1: Redact line after contact information keywords (non vertebrate st $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end // Rule unit: PII.6 -rule "PII.6.0: redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -591,10 +591,10 @@ rule "PII.6.0: redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: redact line 
between contact keywords" +rule "PII.6.1: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -603,7 +603,7 @@ rule "PII.6.1: redact line between contact keywords" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -623,10 +623,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(getHeadline().containsString("applicant") || @@ -641,12 +641,12 @@ rule "PII.7.1: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, 
$section) )) - .forEach(entity -> entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found" +rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -661,10 +661,10 @@ rule "PII.8.0: Redact contact information if producer is found" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found" +rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -679,45 +679,27 @@ rule "PII.8.1: Redact contact information if producer is found" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of 
Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + 
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -727,7 +709,7 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -749,7 +731,7 @@ rule "ETC.1.0: Redact Purity" $section: Section(containsStringIgnoreCase("purity")) then entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)")); + .forEach(entity -> entity.redact("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 
63 (2a)")); end @@ -759,7 +741,7 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.2.1: Redact signatures (vertebrate study)" @@ -767,25 +749,25 @@ rule "ETC.2.1: Redact signatures (vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 -rule "ETC.3.0: Redact logos (vertebrate study)" +rule "ETC.3.0: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.skip("ETC.3.0", "Logo Found"); end -rule "ETC.3.1: Redact logos (non vertebrate study)" +rule "ETC.3.1: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -794,7 +776,7 @@ rule "ETC.4.0: Redact dossier dictionary entries" when $dossierRedaction: TextEntity(type == "dossier_redaction") then - $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $dossierRedaction.redact("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of 
Regulation (EC) No 178/2002"); end @@ -818,14 +800,14 @@ rule "ETC.6.0: Redact CAS Number" .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "PII", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end // Rule unit: ETC.7 rule "ETC.7.0: Guidelines FileAttributes" when - $section: Section(!hasTables(), (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) + $section: Section(!hasTables(), containsAnyString("DATA REQUIREMENT(S):", "TEST GUIDELINE(S):") && containsAnyString("OECD", "EPA", "OPPTS")) then RedactionSearchUtility.findTextRangesByRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream() .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) @@ -840,7 +822,7 @@ rule "ETC.8.0: Redact formulas (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.apply("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.8.1: Redact formulas (non vertebrate study)" @@ -848,14 +830,14 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.apply("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end //------------------------------------ AI rules ------------------------------------ // Rule 
unit: AI.0 -rule "AI.0.0: add all NER Entities of type CBI_author" +rule "AI.0.0: Add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) @@ -866,7 +848,7 @@ rule "AI.0.0: add all NER Entities of type CBI_author" // Rule unit: AI.1 -rule "AI.1.0: combine and add NER Entities as CBI_address" +rule "AI.1.0: Combine and add NER Entities as CBI_address" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) @@ -875,7 +857,7 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" end -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -1020,7 +1002,7 @@ rule "MAN.4.1: Apply legal basis change" //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) @@ -1032,7 +1014,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" // Rule unit: X.1 -rule "X.1.0: merge intersecting Entities of same type" +rule "X.1.0: Merge intersecting Entities of same type" salience 64 when $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) @@ -1048,7 +1030,7 @@ rule "X.1.0: merge intersecting Entities of same type" // Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) @@ -1061,7 +1043,7 @@ rule "X.2.0: remove Entity of type 
ENTITY when contained by FALSE_POSITIVE" // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) @@ -1073,7 +1055,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1086,7 +1068,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit // Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1098,7 +1080,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" // Rule unit: X.6 -rule "X.6.0: remove Entity of lower rank, when contained by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" salience 32 when $higherRank: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1113,7 +1095,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), 
!hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -1124,7 +1106,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -1146,6 +1128,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index a9cd2ea5..a6c6c994 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -60,16 +60,40 @@ global EntityCreationService entityCreationService global 
ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary -// --------------------------------------- queries ------------------------------------------------------------------- +//------------------------------------ queries ------------------------------------ query "getFileAttributes" $fileAttribute: FileAttribute() end -// --------------------------------------- NER Entities rules ------------------------------------------------------------------- +//------------------------------------ CBI rules ------------------------------------ + +// Rule unit: CBI.0 +rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $entity: TextEntity(type == "CBI_author", dictionaryEntry) + then + $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + + +//------------------------------------ PII rules ------------------------------------ + +// Rule unit: PII.0 +rule "PII.0.0: Redact all PII (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + $pii: TextEntity(type == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + + +//------------------------------------ AI rules ------------------------------------ // Rule unit: AI.0 -rule "AI.0.0: add all NER Entities of type CBI_author" +rule "AI.0.0: Add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) @@ -78,28 +102,8 @@ rule "AI.0.0: add all NER Entities of type CBI_author" .forEach(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document)); end -// --------------------------------------- CBI rules ------------------------------------------------------------------- -rule "CBI.0.0: Always redact CBI_author" - - when - 
$cbiAuthor: TextEntity(type == "CBI_author", entityType == EntityType.ENTITY) - then - $cbiAuthor.apply("CBI.0.0", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - -// --------------------------------------- PII rules ------------------------------------------------------------------- - -rule "PII.0.0: Always redact PII" - - when - $cbiAuthor: TextEntity(type == "PII", entityType == EntityType.ENTITY) - then - $cbiAuthor.apply("PII.0.0", "PII found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -157,7 +161,6 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to // Rule unit: MAN.2 rule "MAN.2.0: Apply force redaction" - no-loop true salience 128 when $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) @@ -166,10 +169,10 @@ rule "MAN.2.0: Apply force redaction" $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); end rule "MAN.2.1: Apply force redaction to images" - no-loop true salience 128 when $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) @@ -178,6 +181,7 @@ rule "MAN.2.1: Apply force redaction to images" $imageToForce.getManualOverwrite().addChange($force); update($imageToForce); update($imageToForce.getParent()); + retract($force); end @@ -185,9 +189,9 @@ rule "MAN.2.1: Apply force redaction to images" rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + $recategorization: ManualRecategorization($id: 
annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type != $type) then $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); @@ -196,16 +200,14 @@ rule "MAN.3.0: Apply entity recategorization" retract($entityToBeRecategorized); end -rule "MAN.3.1: Apply image recategorization" +rule "MAN.3.1: Apply entity recategorization of same type" salience 128 when - $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $imageToBeRecategorized: Image($id == id) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type == $type) then - manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); - update($imageToBeRecategorized); - update($imageToBeRecategorized.getParent()); + $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); retract($recategorization); end @@ -233,7 +235,7 @@ rule "MAN.4.1: Apply legal basis change" //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) @@ -245,7 +247,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" // Rule unit: X.1 -rule "X.1.0: merge 
intersecting Entities of same type" +rule "X.1.0: Merge intersecting Entities of same type" salience 64 when $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) @@ -261,7 +263,7 @@ rule "X.1.0: merge intersecting Entities of same type" // Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) @@ -274,7 +276,7 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) @@ -286,7 +288,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -299,7 +301,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit // Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -311,7 +313,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" // Rule unit: X.6 -rule "X.6.0: remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: 
Remove Entity of lower rank, when contained by by entity of type ENTITY" salience 32 when $higherRank: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -326,7 +328,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -337,7 +339,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -359,6 +361,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + 
matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl index 7c54444f..13fc50cd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -66,23 +66,9 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end -//------------------------------------ Local dictionary search rules ------------------------------------ - -// Rule unit: LocalDictionarySearch.0 -rule "LDS.0.0: Run local dictionary search" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -999 - when - $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() - then - entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) - .forEach(entity -> { - Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); - }); - end -// --------------------------------------- Your rules below this line -------------------------------------------------- +//------------------------------------ Table extraction rules ------------------------------------ +// Rule unit: TAB.0 rule "TAB.0.0: Study Type File Attribute" when not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) @@ -115,6 +101,8 @@ rule "TAB.0.1: Guidelines" .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); end + +// Rule unit: TAB.1 rule "TAB.1.0: Full Table extraction (Guideline Deviation)" when FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("425")) @@ -126,6 +114,8 @@ rule "TAB.1.0: Full Table extraction (Guideline Deviation)" .ifPresent(entity -> entity.apply("TAB.1.0", "full table extracted")); end + +// Rule unit: TAB.2 rule "TAB.2.0: Individual row extraction (Clinical Signs)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) @@ -138,6 +128,8 @@ rule "TAB.2.0: Individual row extraction (Clinical Signs)" .ifPresent(entity -> entity.apply("TAB.2.0", "Individual row based on animal number")); end + +// Rule unit: TAB.3 rule "TAB.3.0: Individual column extraction (Strain)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) @@ -151,6 +143,8 @@ rule "TAB.3.0: Individual column extraction (Strain)" .forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header")); end + +// Rule unit: TAB.4 rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) @@ -177,6 +171,8 @@ rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality" .forEach(redactionEntity -> redactionEntity.apply("TAB.4.1", "Dose Mortality found.")); end + +// Rule unit: TAB.5 rule "TAB.5.0: Targeted cell extraction" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) @@ -190,6 +186,8 @@ rule "TAB.5.0: Targeted cell extraction" .ifPresent(entity -> entity.apply("TAB.5.0", "Dosage found in row with survived male")); end + +// Rule unit: TAB.6 rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)" when $section: Section(getHeadline().containsString("Advanced Table Extraction"), containsAllStrings("female", "Female", "Survived", "Group 2")) @@ -202,6 +200,8 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)" .ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date")); end + +// Rule unit: TAB.7 rule "TAB.7.0: Indicator (Species)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) @@ -215,7 
+215,8 @@ rule "TAB.7.0: Indicator (Species)" .ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found")); end -//------------------------------------ Manual redaction rules ------------------------------------ + +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -433,12 +434,11 @@ rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type EN retract($lowerRank); end - rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -447,7 +447,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN // Rule unit: X.7 -rule "X.7.0: remove all images" +rule "X.7.0: Remove all images" salience 512 when $image: Image(imageType != ImageType.OCR, !hasManualChanges()) @@ -469,3 +469,19 @@ rule "FA.1.0: Remove duplicate FileAttributes" retract($duplicate); end + +//------------------------------------ Local 
dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: Run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl index d3e5c6ff..d8b84772 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl @@ -101,3 +101,26 @@ rule "EntityBasedExtr.2.4: Cells containing dose for survived males" then componentCreationService.joiningFromSameTableRow("EntityBasedExtr.2.4", "2.4 Entity-Based Values", $tableValues); end + + +rule "DefaultComponents.999.0: Create components for all unmapped entities." 
+ salience -999 + when + not FileAttribute(label == "OECD Number") + $allEntities: List(!isEmpty()) from collect (Entity()) + then + componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities); + end + + +//------------------------------------ Component merging rules ------------------------------------ +/* +rule "X.0.0: merge duplicate component references" + when + $first: Component() + $duplicate: Component(this != $first, name == $first.name, value == $first.value) + then + $first.getReferences().addAll($duplicate.getReferences()); + retract($duplicate); + end +*/ \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl index ccf3005a..c30404aa 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl @@ -66,23 +66,9 @@ query "getFileAttributes" $fileAttribute: FileAttribute() end -//------------------------------------ Local dictionary search rules ------------------------------------ - -// Rule unit: LocalDictionarySearch.0 -rule "LDS.0.0: Run local dictionary search" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -999 - when - $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() - then - entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) - .forEach(entity -> { - Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); - }); - end -// --------------------------------------- Your rules below this line -------------------------------------------------- 
+//------------------------------------ Table extraction rules ------------------------------------ +// Rule unit: TAB.0 rule "TAB.0.0: Study Type File Attribute" when not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) @@ -116,20 +102,21 @@ rule "TAB.0.1: Guidelines" end +// Rule unit: TAB.6 rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)" when - $section: Section(containsString("Maximum occurrence")) - $table: Table() from $section.streamChildren().toList() - TableCell(containsWordIgnoreCase("water"), $row: row) from $table.streamTableCells().toList() - $test: TableCell($row == row) from $table.streamTableCells().toList() + $section: Section(getHeadline().containsString("Advanced Table Extraction"), containsAllStrings("female", "Female", "Survived", "Group 2")) + $table: Table(hasHeader("Group 2")) from $section.streamChildren().toList() + TableCell(containsWordIgnoreCase("Female"), $row: row) from $table.streamTableCellsWithHeader("Group 2").toList() + TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList() + $femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList() then - System.out.println("AAAA: " + $test); - entityCreationService.bySemanticNode($test, "test", EntityType.ENTITY) - .ifPresent(entity -> entity.apply("TAB.6.0", "Some test stuff")); + entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date")); end -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -363,12 +350,11 @@ rule "X.6.0: Remove Entity of lower 
rank, when contained by by entity of type EN retract($lowerRank); end - rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -377,7 +363,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN // Rule unit: X.7 -rule "X.7.0: remove all images" +rule "X.7.0: Remove all images" salience 512 when $image: Image(imageType != ImageType.OCR, !hasManualChanges()) @@ -399,3 +385,19 @@ rule "FA.1.0: Remove duplicate FileAttributes" retract($duplicate); end + +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: Run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + 
.forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); + }); + end diff --git a/redaction-service-v1/rules-management/build.gradle.kts b/redaction-service-v1/rules-management/build.gradle.kts index 5235d200..142558b7 100644 --- a/redaction-service-v1/rules-management/build.gradle.kts +++ b/redaction-service-v1/rules-management/build.gradle.kts @@ -16,6 +16,14 @@ repositories { } } +sourceSets { + test { + resources { + srcDirs("src/main/resources", "src/test/ressources") // add both so test can access the all_rules_file + } + } +} + dependencies { implementation(project(":redaction-service-server-v1")) implementation("org.projectlombok:lombok:1.18.28") diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactory.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactory.java index 9d107e0d..17933191 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactory.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactory.java @@ -4,6 +4,7 @@ import static java.util.Collections.emptySet; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -23,6 +24,7 @@ import lombok.experimental.UtilityClass; public class RuleFileFactory { public String createFileFromIdentifiers(String identifiers, ApplicationType applicationType) { + if (identifiers.isBlank() || identifiers.isEmpty()) { return createFileFromIdentifiers(emptySet(), applicationType); } @@ -35,6 +37,7 @@ public class RuleFileFactory { return 
createFileFromIdentifiers(identifiers, true, applicationType); } + public String createFileFromIdentifiers(Set identifiers, boolean includeDefaults, ApplicationType applicationType) { if (includeDefaults) { @@ -42,12 +45,12 @@ public class RuleFileFactory { } RuleFileBluePrint bluePrint = RuleFileParser.buildBluePrintFromAllRulesFile(applicationType); RuleFileBluePrint filteredBluePrint = bluePrint.buildFilteredBluePrintByRuleIdentifiers(identifiers); - return buildRuleFile(filteredBluePrint); + return buildRuleString(filteredBluePrint); } @SneakyThrows - public String buildRuleFile(RuleFileBluePrint bluePrint) { + public String buildRuleString(RuleFileBluePrint bluePrint) { try (var templateInputStream = RuleManagementResources.getTemplateInputStream()) { String template = new String(templateInputStream.readAllBytes(), StandardCharsets.UTF_8); @@ -66,6 +69,8 @@ public class RuleFileFactory { private String buildBluePrintWithTemplateRuleOrder(RuleFileBluePrint bluePrint, List ruleOrder) { + Set additionalRuleTypes = bluePrint.ruleClasses().stream().map(RuleClass::ruleType).filter(ruleType -> !ruleOrder.contains(ruleType)).collect(Collectors.toSet()); + StringBuilder sb = new StringBuilder(); sb.append(bluePrint.imports()); sb.append("\n\n"); @@ -76,29 +81,40 @@ public class RuleFileFactory { sb.append(bluePrint.queries()); sb.append("\n\n"); for (RuleType ruleBlockType : ruleOrder) { - if (!bluePrint.ruleClassExists(ruleBlockType)) { + if (ruleBlockType.isWildCard()) { + additionalRuleTypes.stream().sorted(Comparator.comparing(RuleType::name)).forEach(ruleType -> writeRuleClass(bluePrint, ruleType, sb)); continue; } - sb.append("//------------------------------------ "); - sb.append(ruleBlockType); - sb.append(" rules ------------------------------------"); - sb.append("\n\n"); - RuleClass ruleClass = bluePrint.findRuleClassByType(ruleBlockType); - for (RuleUnit unit : ruleClass.ruleUnits()) { - if (unit.rules().isEmpty()) { - continue; - } - sb.append("// "); - 
sb.append(unit.rules().get(0).identifier().toRuleUnitString()); - sb.append("\n"); - unit.rules().forEach(rule -> { - sb.append(rule.code()); - sb.append("\n\n"); - }); - sb.append("\n"); + if (bluePrint.findRuleClassByType(ruleBlockType).isEmpty()) { + continue; } + writeRuleClass(bluePrint, ruleBlockType, sb); } return sb.toString().trim() + "\n"; } + + private static void writeRuleClass(RuleFileBluePrint bluePrint, RuleType ruleType, StringBuilder sb) { + + sb.append("//------------------------------------ "); + sb.append(ruleType); + sb.append(" rules ------------------------------------"); + sb.append("\n\n"); + RuleClass ruleClass = bluePrint.findRuleClassByType(ruleType).orElseThrow(); + List sortedRuleUnits = ruleClass.ruleUnits().stream().sorted(Comparator.comparingInt(RuleUnit::unit)).toList(); + for (RuleUnit unit : sortedRuleUnits) { + if (unit.rules().isEmpty()) { + continue; + } + sb.append("// "); + sb.append(unit.rules().get(0).identifier().toRuleUnitString()); + sb.append("\n"); + unit.rules().stream().sorted(Comparator.comparingInt(rule -> rule.identifier().id())).forEach(rule -> { + sb.append(rule.code()); + sb.append("\n\n"); + }); + sb.append("\n"); + } + } + } diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileParser.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileParser.java index 4fa8ca67..778efb67 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileParser.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileParser.java @@ -94,4 +94,17 @@ public class RuleFileParser { return parseRuleIdentifiersFromFile(file.toString()); } + + /** + * Creates a BluePrint from all redact manager, documine and component rule files + * + * @return RuleFileBluePrint 
containing all rules from any all rule files + */ + public static RuleFileBluePrint buildBluePrintFromAllRuleFiles() { + + RuleFileBluePrint bluePrint = buildBluePrintFromAllRulesFile(ApplicationType.DM); + bluePrint.addAllRulesFromBluePrint(buildBluePrintFromAllRulesFile(ApplicationType.RM)); + return bluePrint; + } + } diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java new file mode 100644 index 00000000..e8bb5be2 --- /dev/null +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java @@ -0,0 +1,43 @@ +package com.knecon.fforesight.utility.rules.management.migration; + +import java.io.File; +import java.io.FileOutputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory; +import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser; +import com.knecon.fforesight.utility.rules.management.models.BasicRule; +import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint; +import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO; + +import lombok.SneakyThrows; +import lombok.experimental.UtilityClass; + +/** + * This class is the primary class for migrating Files with the 4.0.0 format to newer versions. This assumes RuleIdentifiers are already formatted. + * It will search for any rule with the same Identifier in the base and replace it with the new version. If none is found, the rule is written as is. 
+ */ +@UtilityClass +public class RuleFileMigrator { + + @SneakyThrows + public void migrateFile(File ruleFile) { + + RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath())); + RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles(); + + for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) { + List rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier()); + ruleFileBluePrint.removeRule(ruleToReplace.identifier()); + rulesToAdd.forEach(ruleFileBluePrint::addRule); + } + + String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint); + String migratedFilePath = ruleFile.getAbsolutePath(); + try (var out = new FileOutputStream(migratedFilePath)) { + out.write(migratedRulesString.getBytes(StandardCharsets.UTF_8)); + } + } + +} diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleIdentifierMigrator.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleIdentifierMigrator.java index cb9cbefd..aac63fa5 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleIdentifierMigrator.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleIdentifierMigrator.java @@ -46,7 +46,7 @@ public class RuleIdentifierMigrator { bluePrint = migrateMatchedRuleForAllRules(bluePrint); - String ruleString = RuleFileFactory.buildRuleFile(bluePrint); + String ruleString = RuleFileFactory.buildRuleString(bluePrint); try (var out = new FileOutputStream("/tmp/all_redact_manager_rules.drl")) { out.write(ruleString.getBytes(StandardCharsets.UTF_8)); } @@ -78,8 +78,11 @@ public class RuleIdentifierMigrator { String redactionReason = 
findByRegex("\\.setRedactionReason\\(\"(.*)\"\\)", basicRule.code(), 1); String legalBasis = findByRegex("\\.setLegalBasis\\(\"(.*)\"\\)", basicRule.code(), 1); - String migratedCode = basicRule.code().replaceAll("\\.addMatchedRule\\(.*\\)", Matcher.quoteReplacement(String.format(".addMatchedRule(\"%s\", \"%s\", \"%s\")", basicRule.identifier().toString(), redactionReason, legalBasis))); - migratedCode = migratedCode.replaceAll("\\.setMatchedRule\\(.*\\)", Matcher.quoteReplacement(String.format(".addMatchedRule(\"%s\", \"%s\", \"%s\")", basicRule.identifier().toString(), redactionReason, legalBasis))); + String migratedCode = basicRule.code() + .replaceAll("\\.addMatchedRule\\(.*\\)", + Matcher.quoteReplacement(String.format(".addMatchedRule(\"%s\", \"%s\", \"%s\")", basicRule.identifier().toString(), redactionReason, legalBasis))); + migratedCode = migratedCode.replaceAll("\\.setMatchedRule\\(.*\\)", + Matcher.quoteReplacement(String.format(".addMatchedRule(\"%s\", \"%s\", \"%s\")", basicRule.identifier().toString(), redactionReason, legalBasis))); migratedCode = migratedCode.replaceAll("\\.setRedactionReason\\(\".*\"\\)", ""); migratedCode = migratedCode.replaceAll("\\.setLegalBasis\\(\".*\"\\)", ""); migratedCode = migratedCode.replaceAll("\\$entity;\n", ""); @@ -89,6 +92,7 @@ public class RuleIdentifierMigrator { return new BasicRule(basicRule.identifier(), basicRule.name(), migratedCode); } + private static String findByRegex(String regex, String searchText, int group) { Pattern pattern = Pattern.compile(regex); @@ -113,8 +117,8 @@ public class RuleIdentifierMigrator { public void migrateIdentifier(RuleIdentifier oldIdentifier, RuleIdentifier newIdentifier, RuleFileBluePrint bluePrint, List records) { - BasicRule oldRule = bluePrint.findRuleClassByType(oldIdentifier.type()) - .findRuleUnitByInteger(oldIdentifier.unit()) + BasicRule oldRule = bluePrint.findRuleClassByType(oldIdentifier.type()).orElseThrow() + 
.findRuleUnitByInteger(oldIdentifier.unit()).orElseThrow() .rules() .stream() .filter(rule -> rule.identifier().equals(oldIdentifier)) diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleClass.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleClass.java index 03f64bc3..ede53a1a 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleClass.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleClass.java @@ -1,22 +1,23 @@ package com.knecon.fforesight.utility.rules.management.models; +import java.util.LinkedList; import java.util.List; import java.util.Objects; +import java.util.Optional; public record RuleClass(RuleType ruleType, List ruleUnits) { - public RuleUnit findRuleUnitByInteger(Integer unit) { + public Optional findRuleUnitByInteger(Integer unit) { - return ruleUnits.stream() - .filter(ruleUnit -> Objects.equals(ruleUnit.unit(), unit)) - .findFirst() - .orElseThrow(() -> new IllegalArgumentException(String.format("RuleUnit %d does not exist in class %s", unit, this))); + return ruleUnits.stream().filter(ruleUnit -> Objects.equals(ruleUnit.unit(), unit)).findFirst(); } - public boolean ruleUnitExists(Integer unit) { + public RuleUnit createNewRuleUnit(Integer unit) { - return ruleUnits.stream().anyMatch(ruleUnit -> Objects.equals(ruleUnit.unit(), unit)); + var ruleUnit = new RuleUnit(unit, new LinkedList<>()); + ruleUnits.add(ruleUnit); + return ruleUnit; } } diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java index e6e63393..045cfb22 100644 --- 
a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java @@ -1,76 +1,79 @@ package com.knecon.fforesight.utility.rules.management.models; import java.util.Collection; +import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; public record RuleFileBluePrint(String imports, String globals, String queries, List ruleClasses) { - public RuleClass findRuleClassByType(RuleType ruleType) { + public void removeRule(RuleIdentifier ruleIdentifier) { + + findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit()).ifPresent(ruleUnit -> { + ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier)); + if (ruleUnit.rules().isEmpty()) { + ruleClass.ruleUnits().remove(ruleUnit); + } + if (ruleClass.ruleUnits().isEmpty()) { + ruleClasses().remove(ruleClass); + } + })); - return ruleClasses.stream() - .filter(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType)) - .findFirst() - .orElseThrow(() -> new IllegalArgumentException(String.format("RuleType %s does not exist in this BluePrint %s", ruleType.name(), this))); } - public boolean ruleClassExists(RuleType ruleType) { + public Optional findRuleClassByType(RuleType ruleType) { - return ruleClasses.stream().anyMatch(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType)); + return ruleClasses.stream().filter(ruleClass -> Objects.equals(ruleClass.ruleType(), ruleType)).findFirst(); + } + + + public Set getAllRules() { + + return ruleClasses().stream().map(RuleClass::ruleUnits).flatMap(Collection::stream).map(RuleUnit::rules).flatMap(Collection::stream).collect(Collectors.toSet()); + } + + + /** + * Adds 
all rules from a given RuleFileBluePrint to the current instance. + * + * @param ruleFileBluePrint The RuleFileBluePrint that contains the rules to be added. + * @throws IllegalArgumentException If the RuleFileBluePrints have non-matching rules with the same identifier and cannot be merged. + */ + public void addAllRulesFromBluePrint(RuleFileBluePrint ruleFileBluePrint) { + + Set newRuleIdentifiers = ruleFileBluePrint.getAllRuleIdentifiers(); + Set existingRuleIdentifiers = this.getAllRuleIdentifiers(); + Set duplicatedRuleIdentifiers = existingRuleIdentifiers.stream().filter(newRuleIdentifiers::contains).collect(Collectors.toSet()); + for (RuleIdentifier duplicatedRuleIdentifier : duplicatedRuleIdentifiers) { + List thisRules = findRuleByIdentifier(duplicatedRuleIdentifier); + List otherRules = ruleFileBluePrint.findRuleByIdentifier(duplicatedRuleIdentifier); + assert thisRules.size() == 1; + assert otherRules.size() == 1; + if (!thisRules.get(0).equals(otherRules.get(0))) { + throw new IllegalArgumentException("RuleFileBluePrints have non matching rules with the same identifier, cannot be merged!"); + } + } + newRuleIdentifiers.removeAll(existingRuleIdentifiers); + newRuleIdentifiers.forEach(identifierToAdd -> ruleFileBluePrint.findRuleByIdentifier(identifierToAdd).forEach(this::addRule)); } public List findRuleByIdentifier(RuleIdentifier ruleIdentifier) { if (Objects.isNull(ruleIdentifier.unit())) { - return findRuleClassByType(ruleIdentifier.type()).ruleUnits().stream().map(RuleUnit::rules).flatMap(Collection::stream).toList(); - } - return findRuleClassByType(ruleIdentifier.type()).findRuleUnitByInteger(ruleIdentifier.unit()) - .rules() - .stream() - .filter(rule -> rule.identifier().matches(ruleIdentifier)) - .toList(); - } - - - public void addRule(BasicRule rule) { - - RuleClass ruleClass; - if (ruleClassExists(rule.identifier().type())) { - ruleClass = findRuleClassByType(rule.identifier().type()); - } else { - ruleClass = new 
RuleClass(rule.identifier().type(), new LinkedList<>()); - ruleClasses.add(ruleClass); - } - - RuleUnit ruleUnit; - if (ruleClass.ruleUnitExists(rule.identifier().unit())) { - ruleUnit = ruleClass.findRuleUnitByInteger(rule.identifier().unit()); - } else { - ruleUnit = new RuleUnit(rule.identifier().unit(), new LinkedList<>()); - ruleClass.ruleUnits().add(ruleUnit); - } - - ruleUnit.rules().add(rule); - } - - - public void removeRule(RuleIdentifier ruleIdentifier) { - - RuleClass ruleClass = findRuleClassByType(ruleIdentifier.type()); - RuleUnit ruleUnit = ruleClass.findRuleUnitByInteger(ruleIdentifier.unit()); - ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier)); - - if (ruleUnit.rules().isEmpty()) { - ruleClass.ruleUnits().remove(ruleUnit); - } - if (ruleClass.ruleUnits().isEmpty()) { - ruleClasses().remove(ruleClass); + return findRuleClassByType(ruleIdentifier.type()).map(ruleClass -> ruleClass.ruleUnits().stream().flatMap(ruleUnit -> ruleUnit.rules().stream()).toList()) + .orElse(Collections.emptyList()); } + return findRuleClassByType(ruleIdentifier.type())// + .map(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit())// + .map(ruleUnit -> ruleUnit.rules().stream().filter(rule -> rule.identifier().matches(ruleIdentifier)).toList())// + .orElse(Collections.emptyList()))// + .orElse(Collections.emptyList()); } @@ -86,6 +89,16 @@ public record RuleFileBluePrint(String imports, String globals, String queries, } + public void addRule(BasicRule rule) { + + RuleClass ruleClass = findRuleClassByType(rule.identifier().type()).orElseGet(() -> createNewRuleClass(rule)); + + RuleUnit ruleUnit = ruleClass.findRuleUnitByInteger(rule.identifier().unit()).orElseGet(() -> ruleClass.createNewRuleUnit(rule.identifier().unit())); + + ruleUnit.rules().add(rule); + } + + public RuleFileBluePrint buildFilteredBluePrintByRuleIdentifiers(Set identifiers) { RuleFileBluePrint filteredBluePrint = new RuleFileBluePrint(imports(), globals(), 
queries(), new LinkedList<>()); @@ -97,4 +110,12 @@ public record RuleFileBluePrint(String imports, String globals, String queries, return filteredBluePrint; } + + private RuleClass createNewRuleClass(BasicRule rule) { + + var newClass = new RuleClass(rule.identifier().type(), new LinkedList<>()); + ruleClasses.add(newClass); + return newClass; + } + } diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleType.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleType.java index e91c96ea..4eeaad66 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleType.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleType.java @@ -1,83 +1,54 @@ package com.knecon.fforesight.utility.rules.management.models; -public enum RuleType { - SYN { - @Override - public String toString() { +import java.util.Map; - return "Syngenta specific"; - } - }, - CBI, - PII, - ETC { - @Override - public String toString() { +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.experimental.FieldDefaults; - return "Other"; - } - }, - AI, - MAN { - @Override - public String toString() { +@AllArgsConstructor +@EqualsAndHashCode(onlyExplicitlyIncluded = true) +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class RuleType { - return "Manual redaction"; - } - }, - X { - @Override - public String toString() { + static Map fullNameMap = Map.of("SYN", "Syngenta specific",// + "ETC", "Other",// + "MAN", "Manual changes",// + "X", "Entity merging",// + "FA", "File attributes",// + "LDS", "Local dictionary search",// + "TAB", "Table extraction",// + "H", "Headlines",// + "DOC", "General documine"); - return "Entity merging"; - } - }, - FA { - @Override - public 
String toString() { - - return "File attributes"; - } - }, - LDS { - @Override - public String toString() { - - return "Local dictionary search"; - } - }, - H { - @Override - public String toString() { - - return "H"; - } - }, - DOC { - - @Override - public String toString() { - - return "General documine"; - } - }; + @EqualsAndHashCode.Include + String name; public static RuleType fromString(String value) { - value = value.replaceAll("\r",""); - return switch (value) { - case "SYN" -> SYN; - case "CBI" -> CBI; - case "PII" -> PII; - case "ETC" -> ETC; - case "AI" -> AI; - case "MAN" -> MAN; - case "X" -> X; - case "FA" -> FA; - case "LDS" -> LDS; - case "H" -> H; - case "DOC" -> DOC; - default -> throw new IllegalStateException("Unexpected value: " + value); - }; + + value = value.replaceAll("\r", ""); + return new RuleType(value); } + + + public String name() { + + return name; + } + + + public boolean isWildCard() { + + return name.equals("*"); + } + + + @Override + public String toString() { + + return fullNameMap.getOrDefault(name, name); + } + } diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParser.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParser.java index cf040c09..d15db736 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParser.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParser.java @@ -31,7 +31,6 @@ import com.knecon.fforesight.utility.rules.management.models.BasicRule; import com.knecon.fforesight.utility.rules.management.models.OldRule; import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint; import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier; -import 
com.knecon.fforesight.utility.rules.management.models.RuleType; import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO; import lombok.SneakyThrows; @@ -106,10 +105,7 @@ public class OldRulesParser { String.format("%s: %s", value, value.stream().map(bluePrint::findRuleByIdentifier).flatMap(Collection::stream).map(BasicRule::name).toList()))); return Stream.concat(// - Stream.of(RuleIdentifier.fromRuleType(RuleType.X), - RuleIdentifier.fromRuleType(RuleType.FA), - RuleIdentifier.fromRuleType(RuleType.LDS), - RuleIdentifier.fromRuleType(RuleType.MAN)),// + Stream.of(RuleIdentifier.fromString("X"), RuleIdentifier.fromString("FA"), RuleIdentifier.fromString("LDS"), RuleIdentifier.fromString("MAN")),// translationPairs.values().stream().flatMap(Collection::stream)) .map(ruleIdentifier -> new RuleIdentifier(ruleIdentifier.type(), ruleIdentifier.unit(), null)) .collect(Collectors.toSet()); diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index df06969b..eed03ce5 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -1094,15 +1094,15 @@ rule "ETC.2.1: Redact signatures (vertebrate study)" // Rule unit: ETC.3 -rule "ETC.3.0: Redact logos (vertebrate study)" +rule "ETC.3.0: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.skip("ETC.3.0", "Logo Found"); end -rule "ETC.3.1: Redact logos (non vertebrate study)" +rule "ETC.3.1: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) @@ -1507,7 +1507,7 @@ rule 
"X.6.1: remove Entity of higher rank, when intersected by entity of type EN salience 32 when $higherRank: TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) + $lowerRank: TextEntity(intersects($higherRank), type != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) then $higherRank.getIntersectingNodes().forEach(node -> update(node)); $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); @@ -1540,6 +1540,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 935ef3dc..a1692130 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -1149,6 +1149,155 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" .forEach(entity -> entity.apply("DOC.35.0", "Doses per bodyweight information found", "n-a")); end 
+//------------------------------------ Table extraction rules ------------------------------------ + +// Rule unit: TAB.0 +rule "TAB.0.0: Study Type File Attribute" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") + && containsAnyString("OECD", "EPA", "OPPTS")) + then + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1 ,$section.getTextBlock()).stream() + .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .forEach(fileAttribute -> insert(fileAttribute)); + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()).stream() + .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .forEach(fileAttribute -> insert(fileAttribute)); + end + +rule "TAB.0.1: Guidelines" + when + $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS")) + then + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. 
found")); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found")); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); + end + + +// Rule unit: TAB.1 +rule "TAB.1.0: Full Table extraction (Guideline Deviation)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Full Table")) + $table: Table() from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + $tableCell: TableCell(!header) from $table.streamTableCells().toList() + then + entityCreationService.bySemanticNode($tableCell, "full_table_row", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.1.0", "full table extracted")); + end + + +// Rule unit: TAB.2 +rule "TAB.2.0: Individual row extraction (Clinical Signs)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Individual Rows Extraction")) + $table: Table(hasHeader("Animal No."), (hasRowWithHeaderAndAnyValue("Animal No.", List.of("120-2", "120-5")))) from $section.streamChildren().toList() + TableCell($row: row, containsAnyString("120-2", 
"120-5")) from $table.streamTableCellsWithHeader("Animal No.").toList() + $tableCell: TableCell($row == row) from $table.streamTableCells().toList() + then + entityCreationService.bySemanticNode($tableCell, "clinical_signs", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.2.0", "Individual row based on animal number")); + end + + +// Rule unit: TAB.3 +rule "TAB.3.0: Individual column extraction (Strain)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Individual Column")) + $table: Table(hasHeader("Sex")) from $section.streamChildren().toList() + then + $table.streamTableCellsWithHeader("Sex") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header")); + end + + +// Rule unit: TAB.4 +rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Combined Columns")) + $table: Table(hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + then + $table.streamTableCellsWithHeader("Dosage (mg/kg bw)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.apply("TAB.4.0", "Dose Mortality dose found.")); + end + +rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Combined Columns")) + $table: Table(hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + then + 
$table.streamTableCellsWithHeader("Mortality") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.apply("TAB.4.1", "Dose Mortality found.")); + end + + +// Rule unit: TAB.5 +rule "TAB.5.0: Targeted cell extraction" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Value Extraction")) + $table: Table(hasHeader("Mortality"), hasRowWithHeaderAndAnyValue("Sex", List.of("male", "Male")), hasRowWithHeaderAndValue("Mortality", "Survived")) from $section.streamChildren().toList() + TableCell(containsWordIgnoreCase("Male"), $row: row) from $table.streamTableCellsWithHeader("Sex").toList() + TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Mortality").toList() + $dosageCell: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList() + then + entityCreationService.bySemanticNode($dosageCell,"doses_mg_kg_bw", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.5.0", "Dosage found in row with survived male")); + end + + +// Rule unit: TAB.6 +rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)" + when + $section: Section(getHeadline().containsString("Advanced Table Extraction"), containsAllStrings("female", "Female", "Survived", "Group 2")) + $table: Table(hasHeader("Group 2")) from $section.streamChildren().toList() + TableCell(containsWordIgnoreCase("Female"), $row: row) from $table.streamTableCellsWithHeader("Group 2").toList() + TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList() + $femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList() + then + entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY) + 
.ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date")); + end + + +// Rule unit: TAB.7 +rule "TAB.7.0: Indicator (Species)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Entity-Based")) + $table: Table() from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() + TableCell(isHeader(), containsString("Title"), $col: col) from $table.streamTableCells().toList() + TableCell(hasEntitiesOfType("vertebrates"), $row: row) from $table.streamTableCells().toList() + $cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList() + then + entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY) + .ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found")); + end + //------------------------------------ Manual redaction rules ------------------------------------ @@ -1392,6 +1541,6 @@ rule "LDS.0.0: Run local dictionary search" entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) .forEach(entity -> { Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); }); end diff --git a/redaction-service-v1/rules-management/src/main/resources/order_template.txt b/redaction-service-v1/rules-management/src/main/resources/order_template.txt index 0cbc4d7b..e56b9bff 100644 --- a/redaction-service-v1/rules-management/src/main/resources/order_template.txt +++ b/redaction-service-v1/rules-management/src/main/resources/order_template.txt @@ -1,8 +1,10 @@ H DOC +TAB SYN CBI PII +* ETC AI MAN diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileBluePrintMergingTest.java 
b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileBluePrintMergingTest.java new file mode 100644 index 00000000..559101ac --- /dev/null +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileBluePrintMergingTest.java @@ -0,0 +1,20 @@ +package com.knecon.fforesight.utility.rules.management; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser; +import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint; +import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier; + +public class RuleFileBluePrintMergingTest { + + @Test + public void testBothRuleFilesCanBeMerged() { + + RuleFileBluePrint combined = RuleFileParser.buildBluePrintFromAllRuleFiles(); + assertEquals(1, combined.findRuleByIdentifier(RuleIdentifier.fromString("X.0.0")).size()); + } + +} diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java new file mode 100644 index 00000000..abb7fcb6 --- /dev/null +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java @@ -0,0 +1,56 @@ +package com.knecon.fforesight.utility.rules.management; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import com.knecon.fforesight.utility.rules.management.migration.RuleFileMigrator; + +import lombok.SneakyThrows; + +/** + * This test may be used to migrate dossier-templates and redaction-service ruleFiles all at once + * It loads the 
existing ruleFiles and checks whether a different version is present in the rules-management. If it is present, it is replaced by the rules-management version. + * From these blueprints it overwrites the existing rule files, which means the order of rules may change according to order_template.txt. + * In comparison to the previous migration code, this keeps Rules that are not present in the rules-management untouched. The order of these unknown rules may be adapted using the '*' symbol in order_template.txt. + * The test does not add any additional rules using the default identifier lists. + * The test does not care about the applicationType, since it looks in both RM and DM all-rules files. + * It does not migrate Component Rules yet. + */ +public class RuleFileMigrationTest { + + // Put your redaction service drools paths and dossier-templates paths both RM and DM here + static final List ruleFileDirs = List.of( + "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", + "/home/kschuettler/iqser/fforesight/dossier-templates-v2/", + "/home/kschuettler/iqser/redaction/dossier-templates-v2/"); + + + @Test + @SneakyThrows + @Disabled + void migrateAllEntityRules() { + + for (String ruleFileDir : ruleFileDirs) { + Files.walk(Path.of(ruleFileDir)).filter(this::isEntityRuleFile).map(Path::toFile).peek(System.out::println).forEach(RuleFileMigrator::migrateFile); + } + } + + + private boolean isEntityRuleFile(Path path) { + + File ruleFile = path.toFile(); + if (!ruleFile.isFile()) { + return false; + } + if (!ruleFile.toString().endsWith(".drl")) { + return false; + } + return !ruleFile.getName().equals("componentRules.drl") && !ruleFile.getName().endsWith("_components.drl") && !ruleFile.getName().endsWith("OLD.drl"); + } + +} diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java
b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java index 018c216a..791fd24e 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java @@ -1,6 +1,5 @@ package com.knecon.fforesight.utility.rules.management.factory; -import static java.util.stream.Collectors.toList; import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.BufferedWriter; @@ -13,20 +12,14 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; import java.util.Set; import java.util.function.Function; -import java.util.stream.Collectors; import java.util.stream.Stream; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.knecon.fforesight.utility.rules.management.RuleManagementResources; -import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory; -import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser; import com.knecon.fforesight.utility.rules.management.models.ApplicationType; import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier; import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO; @@ -44,22 +37,23 @@ class RuleFileFactoryTest { migrate(ApplicationType.RM, dossierTemplatesRepo + "dev", dossierTemplatesRepo + "docu", dossierTemplatesRepo + "qa"); } + @Test @SneakyThrows @Disabled public void migrateDocumineDossierTemplatesRepoToNewestRules() { + String dossierTemplatesRepo = "/Users/aisvoran/dev/documine/dossier-templates-v2/dev/"; migrate(ApplicationType.DM, dossierTemplatesRepo + "Flora", dossierTemplatesRepo + "Basf-Demo"); 
} + @SneakyThrows private void migrate(ApplicationType applicationType, String... paths) { Arrays.stream(paths).forEach(path -> { try { - Stream.of(Files.walk(Path.of(path))) - .flatMap(Function.identity()) - .filter(p -> p.getFileName().toString().equals("rules.drl"))// + Stream.of(Files.walk(Path.of(path))).flatMap(Function.identity()).filter(p -> p.getFileName().toString().equals("rules.drl"))// .map(Path::toFile)// .forEach(e -> migrateFile(e, applicationType)); } catch (IOException e) { @@ -76,9 +70,8 @@ class RuleFileFactoryTest { Set identifiers = RuleFileParser.parseRuleIdentifiersFromFile(oldRulesFile); String newRulesString = RuleFileFactory.createFileFromIdentifiers(identifiers, applicationType); - try (FileOutputStream out = new FileOutputStream(oldRulesFile); - OutputStreamWriter outWrite = new OutputStreamWriter(out, StandardCharsets.UTF_8); - BufferedWriter writer = new BufferedWriter(outWrite)) { + try (FileOutputStream out = new FileOutputStream(oldRulesFile); OutputStreamWriter outWrite = new OutputStreamWriter(out, + StandardCharsets.UTF_8); BufferedWriter writer = new BufferedWriter(outWrite)) { writer.write(newRulesString); } } @@ -93,91 +86,6 @@ class RuleFileFactoryTest { } - @Test - @Disabled - void generateRules() { - - String stem = """ - rule "DefaultComponents.4.klasjfidklasjf: Test Guideline 1" - when - $guidelineNumber: Entity(type == "oecd_guideline_number", klasjfnumberklasjf) - $guidelineYear: Entity(type == "oecd_guideline_year", klasjfyearklasjf) - then - componentCreationService.createComponent( - "DefaultComponents.4.klasjfidklasjf", - "Test_Guidelines_1", - "klasjfguidelineklasjf", - "OECD Number and guideline year mapped!" 
- ); - end - - """; - Map, String> guidelineMapping = new HashMap<>(); - guidelineMapping.put(List.of("425", "2008"), "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)"); - guidelineMapping.put(List.of("425", "2001"), "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (17/12/2001)"); - guidelineMapping.put(List.of("402", "2017"), "Nº 402: Acute Dermal Toxicity (09/10/2017)"); - guidelineMapping.put(List.of("402", "1987"), "Nº 402: Acute Dermal Toxicity (24/02/1987)"); - guidelineMapping.put(List.of("403", "2009"), "Nº 403: Acute Inhalation Toxicity (08/09/2009)"); - guidelineMapping.put(List.of("403", "1981"), "Nº 403: Acute Inhalation Toxicity (12/05/1981)"); - guidelineMapping.put(List.of("433", "2018"), "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (27/06/2018)"); - guidelineMapping.put(List.of("433", "2017"), "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (09/10/2017)"); - guidelineMapping.put(List.of("436", "2009"), "Nº 436: Acute Inhalation Toxicity – Acute Toxic Class Method (08/09/2009)"); - guidelineMapping.put(List.of("404", "1981"), "Nº 404: Acute Dermal Irritation/Corrosion (12/05/1981)"); - guidelineMapping.put(List.of("404", "1992"), "Nº 404: Acute Dermal Irritation/Corrosion (17/07/1992)"); - guidelineMapping.put(List.of("404", "2002"), "Nº 404: Acute Dermal Irritation/Corrosion (24/04/2002)"); - guidelineMapping.put(List.of("404", "2015"), "Nº 404: Acute Dermal Irritation/Corrosion (28/07/2015)"); - guidelineMapping.put(List.of("405", "2017"), "Nº 405: Acute Eye Irritation/Corrosion (09/10/2017)"); - guidelineMapping.put(List.of("405", "2012"), "Nº 405: Acute Eye Irritation/Corrosion (02/10/2012)"); - guidelineMapping.put(List.of("405", "2002"), "Nº 405: Acute Eye Irritation/Corrosion (24/04/2002)"); - guidelineMapping.put(List.of("405", "1987"), "Nº 405: Acute Eye Irritation/Corrosion (24/02/1987)"); - guidelineMapping.put(List.of("429", "2002"), "Nº 429: Skin Sensitisation: Local Lymph Node 
Assay (24/04/2002)"); - guidelineMapping.put(List.of("429", "2010"), "Nº 429: Skin Sensitisation (23/07/2010)"); - guidelineMapping.put(List.of("442A", "2018"), "Nº 442A: Skin Sensitization (23/07/2018)"); - guidelineMapping.put(List.of("442B", "2018"), "Nº 442B: Skin Sensitization (27/06/2018)"); - guidelineMapping.put(List.of("471", "1997"), "Nº 471: Bacterial Reverse Mutation Test (21/07/1997)"); - guidelineMapping.put(List.of("471", "2020"), "Nº 471: Bacterial Reverse Mutation Test (26/06/2020)"); - guidelineMapping.put(List.of("406", "1992"), "Nº 406: Skin Sensitisation (1992)"); - guidelineMapping.put(List.of("428", "2004"), "Nº 428: Split-Thickness Skin test (2004)"); - guidelineMapping.put(List.of("438", "2018"), "Nº 438: Eye Irritation (26/06/2018)"); - guidelineMapping.put(List.of("439", "2019"), "Nº 439: Skin Irritation (2019)"); - guidelineMapping.put(List.of("474", "2016"), "Nº 474: Micronucleus Bone Marrow Cells Rat (2016)"); - guidelineMapping.put(List.of("487", "2016"), "Nº 487: Micronucleus Human Lymphocytes (2016)"); - Map>> mappings = guidelineMapping.entrySet() - .stream() - .collect(Collectors.groupingBy(Map.Entry::getValue, Collectors.mapping(Map.Entry::getKey, toList()))); - int id = 1; - StringBuilder sb = new StringBuilder(); - for (String guideline : mappings.keySet()) { - String year = getString(mappings.get(guideline).stream().map(l -> l.get(0)).distinct().toList()); - String number = getString(mappings.get(guideline).stream().map(l -> l.get(1)).distinct().toList()); - sb.append(stem.replaceAll(("klasjfguidelineklasjf"), guideline) - .replaceAll(("klasjfidklasjf"), String.valueOf(id)) - .replaceAll(("klasjfnumberklasjf"), number) - .replaceAll(("klasjfyearklasjf"), year)); - id++; - } - System.out.println(sb); - - } - - - private static String getString(List strings) { - - if (strings.size() == 1) { - return String.format("value == \"%s\"", strings.get(0)); - } - StringBuilder sb = new StringBuilder(); - sb.append("("); - for (String 
string : strings) { - sb.append(String.format("value == \"%s\"", strings)); - sb.append(" || "); - } - sb.delete(sb.length() - 4, sb.length()); - sb.append(")"); - return sb.toString(); - } - - @Test @SneakyThrows @Disabled @@ -232,6 +140,7 @@ class RuleFileFactoryTest { System.out.println(result); } + /** * Manual test to quickly unescape a rules file. Just change the path to whatever rule file you want to unescape * and it will generate an 'output.txt' with the result. @@ -262,6 +171,7 @@ class RuleFileFactoryTest { unescapeRulesFile(pathToFile, "new.txt"); } + @SneakyThrows private void unescapeRulesFile(String pathToFile, String outputName) { diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParserTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParserTest.java index 3c20f3ec..eadcf3b0 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParserTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/translation/OldRulesParserTest.java @@ -61,8 +61,8 @@ class OldRulesParserTest { public void printTranslationsTest() { List records = OldRulesParser.getOldRulesCsvRecords(RuleManagementResources.getOldRulesCsvInputStream()); - List.of(RuleType.SYN, RuleType.CBI, RuleType.PII, RuleType.ETC, RuleType.AI).forEach(type -> { - List rulesOfClass = RuleFileParser.buildBluePrintFromAllRulesFile(ApplicationType.RM).findRuleClassByType(type).ruleUnits(); + List.of(RuleType.fromString("SYN"), RuleType.fromString("CBI"), RuleType.fromString("PII"), RuleType.fromString("ETC"), RuleType.fromString("AI")).forEach(type -> { + List rulesOfClass = RuleFileParser.buildBluePrintFromAllRulesFile(ApplicationType.RM).findRuleClassByType(type).orElseThrow().ruleUnits(); 
rulesOfClass.forEach(unit -> printOldRulesThatTranslatesToNewRule(unit, records)); }); } @@ -97,7 +97,6 @@ class OldRulesParserTest { } - @Test @Disabled @SneakyThrows @@ -138,14 +137,12 @@ class OldRulesParserTest { // String dossierTemplatesRepo = "/home/aoezyetimoglu/repositories/RED/dossier-templates-v2/"; // Stream.of(Files.walk(Path.of(dossierTemplatesRepo + "dev")), Files.walk(Path.of(dossierTemplatesRepo + "docu")), Files.walk(Path.of(dossierTemplatesRepo + "qa"))) String dossierTemplatesRepo = "/home/aoezyetimoglu/repositories/PROJECTMANAGEMENT/Syngenta/business-logic/"; - Stream.of( - Files.walk(Path.of(dossierTemplatesRepo + "dev")), + Stream.of(Files.walk(Path.of(dossierTemplatesRepo + "dev")), Files.walk(Path.of(dossierTemplatesRepo + "dev-v2")), Files.walk(Path.of(dossierTemplatesRepo + "prod-cp-eu-reg")), - Files.walk(Path.of(dossierTemplatesRepo + "prod-cp-global-reg")), - Files.walk(Path.of(dossierTemplatesRepo + "prod-seeds-reg")) // - ) - .flatMap(Function.identity())// + Files.walk(Path.of(dossierTemplatesRepo + "prod-cp-global-reg")), Files.walk(Path.of(dossierTemplatesRepo + "prod-seeds-reg")) + // + ).flatMap(Function.identity())// .filter(path -> path.getFileName().toString().equals("rules.drl"))// .map(Path::toFile)// .forEach(this::translateOldRuleFile); diff --git a/redaction-service-v1/rules-management/src/test/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/test/resources/all_redact_manager_rules.drl deleted file mode 100644 index df06969b..00000000 --- a/redaction-service-v1/rules-management/src/test/resources/all_redact_manager_rules.drl +++ /dev/null @@ -1,1545 +0,0 @@ -package drools - -import static java.lang.String.format; -import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; -import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; - -import java.util.List; -import java.util.LinkedList; -import java.util.Set; 
-import java.util.stream.Collectors; -import java.util.Collection; -import java.util.stream.Stream; -import java.util.Optional; - -import com.iqser.red.service.redaction.v1.server.model.document.*; -import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.*; -import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity -import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule -import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; -import 
com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; -import com.iqser.red.service.redaction.v1.server.model.NerEntities; -import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; - -import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; - -global Document document -global EntityCreationService entityCreationService -global ManualChangesApplicationService manualChangesApplicationService -global Dictionary dictionary - -//------------------------------------ queries ------------------------------------ - -query 
"getFileAttributes" - $fileAttribute: FileAttribute() - end - -//------------------------------------ Syngenta specific rules ------------------------------------ - -// Rule unit: SYN.0 -rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("CTL/") || containsString("BL/")) - then - Stream.concat( - entityCreationService.byString("CTL", "must_redact", EntityType.HINT, $section), - entityCreationService.byString("BL", "must_redact", EntityType.HINT, $section) - ).forEach(entity -> entity.skip("SYN.0.0", "hint_only")); - end - - -// Rule unit: SYN.1 -rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" - when - $section: Section(containsString("CT") || containsString("BL")) - then - /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */ - entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? 
?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(entity -> entity.skip("SYN.1.0", "")); - end - - -//------------------------------------ CBI rules ------------------------------------ - -// Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: TextEntity(type == "CBI_author", dictionaryEntry) - then - $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: TextEntity(type == "CBI_author", dictionaryEntry) - then - $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -// Rule unit: CBI.1 -rule "CBI.1.0: Do not redact CBI Address (non vertebrate Study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: TextEntity(type == "CBI_address", dictionaryEntry) - then - $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); - end - -rule "CBI.1.1: Redact CBI Address (vertebrate Study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: TextEntity(type == "CBI_address", dictionaryEntry) - then - $entity.redact("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -// Rule unit: CBI.2 -rule "CBI.2.0: Do not redact genitive CBI Author" - when - $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s")) - then - entityCreationService.byTextRange($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) - .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); - end - - -// Rule unit: CBI.3 -rule "CBI.3.0: Redacted because Section contains a vertebrate" - when - $section: 
Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.0", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("vertebrate") - ); - }); - end - -rule "CBI.3.1: Redacted because table row contains a vertebrate" - when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.1", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) - ); - }); - end - -rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" - when - $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> entity.skip("CBI.3.2", "No vertebrate found")); - end - -rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" - when - $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - then - $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> entity.skip("CBI.3.3", "No vertebrate found")); - end - - -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - 
hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.0", - "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") - ); - }); - end - -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.1", - "Vertebrate but a no redaction indicator found", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.5 -rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("redaction_indicator"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.0", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - Stream.concat( - $section.getEntitiesOfType("vertebrate").stream(), - $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() - ); - }); - end - -rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.1", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.6 -rule "CBI.6.0: Do not redact Names and Addresses if vertebrate but also published_information is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.6.0", - "vertebrate but also published_information found", - Stream.concat( - $section.getEntitiesOfType("vertebrate").stream(), - $section.getEntitiesOfType("published_information").stream()).toList() - ); - }); - end - -rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also published_information is found in table row" - when - $table: Table(hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("published_information"), - 
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "published_information")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.6.1", - "vertebrate but also published_information found", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("published_information", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" - when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.0", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); - }); - end - -rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" - when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $author: TextEntity(type == "CBI_author", active()) from $tableCell.getEntities() - then - $author.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $author)); - end - - -// Rule unit: CBI.8 -rule "CBI.8.0: Redacted because Section contains must_redact entity" - when - $section: Section(!hasTables(), 
hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.0", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("must_redact") - ); - }); - end - -rule "CBI.8.1: Redacted because table row contains must_redact entity" - when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.1", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); - }); - end - - -// Rule unit: CBI.9 -rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author(s)")) - then - $table.streamTableCellsWithHeader("Author(s)") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.redact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author")) - then - $table.streamTableCellsWithHeader("Author") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, 
"CBI_author", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: CBI.10 -rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author(s)")) - then - $table.streamTableCellsWithHeader("Author(s)") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.redact("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $table: Table(hasHeader("Author")) - then - $table.streamTableCellsWithHeader("Author") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: CBI.11 -rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -1 - when - $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N")) - then - $table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); - end - - -// Rule unit: CBI.12 -rule "CBI.12.0: Add all cells with Header Author(s) as CBI_author" - salience 1 - when - $table: Table(hasHeader("Author(s)") || 
hasHeader("Author")) - then - Stream.concat( - $table.streamTableCellsWithHeader("Author(s)"), - $table.streamTableCellsWithHeader("Author") - ) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.skip("CBI.12.0", "Author(s) header found")); - end - -rule "CBI.12.1: Do not redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" - when - $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "N") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "No")) - then - $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("N", "No")) - .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> authorEntity.skip("CBI.12.1", "Not redacted because it's row does not belong to a vertebrate study")); - end - -rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value Yes" - when - $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "Y") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "Yes")) - then - $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) - .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); - end - - -// Rule unit: CBI.13 -rule "CBI.13.0: Ignore CBI Address recommendations" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $entity: TextEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION) - then - $entity.ignore("CBI.13.0", "Ignore CBI Address Recommendations"); - retract($entity) - end - - -// Rule unit: CBI.14 -rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" - when - $sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) - then - $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); - end - - -// Rule unit: CBI.15 -rule "CBI.15.0: Redact row if row contains \"determination of residues\" and livestock keyword" - when - $keyword: String() from List.of("livestock", - "live stock", - "tissue", - "tissues", - "liver", - "muscle", - "bovine", - "ruminant", - "ruminants") - $residueKeyword: String() from List.of("determination of residues", "determination of total residues") - $section: Section(!hasTables(), - containsStringIgnoreCase($residueKeyword), - containsStringIgnoreCase($keyword)) - then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); - - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) - .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); - end - -rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determination of residues\" and livestock keyword" - when - $keyword: String() from List.of("livestock", - "live stock", - "tissue", - "tissues", - "liver", - "muscle", - "bovine", - "ruminant", - "ruminants") - $residueKeyword: String() from List.of("determination of residues", "determination of total residues") - $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) - then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); - - $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) - .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); - end - - -// Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - - -// Rule unit: CBI.17 -rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" - when - $section: Section(!hasTables(), containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:")) - then - entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(entity -> entity.skip("CBI.17.0", "Line after \"Source\" in Test Organism Section")); - end - -rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with colon" - when - $section: Section(!hasTables(), containsString("Species:"), containsString("Source:")) - then - entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section) - .forEach(entity -> entity.skip("CBI.17.1", "Line after \"Source:\" in Test Animals Section")); - end - - -// Rule unit: CBI.18 -rule "CBI.18.0: Expand CBI_author entities with firstname initials" - no-loop true - when - $entityToExpand: TextEntity(type == "CBI_author", - value.matches("[^\\s]+"), - textAfter.startsWith(" "), - anyMatch(textAfter, "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") - ) - then - entityCreationService.bySuffixExpansionRegex($entityToExpand, "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)") - .ifPresent(expandedEntity -> { - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - $entityToExpand.remove("CBI.18.0", "Expand CBI_author entities with firstname initials"); - retract($entityToExpand); - }); - end - - -// Rule unit: CBI.19 -rule "CBI.19.0: Expand CBI_author entities with salutation prefix" - when - $entityToExpand: TextEntity(type == "CBI_author", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) - then - entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") - .ifPresent(expandedEntity -> { - expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList()); - $entityToExpand.remove("CBI.19.0", "Expand CBI_author entities with salutation prefix"); - retract($entityToExpand); - }); - end - - -// Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) - then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) - .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); - dictionary.recommendEverywhere(laboratoryEntity); - }); - end - -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) - then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT 
ID:", "CBI_address", EntityType.ENTITY, $section) - .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(laboratoryEntity); - }); - end - -// Rule unit: CBI.21 -rule "CBI.21.0: Redact short Authors section (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule - not TextEntity(type == "CBI_author", engines contains Engine.NER) from $section.getEntities() - then - entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) - .forEach(entity -> { - entity.redact("CBI.21.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - }); - end - -rule "CBI.21.1: Redact short Authors section (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule - not TextEntity(type == "CBI_author", engines contains Engine.NER) from $section.getEntities() - then - entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) - .forEach(entity -> { - entity.redact("CBI.21.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - }); - end - - -// Rule unit: CBI.22 -rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in non-vertebrate documents" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $table: Table(hasHeader("Vertebrate study Y/N"), hasRowWithHeaderAndValue("Vertebrate study Y/N", 
"Y") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "Yes")) - then - $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) - .filter(redactionEntity -> redactionEntity.isType("CBI_address")) - .forEach(authorEntity -> authorEntity.redact("CBI.22.0", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); - end - - -//------------------------------------ PII rules ------------------------------------ - -// Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: TextEntity(type == "PII", dictionaryEntry) - then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "PII.0.1: Redact all PII (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $pii: TextEntity(type == "PII", dictionaryEntry) - then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -// Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("@")) - then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("@")) - then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> 
emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "PII.1.2: Redact typoed Emails with indicator" - when - $section: Section(containsString("@") || containsStringIgnoreCase("mail")) - then - entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("Contact") || - containsString("Telephone") || - containsString("Phone") || - containsString("Ph.") || - containsString("Fax") || - containsString("Tel") || - containsString("Ter") || - containsString("Mobile") || - containsString("Fel") || - containsString("Fer")) - then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("Contact") || - containsString("Telephone") || - containsString("Phone") || - containsString("Ph.") || - containsString("Fax") || - containsString("Tel") || - containsString("Ter") || - containsString("Mobile") || - containsString("Fel") || - containsString("Fer")) - then - 
entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "PII.2.2: Redact phone numbers without indicators" - when - $section: Section(containsString("+")) - then - entityCreationService.byRegex("(\\+[\\dO]{1,2} )(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) - .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(matchesRegex("[+]\\d{1,}")) - then - entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(matchesRegex("[+]\\d{1,}")) - then - entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -// Rule unit: PII.4 -rule "PII.4.0: Redact 
line after contact information keywords (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); - end - - -// Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) - then - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule 
"PII.6.1: Redact line between contact keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) - then - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(getHeadline().containsString("applicant") || - getHeadline().containsString("Primary contact") || - getHeadline().containsString("Alternative contact") || - containsString("Applicant") || - containsString("Telephone number:")) - then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: 
Section(getHeadline().containsString("applicant") || - getHeadline().containsString("Primary contact") || - getHeadline().containsString("Alternative contact") || - containsString("Applicant") || - containsString("Telephone number:")) - then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsStringIgnoreCase("producer of the plant protection") || - containsStringIgnoreCase("producer of the active substance") || - containsStringIgnoreCase("manufacturer of the active substance") || - containsStringIgnoreCase("manufacturer:") || - containsStringIgnoreCase("Producer or producers of the active substance")) - then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - 
entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) - .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsStringIgnoreCase("producer of the plant protection") || - containsStringIgnoreCase("producer of the active substance") || - containsStringIgnoreCase("manufacturer of the active substance") || - containsStringIgnoreCase("manufacturer:") || - containsStringIgnoreCase("Producer or producers of the active substance")) - then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) - then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, 
$document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) - then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.10 -rule "PII.10.0: Redact study director abbreviation" - when - $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) - then - entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.11 -rule "PII.11.0: Redact On behalf of Sequani Ltd.:" - when - $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) - then - entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.redact("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - - -// Rule unit: PII.12 -rule "PII.12.0: Expand PII entities with salutation prefix" - when - $entityToExpand: TextEntity(type == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) - then - 
entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") - .ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList())); - end - -// Rule unit: PII.13 -rule "PII.13.0: Add recommendation for PII after Contact Person" - when - $section: Section(containsString("contact person:")) - then - entityCreationService.lineAfterStringsIgnoreCase(List.of("Contact Person", "Contact Person:"), "PII", EntityType.RECOMMENDATION, $section) - .forEach(entity -> entity.skip("PII.13.0", "Line after \"Source:\" in Test Animals Section")); - end - - -// Rule unit: PII.14 -rule "PII.14.0: Recommend title prefixed words as PII" - when - $section: Section(containsAnyString("Dr ", "PD Dr ", "Prof. Dr ", "Dr. med. vet ", "Dr. rer. nat ", "PhD ", "BSc ", "(FH) ", "Mr ", "Mrs ", "Ms ", "Miss ", "Dr.", "PD. Dr.", "Prof. Dr.", "Dr. med. vet.", "Dr. rer. nat.", "PhD.", "BSc.", "(FH).", "Mr.", "Mrs.", "Ms.", "Miss.")) - then - entityCreationService.byRegex("((Dr|PD Dr|Prof. Dr|Dr. med. vet|Dr. rer. nat|PhD|BSc|\\\\(FH\\\\)|Mr|Mrs|Ms|Miss)[.\\\\s]{1,2})([\\\\p{Lu}][\\\\p{L}\\\\-.]{1,20}\\\\s[\\\\p{Lu}][\\\\p{L}\\\\-.]{1,20})", "PII", EntityType.ENTITY, 3, $section); - end - - -//------------------------------------ Other rules ------------------------------------ - -// Rule unit: ETC.0 -rule "ETC.0.0: Purity Hint" - when - $section: Section(containsStringIgnoreCase("purity")) - then - entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? 
?%", "hint_only", EntityType.HINT, 1, $section) - .forEach(hint -> hint.skip("ETC.0.0", "hint only")); - end - - -// Rule unit: ETC.1 -rule "ETC.1.0: Redact Purity" - when - $section: Section(containsStringIgnoreCase("purity")) - then - entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)")); - end - - -// Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $signature: Image(imageType == ImageType.SIGNATURE) - then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "ETC.2.1: Redact signatures (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $signature: Image(imageType == ImageType.SIGNATURE) - then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -// Rule unit: ETC.3 -rule "ETC.3.0: Redact logos (vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $logo: Image(imageType == ImageType.LOGO) - then - $logo.redact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "ETC.3.1: Redact logos (non vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $logo: Image(imageType == ImageType.LOGO) - then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - -// Rule unit: ETC.4 -rule "ETC.4.0: Redact dossier dictionary entries" - when - $dossierRedaction: TextEntity(type == "dossier_redaction") - then - $dossierRedaction.redact("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "ETC.4.1: Redact dossier dictionary entries" - when - 
$dossierRedaction: TextEntity(type == "dossier_redaction") - then - $dossierRedaction.redact("ETC.4.1", "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002"); - end - -rule "ETC.4.2: Redact dossier dictionary entries" - when - $dossierRedaction: TextEntity(type == "dossier_redaction") - then - $dossierRedaction.redact("ETC.4.2", "Dossier redaction found", "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"); - end - - -// Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" - when - not FileAttribute(label == "Confidentiality", value == "confidential") - $dossierRedaction: TextEntity(type == "dossier_redaction") - then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); - end - - -// Rule unit: ETC.6 -rule "ETC.6.0: Redact CAS Number" - when - $table: Table(hasHeader("Sample #")) - then - $table.streamTableCellsWithHeader("Sample #") - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "PII", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.redact("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); - end - - -// Rule unit: ETC.7 -rule "ETC.7.0: Guidelines FileAttributes" - when - $section: Section(!hasTables(), containsAnyString("DATA REQUIREMENT(S):", "TEST GUIDELINE(S):") && containsAnyString("OECD", "EPA", "OPPTS")) - then - RedactionSearchUtility.findTextRangesByRegex("OECD (No\\.? 
)?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream() - .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) - .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) - .forEach(fileAttribute -> insert(fileAttribute)); - end - - -// Rule unit: ETC.8 -rule "ETC.8.0: Redact formulas (vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value == "Yes") - $logo: Image(imageType == ImageType.FORMULA) - then - $logo.redact("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "ETC.8.1: Redact formulas (non vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $logo: Image(imageType == ImageType.FORMULA) - then - $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - -// Rule unit: ETC.9 -rule "ETC.9.0: Redact skipped impurities" - when - FileAttribute(label == "Redact Skipped Impurities", value == "Yes") - $skippedImpurities: TextEntity(type == "skipped_impurities") - then - $skippedImpurities.redact("ETC.9.0", "Occasional Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); - end - -rule "ETC.9.1: Redact impurities" - when - FileAttribute(label == "Redact Impurities", value == "Yes") - $skippedImpurities: TextEntity(type == "impurities") - then - $skippedImpurities.redact("ETC.9.1", "Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); - end - -// Rule unit: ETC.10 -rule "ETC.10.0: Redact Product Composition Information" - when - $compositionInformation: TextEntity(type == "product_composition") - then - $compositionInformation.redact("ETC.10.0", "Product Composition Information found", "Article 63(2)(d) of Regulation (EC) No 1107/2009"); - end - -// Rule unit: ETC.11 -rule "ETC.11.0: Recommend first line in table cell with name and address of owner" - when - $table: Table(hasHeader("Name and Address of Owner / Tenant"), containsString("trial 
site")) - $header: TableCell(isHeader(), containsString("Name and Address of Owner / Tenant")) from $table.streamTableCells().toList() - $tableCell: TableCell(col == $header.col, row == 2) from $table.streamTableCells().toList() - then - entityCreationService.bySemanticNode($tableCell, "PII", EntityType.RECOMMENDATION) - .ifPresent(redactionEntity -> redactionEntity.redact("ETC.11.0", "Trial Site owner and address found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -//------------------------------------ AI rules ------------------------------------ - -// Rule unit: AI.0 -rule "AI.0.0: Add all NER Entities of type CBI_author" - salience 999 - when - nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) - then - nerEntities.streamEntitiesOfType("CBI_author") - .forEach(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document)); - end - - -// Rule unit: AI.1 -rule "AI.1.0: Combine and add NER Entities as CBI_address" - salience 999 - when - nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) - then - entityCreationService.combineNerEntitiesToCbiAddressDefaults(nerEntities, "CBI_address", EntityType.RECOMMENDATION, document).toList(); - end - - -// Rule unit: AI.2 -rule "AI.2.0: Add all NER Entities of any type except CBI_author" - salience 999 - when - nerEntities: NerEntities() - then - nerEntities.getNerEntityList().stream() - .filter(nerEntity -> !nerEntity.type().equals("CBI_author")) - .forEach(nerEntity -> entityCreationService.byNerEntity(nerEntity, nerEntity.type().toLowerCase(), EntityType.RECOMMENDATION, document)); - end - - -// Rule unit: AI.3 -rule "AI.3.0: Recommend authors from AI as PII" - salience 999 - when - nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) - then - nerEntities.streamEntitiesOfType("CBI_author") - .forEach(nerEntity -> entityCreationService.byNerEntity(nerEntity, "PII", EntityType.RECOMMENDATION, document)); 
- end - -//------------------------------------ Manual redaction rules ------------------------------------ - -// Rule unit: MAN.0 -rule "MAN.0.0: Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) - $entityToBeResized: TextEntity(matchesAnnotationId($id)) - then - manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($entityToBeResized); - $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); - end - -rule "MAN.0.1: Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) - $imageToBeResized: Image(id == $id) - then - manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($imageToBeResized); - update($imageToBeResized.getParent()); - end - - -// Rule unit: MAN.1 -rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" - salience 128 - when - $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) - then - $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); - update($entityToBeRemoved); - retract($idRemoval); - $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); - end - -rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" - salience 128 - when - $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) - $imageEntityToBeRemoved: Image($id == id) - then - 
$imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); - update($imageEntityToBeRemoved); - retract($idRemoval); - update($imageEntityToBeRemoved.getParent()); - end - - -// Rule unit: MAN.2 -rule "MAN.2.0: Apply force redaction" - salience 128 - when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToForce: TextEntity(matchesAnnotationId($id)) - then - $entityToForce.getManualOverwrite().addChange($force); - update($entityToForce); - $entityToForce.getIntersectingNodes().forEach(node -> update(node)); - retract($force); - end - -rule "MAN.2.1: Apply force redaction to images" - salience 128 - when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) - $imageToForce: Image(id == $id) - then - $imageToForce.getManualOverwrite().addChange($force); - update($imageToForce); - update($imageToForce.getParent()); - retract($force); - end - - -// Rule unit: MAN.3 -rule "MAN.3.0: Apply entity recategorization" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type != $type) - then - $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); - manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); - retract($recategorization); - // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
- retract($entityToBeRecategorized); - end - -rule "MAN.3.1: Apply entity recategorization of same type" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type == $type) - then - $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); - retract($recategorization); - end - -rule "MAN.3.2: Apply image recategorization" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $imageToBeRecategorized: Image($id == id) - then - manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); - update($imageToBeRecategorized); - update($imageToBeRecategorized.getParent()); - retract($recategorization); - end - - -// Rule unit: MAN.4 -rule "MAN.4.0: Apply legal basis change" - salience 128 - when - $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) - $imageToBeRecategorized: Image($id == id) - then - $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); - end - -rule "MAN.4.1: Apply legal basis change" - salience 128 - when - $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToBeChanged: TextEntity(matchesAnnotationId($id)) - then - $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); - end - - -//------------------------------------ Entity merging rules ------------------------------------ - -// Rule unit: X.0 -rule "X.0.0: Remove Entity contained by Entity of same type" - salience 65 - when - $larger: TextEntity($type: type, $entityType: entityType, 
active()) - $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) - then - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - retract($contained); - end - - -// Rule unit: X.1 -rule "X.1.0: Merge intersecting Entities of same type" - salience 64 - when - $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) - $second: TextEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !hasManualChanges(), active()) - then - TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); - $first.remove("X.1.0", "merge intersecting Entities of same type"); - $second.remove("X.1.0", "merge intersecting Entities of same type"); - retract($first); - retract($second); - mergedEntity.getIntersectingNodes().forEach(node -> update(node)); - end - - -// Rule unit: X.2 -rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" - salience 64 - when - $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) - then - $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); - retract($entity) - end - - -// Rule unit: X.3 -rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" - salience 64 - when - $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $recommendation.remove("X.3.0", "remove Entity of type 
RECOMMENDATION when contained by FALSE_RECOMMENDATION"); - retract($recommendation); - end - - -// Rule unit: X.4 -rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" - salience 256 - when - $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $entity.addEngines($recommendation.getEngines()); - $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); - retract($recommendation); - end - - -// Rule unit: X.5 -rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" - salience 256 - when - $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); - retract($recommendation); - end - - -// Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" - salience 32 - when - $higherRank: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) - then - $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); - retract($lowerRank); - end - - -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" - salience 32 - when - $higherRank: 
TextEntity($type: type, $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(intersects($higherRank), type != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active(), $lowerRank.getValue().length() > $value.length()) - then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); - end - - -//------------------------------------ File attributes rules ------------------------------------ - -// Rule unit: FA.1 -rule "FA.1.0: Remove duplicate FileAttributes" - salience 64 - when - $fileAttribute: FileAttribute($label: label, $value: value) - $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) - then - retract($duplicate); - end - - -//------------------------------------ Local dictionary search rules ------------------------------------ - -// Rule unit: LDS.0 -rule "LDS.0.0: Run local dictionary search" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -999 - when - $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() - then - entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) - .forEach(entity -> { - Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); - }); - end diff --git a/redaction-service-v1/rules-management/src/test/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/test/resources/all_rules_documine.drl deleted file mode 100644 index 935ef3dc..00000000 --- a/redaction-service-v1/rules-management/src/test/resources/all_rules_documine.drl +++ 
/dev/null @@ -1,1397 +0,0 @@ -package drools - -import static java.lang.String.format; -import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; -import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; - -import java.util.List; -import java.util.LinkedList; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.Collection; -import java.util.stream.Stream; -import java.util.Optional; - -import com.iqser.red.service.redaction.v1.server.model.document.*; -import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.*; -import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity -import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule -import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; -import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; -import com.iqser.red.service.redaction.v1.server.model.NerEntities; -import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; - -import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; -import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; - -global Document document -global EntityCreationService entityCreationService -global ManualChangesApplicationService manualChangesApplicationService -global Dictionary dictionary - -//------------------------------------ queries ------------------------------------ - -query "getFileAttributes" - $fileAttribute: FileAttribute() - end - -//------------------------------------ H rules ------------------------------------ - -// Rule unit: H.0 -rule "H.0.0: retract table of contents page" - when - $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) - $node: SemanticNode(onPage($page.getNumber()), !onPage($page.getNumber() -1), getType() != NodeType.IMAGE) - then - retract($node); - end - - -// Rule unit: H.1 -rule "H.1.0: Ignore Table of Contents" - salience 10 - when - $tocHeadline: Headline(containsString("CONTENTS")) - $page: Page() from $tocHeadline.getParent().getPages() - $node: SemanticNode(this != $tocHeadline, getType() != NodeType.IMAGE, onPage($page.getNumber()), !onPage($page.getNumber() -1)) - then - retract($node); - end - - -// Rule unit: H.2 -rule "H.2.0: Show headlines" - when - $headline: Headline() - then - entityCreationService.bySemanticNode($headline, "headline", EntityType.HINT); - end - - -// Rule unit: H.3 -rule "H.3.0: Study Type File Attribute" - when - not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) - $section: Section( - (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")) - ,(containsString("OECD") || containsString("EPA") || 
containsString("OPPTS")) - ) - then - Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()), - RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()), - RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock())).flatMap(Collection::stream).findFirst() - .map(textRange -> $section.getTextBlock().subSequence(textRange).toString()) - .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) - .ifPresent(fileAttribute -> insert(fileAttribute)); - end - -rule "H.3.1: Study Type File Attribute in Headlines" - when - not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) - $page: Page($pageNumber:number, - getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") - || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") - || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) - $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) - then - Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $headline.getTextBlock()), - RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $headline.getTextBlock()), - RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $headline.getTextBlock())).flatMap(Collection::stream).findFirst() - .map(textRange -> $headline.getTextBlock().subSequence(textRange).toString()) - .map(value -> FileAttribute.builder().label("OECD 
Number").value(value).build()) - .ifPresent(fileAttribute -> insert(fileAttribute)); - end - -//------------------------------------ General documine rules ------------------------------------ - -// Rule unit: DOC.1 -rule "DOC.1.0: Guidelines" - when - $section: Section( - ( - containsString("DATA REQUIREMENT") - || containsString("TEST GUIDELINE") - || containsString("MÉTODO(S) DE REFERÊNCIA(S):") - ) - && ( - containsString("OECD") - || containsString("EPA") - || containsString("OPPTS") - ) - ) - then - entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline found", "n-a") - ); - entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline found", "n-a") - ); - entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "EPA Guideline found", "n-a") - ); - entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? 
\\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "EC Guideline found", "n-a") - ); - entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "EC Guideline found", "n-a") - ); - entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "EPA Guideline found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline no. 
found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") - ); - entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> - entity.apply("DOC.1.0", "OECD Guideline found", "n-a") - ); - end - -rule "DOC.1.2: Guidelines" - when - $section: Section( - ( - containsString("DATA REQUIREMENT") - || containsString("TEST GUIDELINE") - || containsString("MÉTODO(S) DE REFERÊNCIA(S):") - ) - && ( - containsString("OECD") - || containsString("EPA") - || containsString("OPPTS") - ) - && ( - hasEntitiesOfType("oecd_guideline") - || hasEntitiesOfType("epa_guideline") - || hasEntitiesOfType("ec_guideline") - ) - ) - then - $section.getEntitiesOfType(List.of("oecd_guideline","ec_guideline", "epa_guideline")).forEach(entity -> { - entity.apply("DOC.1.2", "OECD guideline found.", "n-a"); - }); - 
end - -rule "DOC.1.3: Guidelines" - when - $section: Section( - ( - hasEntitiesOfType("oecd_guideline") - || hasEntitiesOfType("epa_guideline") - || hasEntitiesOfType("ec_guideline") - ) - && !( - ( - containsString("DATA REQUIREMENT") - || containsString("TEST GUIDELINE") - || containsString("MÉTODO(S) DE REFERÊNCIA(S):") - ) - && ( - containsString("OECD") - || containsString("EPA") - || containsString("OPPTS") - ) - ) - ) - then - $section.getEntitiesOfType(List.of("oecd_guideline", "ec_guideline", "epa_guideline")).forEach(entity -> { - entity.remove("DOC.1.3", "removed"); - retract(entity); - }); - end - -rule "DOC.1.4: Guideline in Headlines" - when - $page: Page($pageNumber:number, - getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") - || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") - || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) - $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) - then - entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline found", "n-a") - ); - entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline found", "n-a") - ); - entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "EPA Guideline found", "n-a") - ); - entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? 
\\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "EC Guideline found", "n-a") - ); - entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "EC Guideline found", "n-a") - ); - entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "EPA Guideline found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline no. 
found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") - ); - entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") - ); - entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> - entity.apply("DOC.1.4", "OECD Guideline found", "n-a") - ); - end - -// Rule unit: DOC.2 -rule "DOC.2.0: Report number" - when - $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) - then - entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { - entity.apply("DOC.2.0", "Report number found", "n-a"); - }); - end - - -// Rule unit: DOC.3 -rule "DOC.3.0: Experimental Starting Date" - when - $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. 
Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) - then - entityCreationService.lineAfterStrings( - List.of("Experimental start date", - "Experimental start date:", - "Experimental Starting Date", - "Experimental Starting Date:", - "Experimental starting date", - "Experimental starting date:", - "Experimental Start Date", - "Experimental Start Date:", - "Experimental I. Starting Date:", - "Experimental II. Starting Date:"), "experimental_start_date", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.3.0", "Experimental start date found", "n-a"); - }); - end - - -// Rule unit: DOC.4 -rule "DOC.4.0: Experimental Completion Date" - when - $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) - then - entityCreationService.lineAfterStrings( - List.of("Experimental termination date", - "Experimental termination date:", - "Experimental Completion Date", - "Experimental Completion Date:", - "Experimental completion date", - "Experimental completion date:", - "Experimental Termination Date", - "Experimental Termination Date:"), "experimental_end_date", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.4.0", "Experimental end date found", "n-a"); - }); - end - - -// Rule unit: DOC.5 -rule "DOC.5.0: Ignore species and strain in irrelevant study types" - salience 1 - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) - $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) - then - $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { - entity.remove("DOC.5.0", "removed"); - retract(entity); - }); - end - -rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" - salience 1 - when - FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("402","403","404","405","425","429","436")) - $section: Section( - (hasEntitiesOfType("species") || hasEntitiesOfType("strain")) - && !( - anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("animals") - || anyHeadlineContainsStringIgnoreCase("specification") - ) - ) - then - $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { - entity.remove("DOC.5.1", "removed"); - retract(entity); - }); - end - -rule "DOC.5.2: Species" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) - $section: Section(hasEntitiesOfType("species")) - then - $section.getEntitiesOfType("species").forEach(entity -> { - entity.apply("DOC.5.2", "Species found.", "n-a"); - entity.setValue(entity.getValue().toLowerCase()); - }); - end - -rule "DOC.5.3: Strain" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) - $section: Section( - hasEntitiesOfType("species") - && hasEntitiesOfType("strain") - && ( - anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("animals") - || anyHeadlineContainsStringIgnoreCase("specification") - ) - ) - then - $section.getEntitiesOfType("strain").forEach(entity -> { - entity.apply("DOC.5.3", "Strain found.", "n-a"); - }); - end - - -// Rule unit: DOC.6 -rule "DOC.6.0: study title by document structure" - when - $table: Table(onPage(1), - (containsString("Final Report") || containsString("SPL")), - numberOfRows == 1, - numberOfCols == 1) - $tableCell: TableCell(row == 1, col == 1) from $table.streamTableCells().toList() - $paragraph: Paragraph(previousSibling.isPresent(), nextSibling.isPresent()) from $tableCell.streamChildren().toList() - then - entityCreationService.bySemanticNode($paragraph, "title", EntityType.ENTITY).ifPresent(entity -> { - entity.apply("DOC.6.0", "Study title found", "n-a"); - }); - end - -rule "DOC.6.1: study title" - 
when - $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) - then - entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $table).findFirst().ifPresent(entity -> { - entity.apply("DOC.6.1", "Title found", "n-a"); - }); - end - -rule "DOC.6.2: study title" - when - not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) - $section: Section(onPage(1), (containsString("Final Report") || containsString("SPL"))) - then - entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { - entity.apply("DOC.6.2", "Title found", "n-a"); - }); - end - - -// Rule unit: DOC.7 -rule "DOC.7.0: Performing Laboratory (Name)" - when - $section: Section(containsString("PERFORMING LABORATORY:")) - then - entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { - entity.apply("DOC.7.0", "Performing Laboratory found", "n-a"); - }); - end - -rule "DOC.7.1: Performing Laboratory (Country)" - when - nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) - $section: Section(containsString("PERFORMING LABORATORY:")) - then - nerEntities.streamEntitiesOfType("COUNTRY") - .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) - .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) - .forEach(entity -> { - entity.apply("DOC.7.1", "Performing Laboratory found", "n-a"); - }); - end - -rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" - when - $section: Section(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) - $countryOrNameFromDictionary: TextEntity(type == "laboratory_country" || type == "laboratory_name", $type: 
type, isDictionaryEntry()) from $section.getEntities() - then - $countryOrNameFromDictionary.apply("DOC.7.2", "Performing " + $type + " dictionary entry found."); - end - -rule "DOC.7.3: Performing Laboratory (Country) from dict" - when - $section: Section( - (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) - && !(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) - ) - then - $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name")).forEach(entity -> { - entity.remove("DOC.7.3", "removed"); - retract(entity); - }); - end - - -// Rule unit: DOC.8 -rule "DOC.8.0: GLP Study" - when - $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") - || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") - || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) - || containsString("GLP Certificate") - || containsString("GLP Certificates") - || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") - || containsString("Good Laboratory Practice Certificate") - || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")) - then - entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { - entity.apply("DOC.8.0", "GLP Study found", "n-a"); - }); - end - - -// Rule unit: DOC.9 -rule "DOC.9.0: Batch number from CoA" - when - $section: Section( - ( - anyHeadlineContainsString("Analytical Report") - || anyHeadlineContainsStringIgnoreCase("Certificate of Analysis") - || containsStringIgnoreCase("Certificate of Analysis") - ) - && ( - containsStringIgnoreCase("batch") - || containsStringIgnoreCase("bath") - || containsStringIgnoreCase("barch") - || containsStringIgnoreCase("bateb") - ) - && ( - containsStringIgnoreCase("identification") - || containsStringIgnoreCase("ldentitfication") - || containsStringIgnoreCase("wentification") - || 
containsStringIgnoreCase("mentification") - || containsStringIgnoreCase("kientification") - || containsStringIgnoreCase("reference number") - || containsStringIgnoreCase("test substance") - ) - ) - then - entityCreationService.lineAfterStrings(List.of("Batch Identification", - "(Batch Identification):", - "Bateb Identification", - "Batch Wentification", - "Batch Mentification", - "Batch Kientification", - "Barch Identification", - "Bath ldentitfication", - "Batch of test substance :"), "batch_number", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.9.0", "Batch number found in CoA", "n-a"); - }); - end - -rule "DOC.9.1: Batch number" - when - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Test Substance") - || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") - || anyHeadlineContainsStringIgnoreCase("Test Item") - ) - && !( - anyHeadlineContainsString("component") - || anyHeadlineContainsString("reference") - || anyHeadlineContainsString("blank") - ) - && containsStringIgnoreCase("batch") - ) - then - Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section), - entityCreationService.lineAfterStrings(List.of("Batch Identification", - "Batch number:", - "Batch reference number:", - "Batch:", - "Batch/Lot number:", - "Batch (Lot) Number:", - "Batch Number:", - "Batch Nº:", - "Batch no:" - ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a) - .forEach(entity -> { - entity.apply("DOC.9.1", "Batch number found", "n-a"); - }); - end - -rule "DOC.9.2: Batch number" - when - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Test Substance") - || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") - || anyHeadlineContainsStringIgnoreCase("Test Item") - ) - && !( - anyHeadlineContainsString("component") - || anyHeadlineContainsString("reference") - || anyHeadlineContainsString("blank") - ) - && 
containsStringIgnoreCase("batch") - ) - $batchNumber: String() from List.of("Batch Identification", - "Batch number:", - "Batch reference number:", - "Batch:", - "Batch/Lot number:", - "Batch (Lot) Number:", - "Batch Number:", - "Batch Nº:", - "Batch no:") - $table: Table(containsStringIgnoreCase($batchNumber)) from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() - then - entityCreationService.lineAfterStringAcrossColumnsIgnoreCase($batchNumber, "batch_number", EntityType.ENTITY, $table).forEach(entity -> { - entity.apply("DOC.9.2", "Batch number found", "n-a"); - }); - end - - -// Rule unit: DOC.10 -rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) - $section: Section( - (getHeadline().containsStringIgnoreCase("Conclusion") || anyHeadlineContainsStringIgnoreCase("Lethality")) - && (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose")) - && ( - containsString("greater than") - || containsString("higher than") - || containsString("above") - || containsString("in excess") - || containsString("exceeds") - || containsString("was found to be") - || containsString("was calculated to be") - || containsString("estimated to be") - ) - ) - then - entityCreationService.byRegexIgnoreCase("(L[D|C]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.10.0", "LD50 greater than found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", "ld50_value", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.10.0", 
"LD50 value found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.10.0", "Minimal Confidence found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.10.0", "Maximal Confidence found", "n-a"); - }); - end - - -// Rule unit: DOC.11 -rule "DOC.11.0: Guideline Deviation" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - (getHeadline().containsStringIgnoreCase("General Information") || containsString("GENERAL INFORMATION")) - && (containsStringIgnoreCase("from the") || containsStringIgnoreCase("to the")) - ) - then - entityCreationService.betweenRegexes("(?:Deviations? from the [G|g]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [S|s]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Deviation from Guidelines found", "n-a"); - }); - entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)? 
[S|s]tudy [P|p]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Distribution of the report)|(?:Retention of [S|s]amples)", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Deviation from Study Plan found", "n-a"); - }); - entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Guideline deviation found in text.", "n-a"); - }); - entityCreationService.betweenStringsIncludeEnd("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"); - }); - end - -rule "DOC.11.1: Guideline Deviation in text" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - getHeadline().containsStringIgnoreCase("Introduction") - && containsStringIgnoreCase("deviations from the protocol") - ) - then - entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.11.1", "Guideline deviation found in text.", "n-a"); - }); - end - - -// Rule unit: DOC.12 -rule "DOC.12.0: Clinical Signs" - when - FileAttribute(label == "OECD Number", value == "425") - $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE") && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC 
FINDINGS")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.12.0", "Clinical Signs found", "n-a")); - end - - -// Rule unit: DOC.13 -rule "DOC.13.0: Dosages" - when - FileAttribute(label == "OECD Number", value == "425") - $section: Section( - (anyHeadlineContainsStringIgnoreCase("Dosages") || anyHeadlineContainsStringIgnoreCase("Study Design")) - && !getHeadline().containsString("TABLE") - ) - then - entityCreationService.betweenStringsIncludeStartAndEnd("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.13.0", "Dosage found", "n-a"); - }); - entityCreationService.betweenStringsIncludeStartAndEnd("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.13.0", "Dosage found", "n-a"); - }); - entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.13.0", "Dosage found", "n-a"); - }); - end - - -// Rule unit: DOC.14 -rule "DOC.14.0: Mortality" - when - $headline: Headline(containsString("Mortality") && !containsString("TABLE")) - FileAttribute(label == "OECD Number", value == "425") - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.14.0", "Mortality found", "n-a")); - end - - -// Rule unit: DOC.15 -rule "DOC.15.0: Study Conclusion" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - getHeadline().containsStringIgnoreCase("Conclusion") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY) - 
.forEach(entity -> entity.apply("DOC.15.0", "Study Conclusion found", "n-a")); - end - - -// Rule unit: DOC.16 -rule "DOC.16.0: Weight Behavior Changes" - when - FileAttribute(label == "OECD Number", value == "402") - $section: Section( - getHeadline().containsString("Results") - && ( - containsString("body weight") - || containsString("body weights") - || containsString("bodyweight") - || containsString("bodyweights") - ) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.16.0", "Weight behavior changes found", "n-a")); - end - - -// Rule unit: DOC.17 -rule "DOC.17.0: Necropsy findings" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Necropsy") - || getHeadline().containsStringIgnoreCase("Macroscopic Findings") - || getHeadline().containsStringIgnoreCase("Macroscopic examination") - ) - && !getHeadline().containsStringIgnoreCase("Table") - && !getHeadline().containsStringIgnoreCase("Appendix") - && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) - .forEach( entity -> entity.apply("DOC.17.0", "Necropsy section found", "n-a")); - end - - -// Rule unit: DOC.18 -rule "DOC.18.0: Clinical observations" - when - FileAttribute(label == "OECD Number", value == "403") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Clinical Observations") - || anyHeadlineContainsStringIgnoreCase("Clinical observations") - || anyHeadlineContainsStringIgnoreCase("In-life Observations") - || anyHeadlineContainsStringIgnoreCase("Postmortem Observations") - ) - && !anyHeadlineContainsStringIgnoreCase("Appendix") - && !anyHeadlineContainsStringIgnoreCase("Table") - && !anyHeadlineContainsStringIgnoreCase("Mortality") - ) - then - 
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.18.0", "Clinical observations section found", "n-a")); - end - - -// Rule unit: DOC.19 -rule "DOC.19.0: Bodyweight changes" - when - FileAttribute(label == "OECD Number", value == "403") - $headline: Headline(containsAnyStringIgnoreCase("Bodyweight", "Bodyweights", "Body Weights", "Body Weight"), !containsAnyStringIgnoreCase("Appendix", "TABLE")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "bodyweight_changes", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.19.0", "Bodyweight section found", "n-a")); - end - - -// Rule unit: DOC.20 -rule "DOC.20.0: Study Design" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) - $section: Section( - anyHeadlineContainsStringIgnoreCase("study design") - && !anyHeadlineContainsString("Preliminary screening test") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.20.0", "Study design section found", "n-a")); - end - -rule "DOC.20.1: Study Design" - when - Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier()) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_design", EntityType.ENTITY) - .forEach(entity -> { - entity.apply("DOC.20.1", "Study design section found", "n-a"); - }); - end - - -// Rule unit: DOC.21 -rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) - $parentHeadline: Headline( - containsAnyString("Results", "Conclusion"), - !containsAnyString("POSITIVE CONTROL", "Positive Control", 
"Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), - $sectionIdentifier: getSectionIdentifier() - ) - not Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - entityCreationService.bySemanticNodeParagraphsOnly($parentHeadline.getParent(), "results_and_conclusion", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.21.0", "Results and Conclusion found", "n-a")); - end - -rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) - Headline( - containsAnyString("Results", "Conclusion"), - !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), - $sectionIdentifier: getSectionIdentifier() - ) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "results_and_conclusion", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.21.1", "Results and Conclusion found", "n-a")); - end - - -// Rule unit: DOC.22 -rule "DOC.22.0: Detailing (404 & 405)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) - $section: Section( - anyHeadlineContainsStringIgnoreCase("Results") - && !getHeadline().containsStringIgnoreCase("Evaluation") - && !getHeadline().containsStringIgnoreCase("study") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.22.0", "Detailing found", "n-a")); - end - - -// Rule unit: DOC.23 -rule "DOC.23.0: Preliminary Test Results (429)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - ((anyHeadlineContainsString("Preliminary Screening Test") && containsString("Clinical observations")) - || 
anyHeadlineContainsString("Pre-Experiment")) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.23.0", "Preliminary Test Results found", "n-a")); - end - - -// Rule unit: DOC.24 -rule "DOC.24.0: Test Results (429)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.24.0", "Test Results found", "n-a")); - end - -rule "DOC.24.1: Test Results (429)" - when - Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier()) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifierResultsAndDiscussion)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "test_results", EntityType.ENTITY) - .forEach(entity -> { - entity.apply("DOC.24.1", "Test Results found", "n-a"); - }); - end - - -// Rule unit: DOC.25 -rule "DOC.25.0: Approach used (429)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - hasEntitiesOfType("species") - && (containsStringIgnoreCase("animals per") || containsStringIgnoreCase("animals /")) - ) - then - entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.25.0", "Study animal approach found.", "n-a"); - }); - end - - -// Rule unit: DOC.26 -rule "DOC.26.0: Sex" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) - $section: Section( - ( - 
anyHeadlineContainsStringIgnoreCase("animal") - || anyHeadlineContainsStringIgnoreCase("test system") - ) - && !getHeadline().containsStringIgnoreCase("selection") - && ( - containsStringIgnoreCase("sex:") - || containsStringIgnoreCase("male") - || containsStringIgnoreCase("female") - ) - ) - then - entityCreationService.byRegexIgnoreCase("([S|s]ex:)?[\\w\\s]{0,10}\\b(males?|females?)\\b", "sex", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.26.0", "Test animal sex found", "n-a"); - }); - end - - -// Rule unit: DOC.27 -rule "DOC.27.0: Animal Number 405" - when - FileAttribute(label == "OECD Number", value == "405") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("animal") - || anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("reaction") - ) - && !getHeadline().containsString("selection") - && ( - containsStringIgnoreCase("number of animals") - || containsStringIgnoreCase("no.") - ) - ) - then - entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.27.0", "Number of animals found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\. 
)([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.27.0", "Number of animals found", "n-a"); - }); - end - - -// Rule unit: DOC.28 -rule "DOC.28.0: Animal Number 429" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("animal") - || anyHeadlineContainsStringIgnoreCase("test system") - ) - && !getHeadline().containsString("selection") - && containsStringIgnoreCase("number of animals") - && (containsStringIgnoreCase("per") || containsString("/")) - && containsStringIgnoreCase("group") - ) - then - entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("(Number of animals per group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section).forEach(entity -> { - entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); - }); - end - -rule "DOC.28.1: No. 
Of animals - Fallback to appendix tables listing all individual animals for 429" - when - $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") - $table: Table(containsString($keyword) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual")) - FileAttribute(label == "OECD Number", value == "429") - then - $table.streamTableCellsWithHeader($keyword) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "animal_numbers", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(entity -> { - entity.apply("DOC.28.1", "Animal number found.", "n-a"); - }); - end - - -// Rule unit: DOC.29 -rule "DOC.29.0: 4h Exposure" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) - $section: Section( - (containsStringIgnoreCase("4 hours") || containsStringIgnoreCase("four hours")) - ) - then - entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) ", "4h_exposure", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.29.0", "4h exposure sentence found", "n-a"); - }); - end - - -// Rule unit: DOC.30 -rule "DOC.30.0: Dilution of the test substance" - when - FileAttribute(label == "OECD Number", value == "404") - $section: Section( - getHeadline().containsString("Formulation") - && containsString("dilution") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.30.0", "Dilution found.", "n-a")); - end - - -// Rule unit: DOC.31 -rule "DOC.31.0: Positive Control" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - getHeadline().containsStringIgnoreCase("Positive Control") - && !(getHeadline().containsStringIgnoreCase("Appendix") || getHeadline().containsStringIgnoreCase("Table")) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY) - 
.forEach(entity -> entity.apply("DOC.31.0", "Positive control found.", "n-a")); - end - - -// Rule unit: DOC.32 -rule "DOC.32.0: Mortality Statement" - when - FileAttribute(label == "OECD Number", value == "402") - $headline: Headline(containsStringIgnoreCase("Mortality") && !containsString("TABLE")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.32.0", "Mortality Statement found", "n-a")); - end - - -// Rule unit: DOC.33 -rule "DOC.33.0: Dose Mortality" - when - FileAttribute(label == "OECD Number", value == "425") - $table: Table( - (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("LongTerm Outcome") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality") || hasHeader("Viability/Mortality")) - && - (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) - ) - then - Stream.of($table.streamTableCellsWithHeader("Mortality"), - $table.streamTableCellsWithHeader("Comments"), - $table.streamTableCellsWithHeader("Long Term Results"), - $table.streamTableCellsWithHeader("Long Term Outcome"), - $table.streamTableCellsWithHeader("LongTerm Outcome"), - $table.streamTableCellsWithHeader("Viability / Mortality"), - $table.streamTableCellsWithHeader("Viability/Mortality") - ).flatMap(a -> a) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(entity -> { - entity.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"); - }); - - Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), - $table.streamTableCellsWithHeader("Dose [mg/kg body weight]"), - 
$table.streamTableCellsWithHeader("Dose levei (mg/kg)"), - $table.streamTableCellsWithHeader("Dose Level (mg/kg)"), - $table.streamTableCellsWithHeader("Dose level (mg/kg)"), - $table.streamTableCellsWithHeader("Dose (mg/kg)"), - $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]") - ).flatMap(a -> a) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(entity -> { - entity.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"); - }); - end - - -// Rule unit: DOC.34 -rule "DOC.34.0: Results (Main Study)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - getHeadline().containsString("Results") - && getHeadline().getTextRange().length() < 20 - && !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table")) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.34.0", "Results for main study found.", "n-a")); - end - - -// Rule unit: DOC.35 -rule "DOC.35.0: Doses (mg/kg bodyweight)" - when - FileAttribute(label == "OECD Number", value == "402") - $section: Section( - anyHeadlineContainsStringIgnoreCase("study design") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.35.0", "Doses per bodyweight information found", "n-a")); - end - - -//------------------------------------ Manual redaction rules ------------------------------------ - -// Rule unit: MAN.0 -rule "MAN.0.0: Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) - $entityToBeResized: 
TextEntity(matchesAnnotationId($id)) - then - manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($entityToBeResized); - $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); - end - -rule "MAN.0.1: Apply manual resize redaction" - salience 128 - when - $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) - $imageToBeResized: Image(id == $id) - then - manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); - retract($resizeRedaction); - update($imageToBeResized); - update($imageToBeResized.getParent()); - end - - -// Rule unit: MAN.1 -rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" - salience 128 - when - $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) - then - $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); - update($entityToBeRemoved); - retract($idRemoval); - $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); - end - -rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" - salience 128 - when - $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) - $imageEntityToBeRemoved: Image($id == id) - then - $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); - update($imageEntityToBeRemoved); - retract($idRemoval); - update($imageEntityToBeRemoved.getParent()); - end - - -// Rule unit: MAN.2 -rule "MAN.2.0: Apply force redaction" - salience 128 - when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToForce: TextEntity(matchesAnnotationId($id)) - then - $entityToForce.getManualOverwrite().addChange($force); - 
update($entityToForce); - $entityToForce.getIntersectingNodes().forEach(node -> update(node)); - retract($force); - end - -rule "MAN.2.1: Apply force redaction to images" - salience 128 - when - $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) - $imageToForce: Image(id == $id) - then - $imageToForce.getManualOverwrite().addChange($force); - update($imageToForce); - update($imageToForce.getParent()); - retract($force); - end - - -// Rule unit: MAN.3 -rule "MAN.3.0: Apply entity recategorization" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type != $type) - then - $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); - manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); - retract($recategorization); - // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
- retract($entityToBeRecategorized); - end - -rule "MAN.3.1: Apply entity recategorization of same type" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type == $type) - then - $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); - retract($recategorization); - end - -rule "MAN.3.2: Apply image recategorization" - salience 128 - when - $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) - not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $imageToBeRecategorized: Image($id == id) - then - manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); - update($imageToBeRecategorized); - update($imageToBeRecategorized.getParent()); - retract($recategorization); - end - - -// Rule unit: MAN.4 -rule "MAN.4.0: Apply legal basis change" - salience 128 - when - $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) - $imageToBeRecategorized: Image($id == id) - then - $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); - end - -rule "MAN.4.1: Apply legal basis change" - salience 128 - when - $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) - $entityToBeChanged: TextEntity(matchesAnnotationId($id)) - then - $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); - end - - -//------------------------------------ Entity merging rules ------------------------------------ - -// Rule unit: X.0 -rule "X.0.0: Remove Entity contained by Entity of same type" - salience 65 - when - $larger: TextEntity($type: type, $entityType: entityType, 
active()) - $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) - then - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - retract($contained); - end - - -// Rule unit: X.2 -rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" - salience 64 - when - $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) - then - $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); - retract($entity) - end - - -// Rule unit: X.3 -rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" - salience 64 - when - $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); - retract($recommendation); - end - - -// Rule unit: X.4 -rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" - salience 256 - when - $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $entity.addEngines($recommendation.getEngines()); - $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); - retract($recommendation); - end - - -// Rule unit: X.5 
-rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" - salience 256 - when - $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) - then - $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); - retract($recommendation); - end - - -// Rule unit: X.7 -rule "X.7.0: Remove all images" - salience 512 - when - $image: Image(imageType != ImageType.OCR, !hasManualChanges()) - then - $image.remove("X.7.0", "remove all images"); - retract($image); - end - - -//------------------------------------ File attributes rules ------------------------------------ - -// Rule unit: FA.1 -rule "FA.1.0: Remove duplicate FileAttributes" - salience 64 - when - $fileAttribute: FileAttribute($label: label, $value: value) - $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) - then - retract($duplicate); - end - - -//------------------------------------ Local dictionary search rules ------------------------------------ - -// Rule unit: LDS.0 -rule "LDS.0.0: Run local dictionary search" - agenda-group "LOCAL_DICTIONARY_ADDS" - salience -999 - when - $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() - then - entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) - .forEach(entity -> { - Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); - entity.addMatchedRules(matchedRules); - }); - end