From 7e9c6c8695ea3a4055ce69e9a96fc9952fd65363 Mon Sep 17 00:00:00 2001 From: Ali Oezyetimoglu Date: Thu, 12 Oct 2023 20:06:23 +0200 Subject: [PATCH] RED-6807: updated RM and DM rules * changed typos * added rules * fixed bugs --- .../server/utils/RedactionSearchUtility.java | 6 + .../DroolsSyntaxValidationServiceTest.java | 5 +- ...rules.drl => all_redact_manager_rules.drl} | 120 +++++++++--------- .../test/resources/drools/documine_flora.drl | 93 +++++++++----- 4 files changed, 125 insertions(+), 99 deletions(-) rename redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/{all_rules.drl => all_redact_manager_rules.drl} (89%) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java index 1de288f8..e5bcba11 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java @@ -132,6 +132,12 @@ public class RedactionSearchUtility { return getTextRangesByPatternWithLineBreaks(textBlock, group, pattern); } + public static List findTextRangesByRegexIgnoreCase(String regexPattern, TextBlock textBlock) { + + Pattern pattern = Patterns.getCompiledPattern(regexPattern, true); + return getTextRangesByPattern(textBlock, 0, pattern); + } + public static List findTextRangesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java index ee87df62..3f5c1055 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java @@ -63,7 +63,7 @@ class DroolsSyntaxValidationServiceTest { void testAllRules() { DroolsSyntaxValidationService droolsSyntaxValidationService = new DroolsSyntaxValidationService(new KieContainerCreationService(rulesClient)); - var rulesFile = new ClassPathResource("drools/all_rules.drl"); + var rulesFile = new ClassPathResource("drools/all_redact_manager_rules.drl"); String rulesString = new String(rulesFile.getInputStream().readAllBytes()); @@ -216,8 +216,7 @@ class DroolsSyntaxValidationServiceTest { DroolsSyntaxValidationService droolsSyntaxValidationService = new DroolsSyntaxValidationService(new KieContainerCreationService(rulesClient)); - List ruleFiles = List.of("drools/rules.drl", - "drools/all_rules.drl", + List ruleFiles = List.of("drools/rules.drl", "drools/all_redact_manager_rules.drl", "drools/documine_flora.drl", "drools/manual_redaction_rules.drl", "drools/acceptance_rules.drl", diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl similarity index 89% rename from redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl rename to redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 8acd4449..46c4e119 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -100,7 +100,7 @@ rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" @@ -108,7 +108,7 @@ rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -126,7 +126,7 @@ rule "CBI.1.1: Redact CBI Address (vertebrate Study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_address", dictionaryEntry) then - $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -387,7 +387,7 @@ rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebr .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate study)" @@ -400,7 +400,7 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -415,7 +415,7 @@ rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate study)" @@ -428,7 +428,7 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -474,7 +474,7 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert then $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> authorEntity.apply("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(authorEntity -> authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end @@ -494,7 +494,7 @@ rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at when $sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then - $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); end @@ -519,7 +519,7 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv .toList(); $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) - .forEach(redactionEntity -> redactionEntity.apply("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determination of residues\" and livestock keyword" @@ -541,7 +541,7 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(redactionEntity -> redactionEntity.apply("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end @@ -554,7 +554,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -567,7 +567,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -646,7 +646,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -660,7 +660,7 @@ rule "CBI.21.0: Redact short Authors section (non vertebrate study)" then entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) .forEach(entity -> { - entity.apply("CBI.21.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.21.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); }); end @@ -672,7 +672,7 @@ rule "CBI.21.1: Redact short Authors section (vertebrate study)" then entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) .forEach(entity -> { - entity.apply("CBI.21.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.21.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); }); end @@ -685,7 +685,7 @@ rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in n then $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isType("CBI_address")) - .forEach(authorEntity -> authorEntity.apply("CBI.22.0", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(authorEntity -> authorEntity.redact("CBI.22.0", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end @@ -697,7 +697,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "PII.0.1: Redact all PII (vertebrate study)" @@ -705,7 +705,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -716,7 +716,7 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" @@ -725,7 +725,7 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end rule "PII.1.2: Redact typoed Emails with indicator" @@ -733,7 +733,7 @@ rule "PII.1.2: Redact typoed Emails with indicator" $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -753,7 +753,7 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" @@ -771,7 +771,7 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end rule "PII.2.2: Redact phone numbers without indicators" @@ -779,7 +779,7 @@ rule "PII.2.2: Redact phone numbers without indicators" $section: Section(containsString("+")) then entityCreationService.byRegex("(\\+[\\dO]{1,2} )(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) - .forEach(entity -> entity.apply("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -790,7 +790,7 @@ rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" @@ -799,7 +799,7 @@ rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 @@ -828,7 +828,7 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" @@ -856,7 +856,7 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -871,7 +871,7 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" @@ -884,7 +884,7 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -898,7 +898,7 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.6.1: Redact line between contact keywords (vertebrate study)" @@ -910,7 +910,7 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -930,7 +930,7 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" @@ -948,7 +948,7 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -968,7 +968,7 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" @@ -986,7 +986,7 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -997,7 +997,7 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non $section: Section(!hasTables(), containsString("AUTHOR(S)"), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), containsString("STUDY COMPLETION DATE:")) then entityCreationService.shortestBetweenAnyString(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE:", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end //rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" @@ -1006,7 +1006,7 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non // $section: Section(!hasTables(), containsString("AUTHOR(S)"), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) // then // entityCreationService.shortestBetweenAnyString(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $section) -// .forEach(authorEntity -> authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); +// .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); // end rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" @@ -1015,7 +1015,7 @@ rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver $section: Section(!hasTables(), containsString("AUTHOR(S)"), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), containsString("STUDY COMPLETION DATE:")) then entityCreationService.shortestBetweenAnyString(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE:", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end //rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" @@ -1024,7 +1024,7 @@ rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver // $section: Section(!hasTables(), containsString("AUTHOR(S)"), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) // then // entityCreationService.shortestBetweenAnyString(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $section) -// .forEach(authorEntity -> authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); +// .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); // end rule "PII.9.4: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" @@ -1043,7 +1043,7 @@ rule "PII.10.0: Redact study director abbreviation" $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) then entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1053,7 +1053,7 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1103,7 +1103,7 @@ rule "ETC.1.0: Redact Purity" $section: Section(containsStringIgnoreCase("purity")) then entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)")); + .forEach(entity -> entity.redact("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)")); end @@ -1113,7 +1113,7 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.2.1: Redact signatures (vertebrate study)" @@ -1121,7 +1121,7 @@ rule "ETC.2.1: Redact signatures (vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -1131,7 +1131,7 @@ rule "ETC.3.0: Redact logos (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.3.1: Redact logos (non vertebrate study)" @@ -1139,7 +1139,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -1148,21 +1148,21 @@ rule "ETC.4.0: Redact dossier dictionary entries" when $dossierRedaction: TextEntity(type == "dossier_redaction") then - $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $dossierRedaction.redact("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.4.1: Redact dossier dictionary entries" when $dossierRedaction: TextEntity(type == "dossier_redaction") then - $dossierRedaction.apply("ETC.4.1", "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002"); + $dossierRedaction.redact("ETC.4.1", "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002"); end rule "ETC.4.2: Redact dossier dictionary entries" when $dossierRedaction: TextEntity(type == "dossier_redaction") then - $dossierRedaction.apply("ETC.4.2", "Dossier redaction found", "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"); + $dossierRedaction.redact("ETC.4.2", "Dossier redaction found", "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"); end @@ -1186,14 +1186,14 @@ rule "ETC.6.0: Redact CAS Number" .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "PII", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end // Rule unit: ETC.7 rule "ETC.7.0: Guidelines FileAttributes" when - $section: Section(!hasTables(), (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) + $section: Section(!hasTables(), containsAnyString("DATA REQUIREMENT(S):", "TEST GUIDELINE(S):") && containsAnyString("OECD", "EPA", "OPPTS")) then RedactionSearchUtility.findTextRangesByRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream() .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) @@ -1208,7 +1208,7 @@ rule "ETC.8.0: Redact formulas (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.apply("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.8.1: Redact formulas (non vertebrate study)" @@ -1216,7 +1216,7 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.apply("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.9 @@ -1225,7 +1225,7 @@ rule "ETC.9.0: Redact skipped impurities" FileAttribute(label == "Redact Skipped Impurities", value == "Yes") $skippedImpurities: TextEntity(type == "skipped_impurities") then - $skippedImpurities.apply("ETC.9.0", "Occasional Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); + $skippedImpurities.redact("ETC.9.0", "Occasional Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); end rule "ETC.9.1: Redact impurities" @@ -1233,7 +1233,7 @@ rule "ETC.9.1: Redact impurities" FileAttribute(label == "Redact Impurities", value == "Yes") $skippedImpurities: TextEntity(type == "impurities") then - $skippedImpurities.apply("ETC.9.1", "Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); + $skippedImpurities.redact("ETC.9.1", "Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); end // Rule unit: ETC.10 @@ -1241,7 +1241,7 @@ rule "ETC.10.0: Redact Product Composition Information" when $compositionInformation: TextEntity(type == "product_composition") then - $compositionInformation.apply("ETC.10.0", "Product Composition Information found", "Article 63(2)(d) of Regulation (EC) No 1107/2009"); + $compositionInformation.redact("ETC.10.0", "Product Composition Information found", "Article 63(2)(d) of Regulation (EC) No 1107/2009"); end // Rule unit: ETC.11 @@ -1252,7 +1252,7 @@ rule "ETC.11.0: Recommend first line in table cell with name and address of owne $tableCell: TableCell(col == $header.col, row == 2) from $table.streamTableCells().toList() then entityCreationService.bySemanticNode($tableCell, "PII", EntityType.RECOMMENDATION) - .ifPresent(redactionEntity -> redactionEntity.apply("ETC.11.0", "Trial Site owner and address found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .ifPresent(redactionEntity -> redactionEntity.redact("ETC.11.0", "Trial Site owner and address found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end //------------------------------------ AI rules ------------------------------------ @@ -1397,7 +1397,6 @@ rule "MAN.3.0: Apply entity recategorization" retract($entityToBeRecategorized); end - rule "MAN.3.1: Apply entity recategorization of same type" salience 128 when @@ -1409,7 +1408,6 @@ rule "MAN.3.1: Apply entity recategorization of same type" retract($recategorization); end - rule "MAN.3.2: Apply image recategorization" salience 128 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index b39aaab2..317976f8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -65,8 +65,10 @@ global Dictionary dictionary query "getFileAttributes" $fileAttribute: FileAttribute() end -//--------------------------------------------------------------------------- +//------------------------------------ H rules ------------------------------------ + +// Rule unit: H.0 rule "H.0.0: retract table of contents page" when $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) @@ -76,6 +78,7 @@ rule "H.0.0: retract table of contents page" end +// Rule unit: H.1 rule "H.1.0: Ignore Table of Contents" salience 10 when @@ -87,7 +90,7 @@ rule "H.1.0: Ignore Table of Contents" end -// Rule unit: MAN.0 +// Rule unit: H.2 rule "H.2.0: Show headlines" when $headline: Headline() @@ -96,6 +99,7 @@ rule "H.2.0: Show headlines" end +// Rule unit: H.3 rule "H.3.0: Study Type File Attribute" when not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) @@ -113,6 +117,9 @@ rule "H.3.0: Study Type File Attribute" end +//------------------------------------ General documine rules ------------------------------------ + +// Rule unit: DOC.1 rule "DOC.1.0: Guidelines" when $section: Section( @@ -172,7 +179,6 @@ rule "DOC.1.0: Guidelines" ); end - rule "DOC.1.2: Guidelines" when $section: Section( @@ -198,7 +204,6 @@ rule "DOC.1.2: Guidelines" }); end - rule "DOC.1.3: Guidelines" when $section: Section( @@ -228,6 +233,7 @@ rule "DOC.1.3: Guidelines" end +// Rule unit: DOC.2 rule "DOC.2.0: Report number" when $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) @@ -238,6 +244,7 @@ rule "DOC.2.0: Report number" end +// Rule unit: DOC.3 rule "DOC.3.0: Experimental Starting Date" when $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) @@ -258,6 +265,7 @@ rule "DOC.3.0: Experimental Starting Date" end +// Rule unit: DOC.4 rule "DOC.4.0: Experimental Completion Date" when $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) @@ -276,7 +284,8 @@ rule "DOC.4.0: Experimental Completion Date" end - rule "DOC.5.0: Ignore species and strain in irrelevant study types" +// Rule unit: DOC.5 +rule "DOC.5.0: Ignore species and strain in irrelevant study types" salience 1 when FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) @@ -288,8 +297,7 @@ rule "DOC.4.0: Experimental Completion Date" }); end - - rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" +rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" salience 1 when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) @@ -308,7 +316,6 @@ rule "DOC.4.0: Experimental Completion Date" }); end - rule "DOC.5.2: Species" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) @@ -320,7 +327,6 @@ rule "DOC.5.2: Species" }); end - rule "DOC.5.3: Strain" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) @@ -340,20 +346,21 @@ rule "DOC.5.3: Strain" end +// Rule unit: DOC.6 rule "DOC.6.0: study title by document structure" when $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL")), numberOfRows == 1, numberOfCols == 1) + $tableCell: TableCell(row == 1, col == 1) from $table.streamTableCells().toList() + $paragraph: Paragraph(previousSibling.isPresent(), nextSibling.isPresent()) from $tableCell.streamChildren().toList() then - - entityCreationService.bySemanticNode($table.getCell(0, 0).streamChildren().toList().get(1), "title", EntityType.ENTITY).ifPresent(entity -> { + entityCreationService.bySemanticNode($paragraph, "title", EntityType.ENTITY).ifPresent(entity -> { entity.apply("DOC.6.0", "Study title found", "n-a"); }); end - rule "DOC.6.1: study title" when $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) @@ -363,7 +370,6 @@ rule "DOC.6.1: study title" }); end - rule "DOC.6.2: study title" when not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) @@ -375,7 +381,7 @@ rule "DOC.6.2: study title" end - +// Rule unit: DOC.7 rule "DOC.7.0: Performing Laboratory (Name)" when $section: Section(containsString("PERFORMING LABORATORY:")) @@ -385,8 +391,7 @@ rule "DOC.7.0: Performing Laboratory (Name)" }); end - - rule "DOC.7.1: Performing Laboratory (Country)" +rule "DOC.7.1: Performing Laboratory (Country)" when nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) $section: Section(containsString("PERFORMING LABORATORY:")) @@ -400,7 +405,6 @@ rule "DOC.7.0: Performing Laboratory (Name)" }); end - rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" when $section: Section( @@ -416,7 +420,6 @@ rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" }); end - rule "DOC.7.3: Performing Laboratory (Country) from dict" when $section: Section( @@ -431,6 +434,7 @@ rule "DOC.7.3: Performing Laboratory (Country) from dict" end +// Rule unit: DOC.8 rule "DOC.8.0: GLP Study" when $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") @@ -448,6 +452,7 @@ rule "DOC.8.0: GLP Study" end +// Rule unit: DOC.9 rule "DOC.9.0: Batch number from CoA" when $section: Section( @@ -486,7 +491,6 @@ rule "DOC.9.0: Batch number from CoA" }); end - rule "DOC.9.1: Batch number" when $section: Section( @@ -519,7 +523,6 @@ rule "DOC.9.1: Batch number" }); end - rule "DOC.9.2: Batch number" when $section: Section( @@ -552,8 +555,7 @@ rule "DOC.9.2: Batch number" end - - +// Rule unit: DOC.10 rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) @@ -587,6 +589,7 @@ rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" end +// Rule unit: DOC.11 rule "DOC.11.0: Guideline Deviation" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) @@ -612,7 +615,6 @@ rule "DOC.11.0: Guideline Deviation" }); end - rule "DOC.11.1: Guideline Deviation in text" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) @@ -627,6 +629,7 @@ rule "DOC.11.1: Guideline Deviation in text" end +// Rule unit: DOC.12 rule "DOC.12.0: Clinical Signs" when FileAttribute(label == "OECD Number", value == "425") @@ -637,6 +640,7 @@ rule "DOC.12.0: Clinical Signs" end +// Rule unit: DOC.13 rule "DOC.13.0: Dosages" when FileAttribute(label == "OECD Number", value == "425") @@ -657,6 +661,7 @@ rule "DOC.13.0: Dosages" end +// Rule unit: DOC.14 rule "DOC.14.0: Mortality" when $headline: Headline(containsString("Mortality") && !containsString("TABLE")) @@ -667,6 +672,7 @@ rule "DOC.14.0: Mortality" end +// Rule unit: DOC.15 rule "DOC.15.0: Study Conclusion" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) @@ -679,6 +685,7 @@ rule "DOC.15.0: Study Conclusion" end +// Rule unit: DOC.16 rule "DOC.16.0: Weight Behavior Changes" when FileAttribute(label == "OECD Number", value == "402") @@ -697,6 +704,7 @@ rule "DOC.16.0: Weight Behavior Changes" end +// Rule unit: DOC.17 rule "DOC.17.0: Necropsy findings" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) @@ -716,6 +724,7 @@ rule "DOC.17.0: Necropsy findings" end +// Rule unit: DOC.18 rule "DOC.18.0: Clinical observations" when FileAttribute(label == "OECD Number", value == "403") @@ -736,6 +745,7 @@ rule "DOC.18.0: Clinical observations" end +// Rule unit: DOC.19 rule "DOC.19.0: Bodyweight changes" when FileAttribute(label == "OECD Number", value == "403") @@ -746,6 +756,7 @@ rule "DOC.19.0: Bodyweight changes" end +// Rule unit: DOC.20 rule "DOC.20.0: Study Design" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) @@ -758,7 +769,6 @@ rule "DOC.20.0: Study Design" .forEach(entity -> entity.apply("DOC.20.0", "Study design section found", "n-a")); end - rule "DOC.20.1: Study Design" when Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier()) @@ -771,6 +781,7 @@ rule "DOC.20.1: Study Design" end +// Rule unit: DOC.21 rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) @@ -785,7 +796,6 @@ rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" .forEach(entity -> entity.apply("DOC.21.0", "Results and Conclusion found", "n-a")); end - rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) @@ -801,6 +811,7 @@ rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" end +// Rule unit: DOC.22 rule "DOC.22.0: Detailing (404 & 405)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) @@ -815,6 +826,7 @@ rule "DOC.22.0: Detailing (404 & 405)" end +// Rule unit: DOC.23 rule "DOC.23.0: Preliminary Test Results (429)" when FileAttribute(label == "OECD Number", value == "429") @@ -828,6 +840,7 @@ rule "DOC.23.0: Preliminary Test Results (429)" end +// Rule unit: DOC.24 rule "DOC.24.0: Test Results (429)" when FileAttribute(label == "OECD Number", value == "429") @@ -837,7 +850,6 @@ rule "DOC.24.0: Test Results (429)" .forEach(entity -> entity.apply("DOC.24.0", "Test Results found", "n-a")); end - rule "DOC.24.1: Test Results (429)" when Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier()) @@ -850,6 +862,7 @@ rule "DOC.24.1: Test Results (429)" end +// Rule unit: DOC.25 rule "DOC.25.0: Approach used (429)" when FileAttribute(label == "OECD Number", value == "429") @@ -864,6 +877,7 @@ rule "DOC.25.0: Approach used (429)" end +// Rule unit: DOC.26 rule "DOC.26.0: Sex" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) @@ -886,6 +900,7 @@ rule "DOC.26.0: Sex" end +// Rule unit: DOC.27 rule "DOC.27.0: Animal Number 405" when FileAttribute(label == "OECD Number", value == "405") @@ -911,6 +926,7 @@ rule "DOC.27.0: Animal Number 405" end +// Rule unit: DOC.28 rule "DOC.28.0: Animal Number 429" when FileAttribute(label == "OECD Number", value == "429") @@ -936,7 +952,6 @@ rule "DOC.28.0: Animal Number 429" }); end - rule "DOC.28.1: No. Of animals - Fallback to appendix tables listing all individual animals for 429" when $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") @@ -954,6 +969,7 @@ rule "DOC.28.1: No. Of animals - Fallback to appendix tables listing all individ end +// Rule unit: DOC.29 rule "DOC.29.0: 4h Exposure" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) @@ -967,6 +983,7 @@ rule "DOC.29.0: 4h Exposure" end +// Rule unit: DOC.30 rule "DOC.30.0: Dilution of the test substance" when FileAttribute(label == "OECD Number", value == "404") @@ -980,6 +997,7 @@ rule "DOC.30.0: Dilution of the test substance" end +// Rule unit: DOC.31 rule "DOC.31.0: Positive Control" when FileAttribute(label == "OECD Number", value == "429") @@ -993,6 +1011,7 @@ rule "DOC.31.0: Positive Control" end +// Rule unit: DOC.32 rule "DOC.32.0: Mortality Statement" when FileAttribute(label == "OECD Number", value == "402") @@ -1003,6 +1022,7 @@ rule "DOC.32.0: Mortality Statement" end +// Rule unit: DOC.33 rule "DOC.33.0: Dose Mortality" when FileAttribute(label == "OECD Number", value == "425") @@ -1046,6 +1066,7 @@ rule "DOC.33.0: Dose Mortality" end +// Rule unit: DOC.34 rule "DOC.34.0: Results (Main Study)" when FileAttribute(label == "OECD Number", value == "429") @@ -1060,6 +1081,7 @@ rule "DOC.34.0: Results (Main Study)" end +// Rule unit: DOC.35 rule "DOC.35.0: Doses (mg/kg bodyweight)" when FileAttribute(label == "OECD Number", value == "402") @@ -1071,6 +1093,7 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" .forEach(entity -> entity.apply("DOC.35.0", "Doses per bodyweight information found", "n-a")); end + //------------------------------------ Manual redaction rules ------------------------------------ // Rule unit: MAN.0 @@ -1213,11 +1236,10 @@ rule "MAN.4.1: Apply legal basis change" end - //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) @@ -1229,7 +1251,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" // Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) @@ -1242,7 +1264,7 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) @@ -1254,7 +1276,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1267,7 +1289,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit // Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1292,8 +1314,7 @@ rule "X.7.0: remove all images" //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" - +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -1303,6 +1324,8 @@ rule "FA.1.0: remove duplicate FileAttributes" end +//------------------------------------ Local dictionary search rules ------------------------------------ + // Rule unit: LDS.0 rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS"