From 3c9cb6883148db95d6d5e38098860c7e6b034aa6 Mon Sep 17 00:00:00 2001 From: Ali Oezyetimoglu Date: Tue, 24 Oct 2023 15:12:16 +0200 Subject: [PATCH] RED-6807: updated RM and DM rules --- .../server/utils/RedactionSearchUtility.java | 6 + .../src/main/resources/drools/base_rules.drl | 3 +- .../DroolsSyntaxValidationServiceTest.java | 5 +- .../resources/drools/acceptance_rules.drl | 3 +- .../src/test/resources/drools/adama-pilot.drl | 2 +- ...rules.drl => all_redact_manager_rules.drl} | 355 +++++++++++------- .../test/resources/drools/documine_flora.drl | 114 +++--- .../drools/manual_redaction_rules.drl | 3 +- .../src/test/resources/drools/rules.drl | 3 +- .../src/test/resources/drools/rules_v2.drl | 3 +- .../EFSA_sanitisation_GFL_v1/rules.drl | 3 +- 11 files changed, 318 insertions(+), 182 deletions(-) rename redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/{all_rules.drl => all_redact_manager_rules.drl} (78%) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java index 1de288f8..e5bcba11 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java @@ -132,6 +132,12 @@ public class RedactionSearchUtility { return getTextRangesByPatternWithLineBreaks(textBlock, group, pattern); } + public static List findTextRangesByRegexIgnoreCase(String regexPattern, TextBlock textBlock) { + + Pattern pattern = Patterns.getCompiledPattern(regexPattern, true); + return getTextRangesByPattern(textBlock, 0, pattern); + } + public static List findTextRangesByRegexIgnoreCase(String regexPattern, 
int group, TextBlock textBlock) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_rules.drl index 17205da4..a61c62e5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/base_rules.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -73,7 +74,7 @@ query "getFileAttributes" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LocalDictionarySearch.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java index ee87df62..3f5c1055 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsSyntaxValidationServiceTest.java @@ -63,7 +63,7 @@ class DroolsSyntaxValidationServiceTest { void testAllRules() { DroolsSyntaxValidationService droolsSyntaxValidationService = new DroolsSyntaxValidationService(new KieContainerCreationService(rulesClient)); - var rulesFile = new ClassPathResource("drools/all_rules.drl"); + var rulesFile = new ClassPathResource("drools/all_redact_manager_rules.drl"); String rulesString = new String(rulesFile.getInputStream().readAllBytes()); @@ -216,8 +216,7 @@ class DroolsSyntaxValidationServiceTest { DroolsSyntaxValidationService droolsSyntaxValidationService = new DroolsSyntaxValidationService(new KieContainerCreationService(rulesClient)); - List ruleFiles = List.of("drools/rules.drl", - "drools/all_rules.drl", + List ruleFiles = List.of("drools/rules.drl", "drools/all_redact_manager_rules.drl", "drools/documine_flora.drl", "drools/manual_redaction_rules.drl", "drools/acceptance_rules.drl", diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index c7ab3c3c..eb734c98 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -752,7 +753,7 @@ rule "FA.1.0: remove duplicate FileAttributes" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl index c4e2088c..aaa19b6a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/adama-pilot.drl @@ -902,7 +902,7 @@ rule "FA.1.0: remove duplicate FileAttributes" // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl similarity index 78% rename from redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl rename to redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 8def8a55..c65a8059 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_rules.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -94,25 +95,25 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" +rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" +rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: CBI.1 -rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" +rule "CBI.1.0: Do not redact CBI Address (non 
vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_address", dictionaryEntry) @@ -120,17 +121,17 @@ rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); end -rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" +rule "CBI.1.1: Redact CBI Address (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_address", dictionaryEntry) then - $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: CBI.2 -rule "CBI.2.0: Don't redact genitive CBI_author" +rule "CBI.2.0: Do not redact genitive CBI Author" when $entity: TextEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) then @@ -140,7 +141,7 @@ rule "CBI.2.0: Don't redact genitive CBI_author" // Rule unit: CBI.3 -rule "CBI.3.0: Redacted because Section contains Vertebrate" +rule "CBI.3.0: Redacted because Section contains a vertebrate" when $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -155,7 +156,7 @@ rule "CBI.3.0: Redacted because Section contains Vertebrate" }); end -rule "CBI.3.1: Redacted because Table Row contains Vertebrate" +rule "CBI.3.1: Redacted because table row contains a vertebrate" when $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -171,7 +172,7 @@ rule "CBI.3.1: Redacted because Table Row contains Vertebrate" }); end -rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" +rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" when $section: Section(!hasTables(), 
!hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -179,7 +180,7 @@ rule "CBI.3.2: Don't redact because Section doesn't contain Vertebrate" .forEach(entity -> entity.skip("CBI.3.2", "No vertebrate found")); end -rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" +rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" when $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) then @@ -190,7 +191,7 @@ rule "CBI.3.3: Dont redact because Table Row doesn't contain Vertebrate" // Rule unit: CBI.4 -rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is found in Section" +rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" when $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), @@ -207,7 +208,7 @@ rule "CBI.4.0: Dont redact Names and Addresses if no_redaction_indicator is foun }); end -rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is found in Table Row" +rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), @@ -228,7 +229,7 @@ rule "CBI.4.1: Dont redact Names and Addresses if no_redaction_indicator is foun // Rule unit: CBI.5 -rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in section" +rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Section" when $section: Section(!hasTables(), hasEntitiesOfType("redaction_indicator"), @@ -248,7 +249,7 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red }); end -rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Table Row" +rule "CBI.5.1: Redact Names and 
Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), @@ -270,7 +271,7 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red // Rule unit: CBI.6 -rule "CBI.6.0: Don't redact Names and Addresses if vertebrate but also published_information is found in Section" +rule "CBI.6.0: Do not redact Names and Addresses if vertebrate but also published_information is found in Section" when $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), @@ -289,7 +290,7 @@ rule "CBI.6.0: Don't redact Names and Addresses if vertebrate but also published }); end -rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published_information is found in Table Row" +rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also published_information is found in table row" when $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("published_information"), @@ -310,7 +311,7 @@ rule "CBI.6.1: Don't redact Names and Addresses if vertebrate but also published // Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables" +rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" when $section: Section(!hasTables(), hasEntitiesOfType("published_information"), @@ -358,7 +359,7 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" }); end -rule "CBI.8.1: Redacted because Table Row contains must_redact entity" +rule "CBI.8.1: Redacted because table row contains must_redact entity" when $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then @@ -376,7 +377,7 @@ rule "CBI.8.1: Redacted because Table Row contains must_redact entity" // Rule unit: CBI.9 -rule "CBI.9.0: Redact all Cell's with 
Header Author(s) as CBI_author (non vertebrate study)" +rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -386,10 +387,10 @@ rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non verteb .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" +rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -399,12 +400,12 @@ rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrat .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: CBI.10 -rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)" +rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -414,10 +415,10 @@ rule "CBI.10.0: Redact all Cell's with Header 
Author(s) as CBI_author (vertebrat .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate study)" +rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -427,7 +428,7 @@ rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate s .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -443,7 +444,7 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" +rule "CBI.12.0: Add all cells with Header Author(s) as CBI_author" salience 1 when $table: Table(hasHeader("Author(s)") || hasHeader("Author")) @@ -458,7 +459,7 @@ rule "CBI.12.0: Add all Cell's with Header Author(s) as CBI_author" .forEach(redactionEntity -> redactionEntity.skip("CBI.12.0", "Author(s) header found")); end -rule "CBI.12.1: Dont redact CBI_author, if its row contains a cell with header \"Vertebrate study Y/N\" and value No" +rule "CBI.12.1: Do not redact CBI_author, if its row contains a cell with header 
\"Vertebrate study Y/N\" and value No" when $table: Table(hasRowWithHeaderAndValue("Vertebrate study Y/N", "N") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "No")) then @@ -473,12 +474,12 @@ rule "CBI.12.2: Redact CBI_author, if its row contains a cell with header \"Vert then $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(authorEntity -> authorEntity.apply("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(authorEntity -> authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end // Rule unit: CBI.13 -rule "CBI.13.0: Ignore CBI Address Recommendations" +rule "CBI.13.0: Ignore CBI Address recommendations" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type == "CBI_address", entityType == EntityType.RECOMMENDATION) @@ -493,7 +494,7 @@ rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at when $sponsorEntity: TextEntity(type == "CBI_sponsor", textBefore.contains("batches produced at")) then - $sponsorEntity.apply("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); end @@ -518,7 +519,7 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv .toList(); $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) - .forEach(redactionEntity -> redactionEntity.apply("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determination of residues\" and livestock keyword" @@ -540,12 +541,12 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) - .forEach(redactionEntity -> redactionEntity.apply("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -553,12 +554,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") @@ -566,7 +567,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -645,11 +646,48 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end +// Rule unit: CBI.21 +rule "CBI.21.0: Redact short Authors section (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value == "Yes") + $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), 
length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule + not TextEntity(type == "CBI_author", engines contains Engine.NER) from $section.getEntities() + then + entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) + .forEach(entity -> { + entity.redact("CBI.21.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + }); + end + +rule "CBI.21.1: Redact short Authors section (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value == "Yes") + $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule + not TextEntity(type == "CBI_author", engines contains Engine.NER) from $section.getEntities() + then + entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) + .forEach(entity -> { + entity.redact("CBI.21.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + }); + end + + +// Rule unit: CBI.22 +rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in non-vertebrate documents" + when + not FileAttribute(label == "Vertebrate Study", value == "Yes") + $table: Table(hasHeader("Vertebrate study Y/N"), hasRowWithHeaderAndValue("Vertebrate study Y/N", "Y") || hasRowWithHeaderAndValue("Vertebrate study Y/N", "Yes")) + then + $table.streamEntitiesWhereRowHasHeaderAndAnyValue("Vertebrate study Y/N", List.of("Y", "Yes")) + .filter(redactionEntity -> redactionEntity.isType("CBI_address")) + .forEach(authorEntity -> authorEntity.redact("CBI.22.0", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); + end + //------------------------------------ PII rules ------------------------------------ @@ -659,7 +697,7 @@ rule "PII.0.0: Redact all PII (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "PII.0.1: Redact all PII (vertebrate study)" @@ -667,7 +705,7 @@ rule "PII.0.1: Redact all PII (vertebrate study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $pii: TextEntity(type == "PII", dictionaryEntry) then - $pii.apply("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -678,7 +716,7 @@ rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" @@ -687,7 +725,15 @@ rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.apply("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + 
.forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.2: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -707,7 +753,7 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.apply("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" @@ -725,7 +771,15 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.apply("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + +rule "PII.2.2: Redact phone numbers 
without indicators" + when + $section: Section(containsString("+")) + then + entityCreationService.byRegex("(\\+[\\dO]{1,2} )(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) + .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -736,7 +790,7 @@ rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" @@ -745,10 +799,9 @@ rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end - // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" when @@ -775,12 +828,12 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st $section: Section(containsString($contactKeyword)) then 
entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -803,7 +856,7 @@ rule "PII.4.1: Redact line after contact information keywords (non vertebrate st $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end @@ -818,7 +871,7 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" @@ -831,12 +884,12 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.apply("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -845,10 +898,10 @@ rule "PII.6.0: redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> 
contactEntity.apply("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: redact line between contact keywords" +rule "PII.6.1: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -857,7 +910,7 @@ rule "PII.6.1: redact line between contact keywords" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.apply("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -877,10 +930,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: 
Section(getHeadline().containsString("applicant") || @@ -895,12 +948,12 @@ rule "PII.7.1: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found" +rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -915,10 +968,10 @@ rule "PII.8.0: Redact contact information if producer is found" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found" +rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -933,54 +986,27 @@ rule "PII.8.1: Redact contact information if producer is found" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.apply("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of 
Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:")) - then - entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - -rule "PII.9.4: Redact between \"AUTHOR(S)\" and \"STUDY 
COMPLETION DATE\" (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsStringIgnoreCase("STUDY COMPLETION DATE")) - then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S):","AUTHOR(S)"), List.of("STUDY COMPLETION DATE:","STUDY COMPLETION DATE"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.apply("PII.9.4", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -990,7 +1016,7 @@ rule "PII.10.0: Redact study director abbreviation" $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) then entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1000,7 +1026,7 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) then entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.11.0", "On 
behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1013,6 +1039,24 @@ rule "PII.12.0: Expand PII entities with salutation prefix" .ifPresent(expandedEntity -> expandedEntity.addMatchedRules($entityToExpand.getMatchedRuleList())); end +// Rule unit: PII.13 +rule "PII.13.0: Add recommendation for PII after Contact Person" + when + $section: Section(containsStringIgnoreCase("contact person")) + then + entityCreationService.lineAfterStringsIgnoreCase(List.of("Contact Person", "Contact Person:"), "PII", EntityType.RECOMMENDATION, $section) + .forEach(entity -> entity.skip("PII.13.0", "Line after \"Contact Person\" found")); + end + + +// Rule unit: PII.14 +rule "PII.14.0: Recommend title prefixed words as PII" + when + $section: Section(containsAnyString("Dr ", "PD Dr ", "Prof. Dr ", "Dr. med. vet ", "Dr. rer. nat ", "PhD ", "BSc ", "(FH) ", "Mr ", "Mrs ", "Ms ", "Miss ", "Dr.", "PD. Dr.", "Prof. Dr.", "Dr. med. vet.", "Dr. rer. nat.", "PhD.", "BSc.", "(FH).", "Mr.", "Mrs.", "Ms.", "Miss.")) + then + entityCreationService.byRegex("((Dr|PD Dr|Prof. Dr|Dr. med. vet|Dr. rer. nat|PhD|BSc|\\(FH\\)|Mr|Mrs|Ms|Miss)[.\\s]{1,2})([\\p{Lu}][\\p{L}\\-.]{1,20}\\s[\\p{Lu}][\\p{L}\\-.]{1,20})", "PII", EntityType.ENTITY, 3, $section); + end + //------------------------------------ Other rules ------------------------------------ @@ -1032,7 +1076,7 @@ rule "ETC.1.0: Redact Purity" $section: Section(containsStringIgnoreCase("purity")) then entityCreationService.byRegex("\\bPurity:\\s*(?\\s*\\d{1,2}(?:\\.\\d{1,2})?\\s*%)", "purity", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.apply("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)")); + .forEach(entity -> entity.redact("ETC.1.0", "Purity found", "Reg (EC) No 1107/2009 Art. 
63 (2a)")); end @@ -1042,7 +1086,7 @@ rule "ETC.2.0: Redact signatures (non vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.2.1: Redact signatures (vertebrate study)" @@ -1050,7 +1094,7 @@ rule "ETC.2.1: Redact signatures (vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -1060,7 +1104,7 @@ rule "ETC.3.0: Redact logos (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.3.1: Redact logos (non vertebrate study)" @@ -1068,7 +1112,7 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -1077,7 +1121,21 @@ rule "ETC.4.0: Redact dossier dictionary entries" when $dossierRedaction: TextEntity(type == "dossier_redaction") then - $dossierRedaction.apply("ETC.4.0", "Specification of impurity found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $dossierRedaction.redact("ETC.4.0", "Specification of impurity found", 
"Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.4.1: Redact dossier dictionary entries" + when + $dossierRedaction: TextEntity(type == "dossier_redaction") + then + $dossierRedaction.redact("ETC.4.1", "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002"); + end + +rule "ETC.4.2: Redact dossier dictionary entries" + when + $dossierRedaction: TextEntity(type == "dossier_redaction") + then + $dossierRedaction.redact("ETC.4.2", "Dossier redaction found", "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"); end @@ -1101,14 +1159,14 @@ rule "ETC.6.0: Redact CAS Number" .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "PII", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); + .forEach(redactionEntity -> redactionEntity.redact("ETC.6.0", "Sample # found in Header", "Reg (EC) No 1107/2009 Art. 63 (2g)")); end // Rule unit: ETC.7 rule "ETC.7.0: Guidelines FileAttributes" when - $section: Section(!hasTables(), (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))) + $section: Section(!hasTables(), containsAnyString("DATA REQUIREMENT(S):", "TEST GUIDELINE(S):") && containsAnyString("OECD", "EPA", "OPPTS")) then RedactionSearchUtility.findTextRangesByRegex("OECD (No\\.? 
)?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream() .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) @@ -1123,7 +1181,7 @@ rule "ETC.8.0: Redact formulas (vertebrate study)" not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.apply("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end rule "ETC.8.1: Redact formulas (non vertebrate study)" @@ -1131,14 +1189,49 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.FORMULA) then - $logo.apply("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +// Rule unit: ETC.9 +rule "ETC.9.0: Redact skipped impurities" + when + FileAttribute(label == "Redact Skipped Impurities", value == "Yes") + $skippedImpurities: TextEntity(type == "skipped_impurities") + then + $skippedImpurities.redact("ETC.9.0", "Occasional Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); + end + +rule "ETC.9.1: Redact impurities" + when + FileAttribute(label == "Redact Impurities", value == "Yes") + $skippedImpurities: TextEntity(type == "impurities") + then + $skippedImpurities.redact("ETC.9.1", "Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); + end + +// Rule unit: ETC.10 +rule "ETC.10.0: Redact Product Composition Information" + when + $compositionInformation: TextEntity(type == "product_composition") + then + $compositionInformation.redact("ETC.10.0", "Product Composition Information found", "Article 63(2)(d) of Regulation (EC) No 1107/2009"); + end + +// Rule unit: ETC.11 +rule "ETC.11.0: Recommend first line in table cell with name and address of owner" + when + $table: 
Table(hasHeader("Name and Address of Owner / Tenant"), containsString("trial site")) + $header: TableCell(isHeader(), containsString("Name and Address of Owner / Tenant")) from $table.streamTableCells().toList() + $tableCell: TableCell(col == $header.col, row == 2) from $table.streamTableCells().toList() + then + entityCreationService.bySemanticNode($tableCell, "PII", EntityType.RECOMMENDATION) + .ifPresent(redactionEntity -> redactionEntity.redact("ETC.11.0", "Trial Site owner and address found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.0 -rule "AI.0.0: add all NER Entities of type CBI_author" +rule "AI.0.0: Add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) @@ -1149,7 +1242,7 @@ rule "AI.0.0: add all NER Entities of type CBI_author" // Rule unit: AI.1 -rule "AI.1.0: combine and add NER Entities as CBI_address" +rule "AI.1.0: Combine and add NER Entities as CBI_address" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) @@ -1159,7 +1252,7 @@ rule "AI.1.0: combine and add NER Entities as CBI_address" // Rule unit: AI.2 -rule "AI.2.0: add all NER Entities of any type except CBI_author" +rule "AI.2.0: Add all NER Entities of any type except CBI_author" salience 999 when nerEntities: NerEntities() @@ -1170,6 +1263,16 @@ rule "AI.2.0: add all NER Entities of any type except CBI_author" end +// Rule unit: AI.3 +rule "AI.3.0: Recommend authors from AI as PII" + salience 999 + when + nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) + then + nerEntities.streamEntitiesOfType("CBI_author") + .forEach(nerEntity -> entityCreationService.byNerEntity(nerEntity, "PII", EntityType.RECOMMENDATION, document)); + end + //------------------------------------ Manual redaction rules ------------------------------------ 
// Rule unit: MAN.0 @@ -1267,7 +1370,6 @@ rule "MAN.3.0: Apply entity recategorization" retract($entityToBeRecategorized); end - rule "MAN.3.1: Apply entity recategorization of same type" salience 128 when @@ -1279,7 +1381,6 @@ rule "MAN.3.1: Apply entity recategorization of same type" retract($recategorization); end - rule "MAN.3.2: Apply image recategorization" salience 128 when @@ -1317,7 +1418,7 @@ rule "MAN.4.1: Apply legal basis change" //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) @@ -1329,7 +1430,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" // Rule unit: X.1 -rule "X.1.0: merge intersecting Entities of same type" +rule "X.1.0: Merge intersecting Entities of same type" salience 64 when $first: TextEntity($type: type, $entityType: entityType, !resized(), active()) @@ -1345,7 +1446,7 @@ rule "X.1.0: merge intersecting Entities of same type" // Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) @@ -1358,7 +1459,7 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) @@ -1370,7 +1471,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove 
Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1383,7 +1484,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit // Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1395,7 +1496,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" // Rule unit: X.6 -rule "X.6.0: remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" salience 32 when $higherRank: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1422,7 +1523,7 @@ rule "X.6.1: remove Entity of higher rank, when intersected by entity of type EN //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -1435,7 +1536,7 @@ rule "FA.1.0: remove duplicate FileAttributes" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 417fa51a..ed8f67c6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -64,8 +65,10 @@ global Dictionary dictionary query "getFileAttributes" $fileAttribute: FileAttribute() end -//--------------------------------------------------------------------------- +//------------------------------------ H rules ------------------------------------ + +// Rule unit: H.0 rule "H.0.0: retract table of contents page" when $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) @@ -75,6 +78,7 @@ rule "H.0.0: retract table of contents page" end +// Rule unit: H.1 rule "H.1.0: Ignore Table of Contents" salience 10 when @@ -86,7 +90,7 @@ rule "H.1.0: Ignore Table of Contents" end -// Rule unit: MAN.0 +// Rule unit: H.2 rule "H.2.0: Show headlines" when $headline: Headline() @@ -95,6 +99,7 @@ rule "H.2.0: Show headlines" end +// Rule unit: H.3 rule "H.3.0: Study Type File Attribute" when not FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) @@ -112,6 +117,9 @@ rule "H.3.0: Study Type File Attribute" end +//------------------------------------ General documine rules ------------------------------------ + +// Rule unit: DOC.1 rule "DOC.1.0: Guidelines" when $section: Section( @@ -171,7 +179,6 @@ rule "DOC.1.0: Guidelines" ); end - rule "DOC.1.2: Guidelines" when $section: Section( @@ -197,7 +204,6 @@ rule "DOC.1.2: Guidelines" }); end - rule "DOC.1.3: Guidelines" when $section: Section( @@ -227,6 +233,7 @@ rule "DOC.1.3: Guidelines" end +// Rule unit: DOC.2 rule "DOC.2.0: Report number" when $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) @@ -237,6 +244,7 @@ rule "DOC.2.0: Report number" end +// Rule unit: DOC.3 rule "DOC.3.0: Experimental Starting Date" when $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) @@ -257,6 +265,7 @@ rule "DOC.3.0: Experimental Starting Date" end +// Rule unit: DOC.4 rule "DOC.4.0: Experimental Completion Date" when $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) @@ -275,7 +284,8 @@ rule "DOC.4.0: Experimental Completion Date" end - rule "DOC.5.0: Ignore species and strain in irrelevant study types" +// Rule unit: DOC.5 +rule "DOC.5.0: Ignore species and strain in irrelevant study types" salience 1 when FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) @@ -287,8 +297,7 @@ rule "DOC.4.0: Experimental Completion Date" }); end - - rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" +rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" salience 1 when FileAttribute(label 
== "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) @@ -307,7 +316,6 @@ rule "DOC.4.0: Experimental Completion Date" }); end - rule "DOC.5.2: Species" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) @@ -319,7 +327,6 @@ rule "DOC.5.2: Species" }); end - rule "DOC.5.3: Strain" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) @@ -339,20 +346,21 @@ rule "DOC.5.3: Strain" end +// Rule unit: DOC.6 rule "DOC.6.0: study title by document structure" when $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL")), numberOfRows == 1, numberOfCols == 1) + $tableCell: TableCell(row == 1, col == 1) from $table.streamTableCells().toList() + $paragraph: Paragraph(previousSibling.isPresent(), nextSibling.isPresent()) from $tableCell.streamChildren().toList() then - - entityCreationService.bySemanticNode($table.getCell(0, 0).streamChildren().toList().get(1), "title", EntityType.ENTITY).ifPresent(entity -> { + entityCreationService.bySemanticNode($paragraph, "title", EntityType.ENTITY).ifPresent(entity -> { entity.apply("DOC.6.0", "Study title found", "n-a"); }); end - rule "DOC.6.1: study title" when $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) @@ -362,7 +370,6 @@ rule "DOC.6.1: study title" }); end - rule "DOC.6.2: study title" when not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) @@ -374,7 +381,7 @@ rule "DOC.6.2: study title" end - +// Rule unit: DOC.7 rule "DOC.7.0: Performing Laboratory (Name)" when $section: Section(containsString("PERFORMING LABORATORY:")) @@ -384,21 +391,19 @@ rule "DOC.7.0: Performing Laboratory (Name)" }); end - rule "DOC.7.1: Performing Laboratory (Country)" - when - nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) - $section: Section(containsString("PERFORMING LABORATORY:")) - then - 
nerEntities.streamEntitiesOfType("COUNTRY") - .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) - .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) - .forEach(entity -> { - entity.apply("DOC.7.1", "Performing Laboratory found", "n-a"); - }); + when + nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) + $section: Section(containsString("PERFORMING LABORATORY:")) + then + nerEntities.streamEntitiesOfType("COUNTRY") + .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) + .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) + .forEach(entity -> { + entity.apply("DOC.7.1", "Performing Laboratory found", "n-a"); + }); end - rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" when $section: Section( @@ -414,7 +419,6 @@ rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" }); end - rule "DOC.7.3: Performing Laboratory (Country) from dict" when $section: Section( @@ -426,6 +430,7 @@ rule "DOC.7.3: Performing Laboratory (Country) from dict" end +// Rule unit: DOC.8 rule "DOC.8.0: GLP Study" when $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") @@ -443,6 +448,7 @@ rule "DOC.8.0: GLP Study" end +// Rule unit: DOC.9 rule "DOC.9.0: Batch number from CoA" when $section: Section( @@ -481,7 +487,6 @@ rule "DOC.9.0: Batch number from CoA" }); end - rule "DOC.9.1: Batch number" when $section: Section( @@ -514,7 +519,6 @@ rule "DOC.9.1: Batch number" }); end - rule "DOC.9.2: Batch number" when $section: Section( @@ -547,8 +551,7 @@ rule "DOC.9.2: Batch number" end - - +// Rule unit: DOC.10 rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) @@ -582,6 +585,7 @@ rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" end +// Rule unit: DOC.11 rule "DOC.11.0: Guideline 
Deviation" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) @@ -607,7 +611,6 @@ rule "DOC.11.0: Guideline Deviation" }); end - rule "DOC.11.1: Guideline Deviation in text" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) @@ -622,6 +625,7 @@ rule "DOC.11.1: Guideline Deviation in text" end +// Rule unit: DOC.12 rule "DOC.12.0: Clinical Signs" when FileAttribute(label == "OECD Number", value == "425") @@ -632,6 +636,7 @@ rule "DOC.12.0: Clinical Signs" end +// Rule unit: DOC.13 rule "DOC.13.0: Dosages" when FileAttribute(label == "OECD Number", value == "425") @@ -652,6 +657,7 @@ rule "DOC.13.0: Dosages" end +// Rule unit: DOC.14 rule "DOC.14.0: Mortality" when $headline: Headline(containsString("Mortality") && !containsString("TABLE")) @@ -662,6 +668,7 @@ rule "DOC.14.0: Mortality" end +// Rule unit: DOC.15 rule "DOC.15.0: Study Conclusion" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) @@ -674,6 +681,7 @@ rule "DOC.15.0: Study Conclusion" end +// Rule unit: DOC.16 rule "DOC.16.0: Weight Behavior Changes" when FileAttribute(label == "OECD Number", value == "402") @@ -692,6 +700,7 @@ rule "DOC.16.0: Weight Behavior Changes" end +// Rule unit: DOC.17 rule "DOC.17.0: Necropsy findings" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) @@ -711,6 +720,7 @@ rule "DOC.17.0: Necropsy findings" end +// Rule unit: DOC.18 rule "DOC.18.0: Clinical observations" when FileAttribute(label == "OECD Number", value == "403") @@ -731,6 +741,7 @@ rule "DOC.18.0: Clinical observations" end +// Rule unit: DOC.19 rule "DOC.19.0: Bodyweight changes" when FileAttribute(label == "OECD Number", value == "403") @@ -741,6 +752,7 @@ rule "DOC.19.0: Bodyweight changes" end +// Rule unit: DOC.20 rule "DOC.20.0: Study Design" when FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) @@ -753,7 +765,6 @@ rule "DOC.20.0: Study Design" .forEach(entity -> entity.apply("DOC.20.0", "Study design section found", "n-a")); end - rule "DOC.20.1: Study Design" when Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier()) @@ -766,6 +777,7 @@ rule "DOC.20.1: Study Design" end +// Rule unit: DOC.21 rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) @@ -780,7 +792,6 @@ rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" .forEach(entity -> entity.apply("DOC.21.0", "Results and Conclusion found", "n-a")); end - rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) @@ -796,6 +807,7 @@ rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" end +// Rule unit: DOC.22 rule "DOC.22.0: Detailing (404 & 405)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) @@ -810,6 +822,7 @@ rule "DOC.22.0: Detailing (404 & 405)" end +// Rule unit: DOC.23 rule "DOC.23.0: Preliminary Test Results (429)" when FileAttribute(label == "OECD Number", value == "429") @@ -823,6 +836,7 @@ rule "DOC.23.0: Preliminary Test Results (429)" end +// Rule unit: DOC.24 rule "DOC.24.0: Test Results (429)" when FileAttribute(label == "OECD Number", value == "429") @@ -832,7 +846,6 @@ rule "DOC.24.0: Test Results (429)" .forEach(entity -> entity.apply("DOC.24.0", "Test Results found", "n-a")); end - rule "DOC.24.1: Test Results (429)" when Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier()) @@ -845,6 +858,7 @@ rule "DOC.24.1: Test Results (429)" end +// Rule unit: DOC.25 rule "DOC.25.0: Approach used (429)" when 
FileAttribute(label == "OECD Number", value == "429") @@ -859,6 +873,7 @@ rule "DOC.25.0: Approach used (429)" end +// Rule unit: DOC.26 rule "DOC.26.0: Sex" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) @@ -881,6 +896,7 @@ rule "DOC.26.0: Sex" end +// Rule unit: DOC.27 rule "DOC.27.0: Animal Number 405" when FileAttribute(label == "OECD Number", value == "405") @@ -906,6 +922,7 @@ rule "DOC.27.0: Animal Number 405" end +// Rule unit: DOC.28 rule "DOC.28.0: Animal Number 429" when FileAttribute(label == "OECD Number", value == "429") @@ -931,7 +948,6 @@ rule "DOC.28.0: Animal Number 429" }); end - rule "DOC.28.1: No. Of animals - Fallback to appendix tables listing all individual animals for 429" when $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") @@ -949,6 +965,7 @@ rule "DOC.28.1: No. Of animals - Fallback to appendix tables listing all individ end +// Rule unit: DOC.29 rule "DOC.29.0: 4h Exposure" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) @@ -962,6 +979,7 @@ rule "DOC.29.0: 4h Exposure" end +// Rule unit: DOC.30 rule "DOC.30.0: Dilution of the test substance" when FileAttribute(label == "OECD Number", value == "404") @@ -975,6 +993,7 @@ rule "DOC.30.0: Dilution of the test substance" end +// Rule unit: DOC.31 rule "DOC.31.0: Positive Control" when FileAttribute(label == "OECD Number", value == "429") @@ -988,6 +1007,7 @@ rule "DOC.31.0: Positive Control" end +// Rule unit: DOC.32 rule "DOC.32.0: Mortality Statement" when FileAttribute(label == "OECD Number", value == "402") @@ -998,6 +1018,7 @@ rule "DOC.32.0: Mortality Statement" end +// Rule unit: DOC.33 rule "DOC.33.0: Dose Mortality" when FileAttribute(label == "OECD Number", value == "425") @@ -1041,6 +1062,7 @@ rule "DOC.33.0: Dose Mortality" end +// Rule unit: DOC.34 rule "DOC.34.0: Results (Main Study)" when FileAttribute(label == "OECD Number", value == "429") @@ -1055,6 +1077,7 @@ rule "DOC.34.0: Results (Main 
Study)" end +// Rule unit: DOC.35 rule "DOC.35.0: Doses (mg/kg bodyweight)" when FileAttribute(label == "OECD Number", value == "402") @@ -1066,6 +1089,7 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)" .forEach(entity -> entity.apply("DOC.35.0", "Doses per bodyweight information found", "n-a")); end + //------------------------------------ Manual redaction rules ------------------------------------ // Rule unit: MAN.0 @@ -1208,11 +1232,10 @@ rule "MAN.4.1: Apply legal basis change" end - //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) @@ -1224,7 +1247,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" // Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) @@ -1237,7 +1260,7 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) @@ -1249,7 +1272,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" salience 256 when $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || 
entityType == EntityType.HINT), active()) @@ -1262,7 +1285,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit // Rule unit: X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) @@ -1287,8 +1310,7 @@ rule "X.7.0: remove all images" //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" - +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -1298,8 +1320,10 @@ rule "FA.1.0: remove duplicate FileAttributes" end +//------------------------------------ Local dictionary search rules ------------------------------------ + // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index ae144708..5112137b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -210,7 +211,7 @@ rule "MAN.4.1: Apply legal basis change" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 085028cd..b11b2c4a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -1136,7 +1137,7 @@ rule "FA.1.0: remove duplicate FileAttributes" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" 
agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index ad912d80..a9cd2ea5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -22,6 +22,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; @@ -349,7 +350,7 @@ rule "FA.1.0: remove duplicate FileAttributes" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 1a57e91b..2985b656 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -16,6 +16,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.*; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.*; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table; +import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph; @@ -724,7 +725,7 @@ rule "FA.1.0: remove duplicate FileAttributes" //------------------------------------ Local dictionary search rules ------------------------------------ // Rule unit: LDS.0 -rule "LDS.0.0: run local dictionary search" +rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when