diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 2f9df8a8..5b163016 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -183,6 +183,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -211,6 +224,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + 
$table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -262,6 +301,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -320,6 +371,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + //------------------------------------ PII rules ------------------------------------ @@ -347,6 +410,13 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: PII.1 rule "PII.1.0: Redact Emails by RegEx" @@ -375,6 +445,22 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + 
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -487,6 +573,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -564,6 +677,18 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" @@ -702,6 +827,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver 
.forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -779,6 +912,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -804,6 +944,12 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: ETC.5 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 5a6932f8..a2d11999 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -399,6 +399,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -427,6 +440,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> 
redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -614,6 +653,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -705,6 +756,19 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" when @@ -769,6 +833,13 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: PII.1 rule "PII.1.0: Redact Emails by RegEx" @@ -813,6 +884,23 @@ rule "PII.1.4: Redact typoed Emails with indicator" .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" when @@ -931,6 +1019,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1008,6 +1123,18 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" @@ -1146,6 +1273,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver 
.forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -1253,6 +1388,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -1278,6 +1420,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 77bcd0c1..a112b43e 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -275,6 +275,20 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" agenda-group "LOCAL_DICTIONARY_ADDS" @@ -450,6 +464,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -541,6 +567,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + //------------------------------------ PII rules ------------------------------------ @@ -569,6 +607,13 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: PII.1 rule "PII.1.0: Redact Emails by RegEx" @@ -597,6 +642,22 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + 
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords" @@ -626,6 +687,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.6 rule "PII.6.0: Redact line between contact keywords" @@ -663,6 +751,18 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" @@ -801,6 +901,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver .forEach(authorEntity -> 
authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -868,6 +976,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -893,6 +1008,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 5a6932f8..a2d11999 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl 
+++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl
@@ -399,6 +399,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate
             .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"));
     end
 
+rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)"
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when // requires that no "Vertebrate Study" attribute sounds like "Yes" or lower-cases to "y"
+        not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
+        $authorsTable: Table(hasHeader("Author(s)"))
+    then // each cell under the "Author(s)" header becomes a CBI_author entity and is redacted
+        $authorsTable.streamTableCellsWithHeader("Author(s)")
+            .map(cell -> entityCreationService.bySemanticNode(cell, "CBI_author", EntityType.ENTITY))
+            .filter(Optional::isPresent) // skip cells for which no entity was produced
+            .map(Optional::get)
+            .forEach(authorEntity -> authorEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
 // Rule unit: CBI.10
 
 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)"
@@ -427,6 +440,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st
             .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"));
     end
 
+rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)"
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when // requires a "Vertebrate Study" attribute that sounds like "Yes" or lower-cases to "y"
+        FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
+        $authorsTable: Table(hasHeader("Author(s)"))
+    then // same cell handling as CBI.9.2, reported under the CBI.10.2 id
+        $authorsTable.streamTableCellsWithHeader("Author(s)")
+            .map(cell -> entityCreationService.bySemanticNode(cell, "CBI_author", EntityType.ENTITY))
+            .filter(Optional::isPresent) // skip cells for which no entity was produced
+            .map(Optional::get)
+            .forEach(authorEntity -> authorEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
+rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)"
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when // vertebrate-study variant for tables whose header reads "Author"
+        FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
+        $authorTable: Table(hasHeader("Author"))
+    then
+        $authorTable.streamTableCellsWithHeader("Author")
+            .map(cell -> entityCreationService.bySemanticNode(cell, "CBI_author", EntityType.ENTITY))
+            .filter(Optional::isPresent) // skip cells for which no entity was produced
+            .map(Optional::get)
+            .forEach(authorEntity -> authorEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
 // Rule unit: CBI.11
 
 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header"
@@ -614,6 +653,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
             });
     end
 
+rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when // literal pre-check; note it needs the trailing dot although the regex makes it optional
+        $etAlSection: Section(containsString("et al."))
+    then // the 1 argument presumably selects capture group 1 (the name before "et al") - confirm against byRegex
+        entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $etAlSection)
+            .forEach(authorEntity -> {
+                authorEntity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
+                dictionary.recommendEverywhere(authorEntity);
+            });
+    end
+
 // Rule unit: CBI.17
 
 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
@@ -705,6 +756,19 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
             });
     end
 
+rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\""
+    agenda-group "LOCAL_DICTIONARY_ADDS"
+    when // table-free sections that contain both marker strings
+        $labSection: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:"))
+    then // text between the two markers is redacted as CBI_address and passed to dictionary.recommendEverywhere
+        entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $labSection)
+            .forEach(labEntity -> {
+                labEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
+                dictionary.recommendEverywhere(labEntity);
+            });
+    end
+
+
 // Rule unit: CBI.21
 rule "CBI.21.0: Redact short Authors section (non vertebrate study)"
     when
@@ -769,6 +833,13 @@ rule "PII.0.2: Redact all PII (vertebrate study)"
         $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
     end
 
+rule "PII.0.3: Redact all PII"
+    when // no study-type condition: applies to every matching PII text entity
+        $piiEntity: TextEntity(type() == "PII", dictionaryEntry)
+    then
+        $piiEntity.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
+    end
+
 // Rule unit: PII.1
 
 rule "PII.1.0: Redact Emails by RegEx"
@@ -813,6 +884,23 @@ rule "PII.1.4: Redact typoed Emails with indicator"
             .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"));
     end
 
+rule "PII.1.5: Redact Emails by RegEx"
+    when // only sections containing an "@" are scanned
+        $mailSection: Section(containsString("@"))
+    then
+        entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $mailSection)
+            .forEach(mailEntity -> mailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
+rule "PII.1.6: Redact typoed Emails with indicator"
+    when
+        $mailSection: Section(containsString("@") || containsStringIgnoreCase("mail"))
+    then // the pattern starts at a "mail" indicator and accepts "a" or "f" in the position of "@"
+        entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $mailSection)
+            .forEach(mailEntity -> mailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
+
 // Rule unit: PII.2
 rule "PII.2.0: Redact Phone and Fax by RegEx"
     when
@@ -931,6 +1019,33 @@ rule "PII.4.0: Redact line after contact information keywords"
             .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)"));
     end
 
+// PII.4.1 is matched once per list entry, so each keyword is listed exactly once (a repeated entry fires the rule twice for the same section).
+rule "PII.4.1: Redact line after contact information keywords"
+    when
+        $contactKeyword: String() from List.of("Contact point:",
+                                               "Contact:",
+                                               "Alternative contact:",
+                                               "European contact:",
+                                               "No:",
+                                               "Tel.:",
+                                               "Tel:",
+                                               "Telephone number:",
+                                               "Telephone No:",
+                                               "Telephone:",
+                                               "Phone No.",
+                                               "Phone:",
+                                               "Fax number:",
+                                               "Fax:",
+                                               "E-mail:",
+                                               "Email:",
+                                               "e-mail:",
+                                               "E-mail address:")
+        $section: Section(containsString($contactKeyword))
+    then // the rest of the line after the keyword becomes a PII entity; the keyword is echoed in the reason text
+        entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section)
+            .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
 // Rule unit: PII.5
 
 rule "PII.5.0: Redact line after contact information keywords reduced"
@@ -1008,6 +1123,18 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)"
             .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002"));
     end
 
+rule "PII.6.3: Redact line between contact keywords (non vertebrate study)"
+    when // NOTE(review): no vertebrate-study variant with this legal basis is visible in this hunk - confirm that is intended
+        not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
+        $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel")))
+    then // both keyword pairs are searched and their results redacted under the same rule id
+        Stream.concat(
+            entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section),
+            entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section)
+            )
+            .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"));
+    end
+
 // Rule unit: PII.7
 
 rule "PII.7.0: Redact contact information if applicant is found"
@@ -1146,6 +1273,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver
.forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -1253,6 +1388,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -1278,6 +1420,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries"