From 3579c060337f3708b985c7fcf76944c2b6d67bd5 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Wed, 13 Mar 2024 16:59:14 +0200 Subject: [PATCH] RED-8680 - Add rules for syngenta sanitisation seeds --- .../resources/drools/acceptance_rules.drl | 139 ++++++++++++++++ .../drools/all_redact_manager_rules.drl | 147 +++++++++++++++++ .../src/test/resources/drools/rules.drl | 114 ++++++++++++++ .../resources/all_redact_manager_rules.drl | 149 ++++++++++++++++++ 4 files changed, 549 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index be46f5d1..c5d87449 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -181,6 +181,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -209,6 +222,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -260,6 +299,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -318,6 +369,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + //------------------------------------ PII rules ------------------------------------ @@ -345,6 +408,12 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: PII.1 @@ -374,6 +443,14 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -486,6 +563,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -563,6 +667,19 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -710,6 +827,14 @@ rule "PII.10.0: Redact study director abbreviation" .forEach(entity -> entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -777,6 +902,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -802,6 +934,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.5 rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 3261622e..c4a828b9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -397,6 +397,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -425,6 +438,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -612,6 +651,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -703,6 +754,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" @@ -768,6 +831,12 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: PII.1 @@ -813,6 +882,22 @@ rule "PII.1.4: Redact typoed Emails with indicator" .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -933,6 +1018,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1010,6 +1122,19 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -1147,6 +1272,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -1244,6 +1377,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -1269,6 +1409,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index cff2c57d..514b2297 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -272,6 +272,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -448,6 +461,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -539,6 +564,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + //------------------------------------ PII rules ------------------------------------ @@ -566,6 +603,12 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: PII.1 @@ -595,6 +638,14 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords" @@ -624,6 +675,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.6 rule "PII.6.0: Redact line between contact keywords" @@ -661,6 +739,20 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end + +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -798,6 +890,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -865,6 +965,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -890,6 +997,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 55cd6d88..892ec7f3 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -398,6 +398,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -426,6 +439,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -613,6 +652,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -704,6 +755,19 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" when @@ -768,6 +832,13 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: PII.1 rule "PII.1.0: Redact Emails by RegEx" @@ -812,6 +883,22 @@ rule "PII.1.4: Redact typoed Emails with indicator" .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -931,6 +1018,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1008,6 +1122,19 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -1145,6 +1272,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -1252,6 +1387,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -1277,6 +1419,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries"