diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 75dec75e..1396c534 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -159,17 +159,18 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() - $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) ); + }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -190,20 +191,21 @@ rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" end -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { entity.skipWithReferences( - "CBI.4.0", + "CBI.4.1", "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); }); end @@ -249,20 +251,22 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $entity.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -352,17 +356,18 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() - $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) - ); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); end @@ -441,15 +446,13 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() - $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addMultipleAuthorsAsRecommendation(authorEntity); - }); - + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); end rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index f75d7e92..106f899c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -102,17 +102,18 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() - $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) ); + }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -133,20 +134,21 @@ rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" end -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { entity.skipWithReferences( - "CBI.4.0", + "CBI.4.1", "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); }); end @@ -192,20 +194,22 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $entity.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -227,17 +231,18 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() - $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) - ); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); end @@ -281,41 +286,19 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() - $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addMultipleAuthorsAsRecommendation(authorEntity); - }); - - end - -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) - TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() - TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() - $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() - then - - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addMultipleAuthorsAsRecommendation(authorEntity); - }); - + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); end rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" @@ -388,10 +371,9 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("et al.")) then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) @@ -401,20 +383,6 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - - // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" when @@ -467,10 +435,9 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) @@ -480,64 +447,30 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) - then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) - .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(laboratoryEntity); - }); - end - //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $pii: TextEntity(type() == "PII", dictionaryEntry) - then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - - // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("@")) - then - entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -563,39 +496,10 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - // Rule unit: PII.6 rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) then Stream.concat( @@ -605,23 +509,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) - then - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || getHeadline().containsString("Primary contact") || getHeadline().containsString("Alternative contact") || @@ -637,29 +528,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(getHeadline().containsString("applicant") || - getHeadline().containsString("Primary contact") || - getHeadline().containsString("Alternative contact") || - containsString("Applicant") || - containsString("Telephone number:")) - then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -675,44 +547,16 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsStringIgnoreCase("producer of the plant protection") || - containsStringIgnoreCase("producer of the active substance") || - containsStringIgnoreCase("manufacturer of the active substance") || - containsStringIgnoreCase("manufacturer:") || - containsStringIgnoreCase("Producer or producers of the active substance")) - then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) - then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -727,22 +571,12 @@ rule "PII.11.0: Redact On behalf of Sequani Ltd.:" // Rule unit: PII.12 rule "PII.12.0: Expand PII entities with salutation prefix" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entityToExpand: TextEntity(type() == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) then entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") .ifPresent(expandedEntity -> expandedEntity.apply("PII.12.0", "Expanded PII with salutation prefix", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.12.1: Expand PII entities with salutation prefix" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $entityToExpand: TextEntity(type() == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) - then - entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") - .ifPresent(expandedEntity -> expandedEntity.apply("PII.12.1", "Expanded PII with salutation prefix", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - //------------------------------------ Other rules ------------------------------------ @@ -759,20 +593,11 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 rule "ETC.2.0: Redact signatures (non vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $signature: Image(imageType == ImageType.SIGNATURE) - then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - // Rule unit: ETC.3 rule "ETC.3.0: Skip logos (non vertebrate study)" @@ -850,20 +675,11 @@ rule "ETC.7.0: Guidelines FileAttributes" // Rule unit: ETC.8 rule "ETC.8.0: Redact formulas (vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.FORMULA) then $logo.redact("ETC.8.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.8.1: Redact formulas (non vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $logo: Image(imageType == ImageType.FORMULA) - then - $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - end - //------------------------------------ AI rules ------------------------------------ diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 9d43763e..527faf09 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -159,17 +159,18 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() - $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) ); + }); end @@ -191,20 +192,21 @@ rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" end -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) .forEach(entity -> { entity.skipWithReferences( - "CBI.4.0", + "CBI.4.1", "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") + Stream.concat( + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); }); end @@ -250,20 +252,22 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $entity.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -353,17 +357,18 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() - $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) - ); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); end @@ -442,15 +447,13 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() - $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addMultipleAuthorsAsRecommendation(authorEntity); - }); - + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); end rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)"