From 586f759f8a2ee6051bebbfa978754b7246b6ce85 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 10 Sep 2024 13:19:06 +0200 Subject: [PATCH 1/4] RED-9859: Redactions found by et al. rule not skipped with published information - switch CBI.7.* with CBI.16.* --- .../resources/drools/acceptance_rules.drl | 148 +++++++++--------- .../drools/all_redact_manager_rules.drl | 148 +++++++++--------- .../resources/drools/efsa_sanitisation.drl | 68 ++++---- .../src/test/resources/drools/rules.drl | 107 +++++++------ .../migration/RuleFileMigrator.java | 41 ++++- .../management/models/RuleFileBluePrint.java | 10 +- .../resources/all_redact_manager_rules.drl | 147 +++++++++-------- .../management/RuleFileMigrationTest.java | 10 +- 8 files changed, 357 insertions(+), 322 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 68bb9fbb..051948f9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -131,56 +131,54 @@ rule "CBI.2.0: Do not redact genitive CBI Author" // Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" +rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $section: Section(containsString("et al.")) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.0", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + 
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" +rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); - end - -rule "CBI.7.2: Do not redact PII if published information found in Section without tables" - when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - hasEntitiesOfType("PII")) - then - $section.getEntitiesOfType("PII") - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.2", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.3: Do not redact PII if published information found in same table row" +rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); end @@ -291,54 +289,56 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables" when - $section: Section(containsString("et al.")) + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); - dictionary.recommendEverywhere(entity); + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); + $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); + end + +rule "CBI.16.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.3: Do not redact PII if published information found in same table row" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); + $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index b26c04e7..d97cd465 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -314,56 +314,54 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe // Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" +rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $section: Section(containsString("et al.")) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - 
.forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.0", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" +rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); - end - -rule "CBI.7.2: Do not redact PII if published information found in Section without tables" - when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - hasEntitiesOfType("PII")) - then - $section.getEntitiesOfType("PII") - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.2", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + 
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.3: Do not redact PII if published information found in same table row" +rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); end @@ -656,54 +654,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables" when - $section: Section(containsString("et al.")) + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); - dictionary.recommendEverywhere(entity); + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); + $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); + end + +rule "CBI.16.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.3: Do not redact PII if published information found in same table row" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); + $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl index 22f3dbff..cb5bb451 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl @@ -131,30 +131,30 @@ rule "CBI.2.0: Do not redact genitive CBI Author" // Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" +rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - 
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.0", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" +rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); end @@ -226,30 +226,30 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.16 -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); + $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 182faee1..0942d7a3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -216,6 +216,58 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red end +// Rule unit: CBI.7 +rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + + // Rule unit: CBI.8 rule "CBI.8.0: Redacted because Section contains must_redact entity" when @@ -426,58 +478,6 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio end -// Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); - end - - // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" when @@ -1223,7 +1223,6 @@ rule "AI.5.0: Combine and add NER Entities as CBI_address" end - // Rule unit: AI.6 rule "AI.6.0: Add all NER Entities of type Location" salience 999 @@ -1238,7 +1237,6 @@ rule "AI.6.0: Add all NER Entities of type Location" end - // Rule unit: AI.7 rule "AI.7.0: Add all NER Entities of type Address" salience 999 @@ -1252,6 +1250,7 @@ rule "AI.7.0: Add all NER Entities of type Address" .ifPresent(e -> e.skip("AI.7.0", ""))); end + //------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java index e8bb5be2..94975b8e 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/migration/RuleFileMigrator.java @@ -4,11 +4,13 @@ import java.io.File; import java.io.FileOutputStream; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Map; import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory; import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser; import com.knecon.fforesight.utility.rules.management.models.BasicRule; import 
com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint; +import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier; import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO; import lombok.SneakyThrows; @@ -21,17 +23,15 @@ import lombok.experimental.UtilityClass; @UtilityClass public class RuleFileMigrator { + @SneakyThrows public void migrateFile(File ruleFile) { RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath())); RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles(); - for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) { - List rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier()); - ruleFileBluePrint.removeRule(ruleToReplace.identifier()); - rulesToAdd.forEach(ruleFileBluePrint::addRule); - } + //replaceRules(ruleFileBluePrint, combinedBluePrint); + replaceRuleIdentifiers(combinedBluePrint, ruleFileBluePrint); String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint); String migratedFilePath = ruleFile.getAbsolutePath(); @@ -40,4 +40,35 @@ public class RuleFileMigrator { } } + + private static void replaceRules(RuleFileBluePrint ruleFileBluePrint, RuleFileBluePrint combinedBluePrint) { + + for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) { + List rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier()); + ruleFileBluePrint.removeRule(ruleToReplace.identifier()); + rulesToAdd.forEach(ruleFileBluePrint::addRule); + } + } + + + private static void replaceRuleIdentifiers(RuleFileBluePrint combinedBluePrint, RuleFileBluePrint ruleFileBluePrint) { + + Map identifierReplaceMap = Map.of("CBI.7.0", "CBI.16.0", "CBI.7.1", "CBI.16.1", "CBI.7.2", "CBI.16.2", "CBI.7.3", "CBI.16.3"); + for (String identifier : identifierReplaceMap.keySet()) { + RuleIdentifier ruleId = RuleIdentifier.fromString(identifier); + 
RuleIdentifier otherRuleId = RuleIdentifier.fromString(identifierReplaceMap.get(identifier)); + + List rulesToAdd = combinedBluePrint.findRuleByIdentifier(otherRuleId); + List otherRulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleId); + boolean removeRules = ruleFileBluePrint.removeRule(ruleId); + boolean removeOtherRules = ruleFileBluePrint.removeRule(otherRuleId); + if (removeRules) { + rulesToAdd.forEach(ruleFileBluePrint::addRule); + } + if (removeOtherRules) { + otherRulesToAdd.forEach(ruleFileBluePrint::addRule); + } + } + } + } diff --git a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java index 5b1abd6d..2b2ae91f 100644 --- a/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java +++ b/redaction-service-v1/rules-management/src/main/java/com/knecon/fforesight/utility/rules/management/models/RuleFileBluePrint.java @@ -7,15 +7,19 @@ import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; public record RuleFileBluePrint(String imports, String globals, String queries, List ruleClasses) { - public void removeRule(RuleIdentifier ruleIdentifier) { + public boolean removeRule(RuleIdentifier ruleIdentifier) { + + AtomicBoolean wasRemoved = new AtomicBoolean(false); findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit()) .ifPresent(ruleUnit -> { - ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier)); + boolean removed = ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier)); + wasRemoved.set(removed); if (ruleUnit.rules().isEmpty()) { 
ruleClass.ruleUnits().remove(ruleUnit); } @@ -23,7 +27,7 @@ public record RuleFileBluePrint(String imports, String globals, String queries, ruleClasses().remove(ruleClass); } })); - + return wasRemoved.get(); } diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 8370921d..054174a8 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -315,58 +315,55 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe // Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" +rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $section: Section(containsString("et al.")) then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.0", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" +rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); - end - -rule "CBI.7.2: Do not redact PII if published information found in Section without tables" - when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - hasEntitiesOfType("PII")) - then - $section.getEntitiesOfType("PII") - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.2", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.3: Do not redact PII if published information found in same table row" +rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) - $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) then - $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); end +rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end // Rule unit: CBI.8 rule "CBI.8.0: Redacted because Section contains must_redact entity" @@ -656,54 +653,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables" when - $section: Section(containsString("et al.")) + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); - dictionary.recommendEverywhere(entity); + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); + $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); + end + +rule "CBI.16.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.3: Do not redact PII if published information found in same table row" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); + $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); end diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java index f25a0075..bea04152 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java @@ -25,10 +25,12 @@ public class RuleFileMigrationTest { // Put your redaction service drools paths and dossier-templates paths both RM and DM here static final List ruleFileDirs = List.of( - "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", - "/home/kschuettler/iqser/redaction/dossier-templates-v2", - "/home/kschuettler/iqser/fforesight/dossier-templates-v2", - "/home/kschuettler/iqser/business-logic"); + 
//"/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", + // "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2" + "/Users/maverickstuder/Documents/PM" + + ); + @Test -- 2.47.2 From 4ebbf3ae28b403e5a61ecce617cd643e1eee859a Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Tue, 10 Sep 2024 13:55:54 +0200 Subject: [PATCH 2/4] RED-9859: Redactions found by et. al. rule not skipped with published information - modify unit test --- .../service/redaction/v1/server/RedactionAcceptanceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index c7dad2bc..9e69bd34 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -338,7 +338,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY)) - .filter(e -> e.getMatchedRule().startsWith("CBI.16")) + .filter(e -> e.getMatchedRule().startsWith("CBI.7")) .findAny() .orElseThrow(); IdRemoval removal = buildIdRemoval(desireeEtAl.getId()); -- 2.47.2 From 06ace50a683b641cf477d198a90894088707334a Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Thu, 12 Sep 2024 12:09:59 +0200 Subject: [PATCH 3/4] RED-9859: update intersecting nodes on kie session insertion --- 
.../v1/server/service/document/EntityCreationService.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index a6c592b5..5550ed0c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -1162,6 +1162,10 @@ public class EntityCreationService { if (kieSession != null) { kieSession.insert(textEntity); + textEntity.getIntersectingNodes() + .stream() + .filter(nodesInKieSession::contains) + .forEach(o -> kieSession.update(kieSession.getFactHandle(o), o)); } } -- 2.47.2 From a88c731f98010856d9e7320433616058412a7337 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Thu, 12 Sep 2024 12:09:59 +0200 Subject: [PATCH 4/4] RED-9859: update intersecting nodes on kie session insertion --- .../v1/server/service/document/EntityCreationService.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index a6c592b5..5550ed0c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -1162,6 +1162,10 @@ public class EntityCreationService { if (kieSession != null) { kieSession.insert(textEntity); + textEntity.getIntersectingNodes() + .stream() + .filter(nodesInKieSession::contains) + .forEach(o -> kieSession.update(kieSession.getFactHandle(o), o)); } } -- 2.47.2