diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisFinalizationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisFinalizationService.java index aba284fd..45858e22 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisFinalizationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisFinalizationService.java @@ -61,6 +61,7 @@ public class AnalysisFinalizationService { // as workaround for duplicate key exceptions occurring due to simultaneous analyses and reanalyses save instead of insert is used // also analysis numbers should be incremented in every follow-up request, so checking if the log exists is not needed if (!redactionStorageService.entityLogExists(analyzeRequest.getDossierId(), analyzeRequest.getFileId())) { + entityLog.setEntityLogEntry(entityLogChanges.getNewEntityLogEntries()); redactionStorageService.saveEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLog); } else { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java index 577f4ae2..3ecfde68 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java @@ -41,7 +41,7 @@ public class EntityChangeLogService { var now = OffsetDateTime.now(); if 
(previousEntityLogEntries.isEmpty()) { newEntityLogEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now, Collections.emptyMap()))); - return new EntryChanges(newEntityLogEntries, new ArrayList<>()); + return new EntryChanges(newEntityLogEntries.stream().filter(entry -> !entry.getState().equals(EntryState.REMOVED)).collect(Collectors.toList()), new ArrayList<>()); } List toInsert = new ArrayList<>(); @@ -51,6 +51,9 @@ public class EntityChangeLogService { .filter(entry -> entry.getId().equals(entityLogEntry.getId())) .findAny(); if (optionalPreviousEntity.isEmpty()) { + if (entityLogEntry.getState().equals(EntryState.REMOVED)) { + continue; + } entityLogEntry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now, Collections.emptyMap())); toInsert.add(entityLogEntry); continue; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index 3e121a9f..421f4e3d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -180,7 +180,6 @@ public class EntityLogCreatorService { .stream() .filter(entity -> !entity.getValue().isEmpty()) .filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval) - .filter(entity -> !entity.removed()) .toList(); List images = document.streamAllImages() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl index 871c748a..851e61cb 
100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl @@ -1454,12 +1454,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -1468,10 +1479,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() 
== $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type HINT when contained by FALSE_POSITIVE"); retract($entity) end @@ -1593,15 +1615,15 @@ rule "X.11.0: Remove dictionary entity which intersects with a manual entity" //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> 
update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index c894f043..7006f332 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -307,7 +307,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY)) - .filter(e -> e.getMatchedRule().startsWith("CBI.7")) + .filter(e -> e.getMatchedRule().startsWith("CBI.0")) .findAny() .orElseThrow(); IdRemoval removal = buildIdRemoval(desireeEtAl.getId()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 89ede8e5..8dc9ad8a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -245,9 +245,9 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult result = analyzeService.analyze(request); - var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); + var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var values = redactionLog.getEntityLogEntry() + var values = entityLog.getEntityLogEntry() .stream() .map(EntityLogEntry::getValue) .collect(Collectors.toList()); @@ -1279,6 +1279,10 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { @Test public void entityIsAppliedAfterRecategorize() throws IOException { + String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl"); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES)); + + AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 (1).pdf"); ClassPathResource imageServiceResponseFileResource = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 (1).IMAGE_INFO.json"); @@ -1299,9 +1303,12 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .findFirst() .get(); + + var newId = UUID.randomUUID().toString(); request.setManualRedactions(ManualRedactions.builder() .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() .annotationId("3029651d0842a625f2d23f8375c23600") +// .annotationId(newId) .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") .value("0049 331 441 551 14") .requestDate(OffsetDateTime.now()) @@ -1311,6 +1318,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .build())) .recategorizations(Set.of(ManualRecategorization.builder() 
.annotationId("3029651d0842a625f2d23f8375c23600") +// .annotationId(newId) .type("CBI_author") .legalBasis("") .requestDate(OffsetDateTime.now()) @@ -1326,6 +1334,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { var changedAnnotation = entityLog.getEntityLogEntry() .stream() .filter(entityLogEntry -> entityLogEntry.getId().equals("3029651d0842a625f2d23f8375c23600")) +// .filter(entityLogEntry -> entityLogEntry.getId().equals(newId)) .findFirst() .get(); @@ -1736,14 +1745,14 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var listCBI_0_0 = entityLog.getEntityLogEntry() + var listCBI_0_3 = entityLog.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.0")) + .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.3")) .toList(); - var listCBI_0_1 = entityLog.getEntityLogEntry() + var listCBI_0_4 = entityLog.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.1")) + .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.4")) .toList(); FileAttribute vertebrateNo = FileAttribute.builder().id("fileAttributeId").label("Vertebrate Study").placeholder("{fileattributes.vertebrateStudy}").value("No").build(); @@ -1755,18 +1764,18 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { var entityLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var list2CBI_0_0 = entityLog2.getEntityLogEntry() + var list2CBI_0_3 = entityLog2.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.0")) + .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.3")) .toList(); - var list2CBI_0_1 = entityLog2.getEntityLogEntry() + var list2CBI_0_4 = entityLog2.getEntityLogEntry() .stream() - 
.filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.1")) + .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.4")) .toList(); - assertEquals(listCBI_0_0.size(), list2CBI_0_1.size()); - assertEquals(listCBI_0_1.size(), list2CBI_0_0.size()); + assertEquals(listCBI_0_3.size(), list2CBI_0_4.size()); + assertEquals(listCBI_0_4.size(), list2CBI_0_3.size()); request.setFileAttributes(List.of(vertebrateYes)); @@ -1776,18 +1785,18 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { var entityLog3 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var list3CBI_0_0 = entityLog3.getEntityLogEntry() + var list3CBI_0_3 = entityLog3.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.0")) + .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.3")) .toList(); - var list3CBI_0_1 = entityLog3.getEntityLogEntry() + var list3CBI_0_4 = entityLog3.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.1")) + .filter(entityLogEntry -> entityLogEntry.getMatchedRule().startsWith("CBI.0.4")) .toList(); - assertEquals(list3CBI_0_0.size(), list2CBI_0_1.size()); - assertEquals(list3CBI_0_1.size(), list2CBI_0_0.size()); + assertEquals(list3CBI_0_3.size(), list2CBI_0_4.size()); + assertEquals(list3CBI_0_4.size(), list2CBI_0_3.size()); } @@ -1979,7 +1988,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .get("reason"), "No vertebrate found -> removed by manual override"); assertEquals(responseDavidKsenia.getChanges() .get(1).getPropertyChanges() - .get("matchedRule"), "CBI.3.2 -> "); + .get("matchedRule"), "CBI.13.2 -> "); assertEquals(responseDavidKsenia.getChanges() .get(1).getPropertyChanges() .get("state"), "SKIPPED -> IGNORED"); @@ -2033,7 +2042,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .get("reason"), "No vertebrate 
found -> Recategorized entities are applied by default., recategorized by manual override"); assertEquals(responseDavidKsenia.getChanges() .get(1).getPropertyChanges() - .get("matchedRule"), "CBI.3.2 -> MAN.3.3"); + .get("matchedRule"), "CBI.13.2 -> MAN.3.3"); assertEquals(responseDavidKsenia.getChanges() .get(1).getPropertyChanges() .get("legalBasis"), " -> new legal basis"); @@ -2171,11 +2180,13 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .get(); var entityLogEntry2 = entityLog.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getValue().equals(assessment)) + .filter(entityLogEntry -> entityLogEntry.getId().equals("64795d65b8654503a25f7c44fce71fcb")) .findFirst() .get(); - assertEquals(entityLogEntry1.getState(), EntryState.APPLIED); + assertEquals(entityLogEntry1.getState(), EntryState.REMOVED); + assertEquals(entityLogEntry1.getMatchedRule(), "DICT.0.0"); assertEquals(entityLogEntry2.getState(), EntryState.APPLIED); + assertEquals(entityLogEntry2.getMatchedRule(), "CBI.0.3"); - assertThat(entityLogEntry2.getStartOffset() > entityLogEntry1.getEndOffset()); + assertThat(entityLogEntry2.getStartOffset() > entityLogEntry1.getEndOffset()).isTrue(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java index d5fa1c4f..75a77785 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/entity/TextEntityTest.java @@ -16,11 +16,22 @@ public class TextEntityTest { PrecursorEntity entity = PrecursorEntity.builder().type("PII").entityType(EntityType.ENTITY).build(); entity.skip("CBI.1.0", ""); entity.skip("CBI.2.0", ""); - entity.skip("CBI.3.0", ""); - 
entity.skip("CBI.4.1", ""); - entity.skip("CBI.4.0", ""); - assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.4.1"); - assertThat(entity.getMatchedRuleUnit()).isEqualTo(4); + entity.skip("CBI.13.1", ""); + entity.skip("CBI.13.3", ""); + assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.13.3"); + assertThat(entity.getMatchedRuleUnit()).isEqualTo(13); + } + + @Test + public void testMatchedRule2() { + + PrecursorEntity entity = PrecursorEntity.builder().type("PII").entityType(EntityType.ENTITY).build(); + entity.redact("PII.4.1", "", "legal"); + entity.apply("MAN.3.0", ""); + entity.apply("MAN.3.3", ""); + entity.skip("CBI.13.2", ""); + assertThat(entity.getMatchedRule().getRuleIdentifier().toString()).isEqualTo("CBI.13.2"); + assertThat(entity.getMatchedRuleUnit()).isEqualTo(13); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java index 8116c7c3..094905fb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java @@ -322,7 +322,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { .findFirst() .get(); - assertEquals("CBI.7.0", asyaLyon.getMatchedRule()); + assertEquals("CBI.0.0", asyaLyon.getMatchedRule()); assertEquals("Published Information found in section", asyaLyon.getReason()); assertNotEquals(asyaLyon.getState(), EntryState.APPLIED); diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java index f52a2864..abaaabe3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java @@ -18,6 +18,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.mock.mockito.MockBean; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; @@ -134,15 +135,17 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { assertEquals(1, notFoundManualEntities.size()); assertTrue(document.getEntities().isEmpty()); - List redactionLogEntries = entityLogCreatorService.createInitialEntityLog(new AnalyzeRequest(), - document, - notFoundManualEntities, - new DictionaryVersion(), - 0L).getEntityLog().getEntityLogEntry(); + EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(new AnalyzeRequest(), + document, + notFoundManualEntities, + new DictionaryVersion(), + 0L); + List entityLogEntries = entityLogChanges.getEntityLog().getEntityLogEntry(); + + assertEquals(1, entityLogEntries.size()); + assertEquals(value, 
entityLogEntries.get(0).getValue()); + assertEquals(type, entityLogEntries.get(0).getType()); - assertEquals(1, redactionLogEntries.size()); - assertEquals(value, redactionLogEntries.get(0).getValue()); - assertEquals(type, redactionLogEntries.get(0).getType()); return new DocumentAndEntity(document, notFoundManualEntities.get(0)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index a909ab29..eb0b93b5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -85,20 +85,58 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" +rule "CBI.0.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.skip("CBI.0.0", "Author found by \"et al\" regex"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.3: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.3", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" +rule "CBI.0.4: Redact CBI Authors (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.4", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -130,58 +168,6 @@ rule "CBI.2.0: Do not redact genitive CBI Author" end -// Rule unit: CBI.7 -rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: 
Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); - end - - // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" @@ -287,7 +273,6 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study $table.getEntitiesOfType("CBI_author").stream().filter(IEntity::applied).forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); end - // Rule unit: CBI.16 rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables" when @@ -366,9 +351,9 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC when $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -418,7 +403,7 @@ rule "CBI.23.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (no not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -427,7 +412,7 @@ rule "CBI.23.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ve FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -439,7 +424,7 @@ 
rule "PII.0.0: Redact all PII" when $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end rule "PII.0.1: Redact all PII (non vertebrate study)" @@ -524,7 +509,7 @@ rule "PII.2.0: Redact Phone and Fax by RegEx" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end @@ -534,7 +519,6 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -542,7 +526,7 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -552,7 +536,6 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -560,7 +543,7 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> 
contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -568,7 +551,7 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" // Rule unit: PII.3 rule "PII.3.0: Redact telephone numbers by RegEx" when - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); @@ -577,7 +560,7 @@ rule "PII.3.0: Redact telephone numbers by RegEx" rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); @@ -586,12 +569,30 @@ rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ 
-]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.3.4: Redact telephone numbers by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.4", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.3.5: Redact telephone numbers by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.5", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords" @@ -600,8 +601,6 @@ rule "PII.4.0: Redact line after contact information keywords" "Contact:", "Alternative contact:", "European contact:", - "No:", - "Contact:", "Tel.:", "Tel:", "Telephone number:", @@ -648,62 +647,6 @@ rule "PII.4.1: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 
4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end -rule "PII.4.2: Redact line after contact information keywords (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.4.3: Redact line after contact information keywords (Vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.3", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -803,12 +746,7 @@ rule "PII.7.0: Redact contact information if applicant is found" containsString("Applicant") || 
containsString("Telephone number:")) then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) + entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "Tel.:", "Tel:", "Telephone number:", "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section) .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -909,7 +847,7 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end @@ -923,8 +861,9 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" // Rule unit: PII.10 -rule "PII.10.0: Redact study director abbreviation" +rule "PII.10.0: Redact study director abbreviation (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) then entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) @@ -1027,7 +966,7 @@ rule "ETC.3.2: Redact logos (vertebrate study)" FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end rule "ETC.3.3: Redact logos" @@ -1321,12 +1260,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 
+ when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -1335,10 +1285,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end @@ -1459,29 +1420,41 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when 
$manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), 
!hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 72f8ecdb..0e8ab237 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -98,20 +98,58 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" +rule "CBI.0.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.skip("CBI.0.0", "Author found by \"et al\" regex"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.3: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.3", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" +rule "CBI.0.4: Redact CBI Authors (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.4", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -143,136 +181,6 @@ rule "CBI.2.0: Do not redact genitive CBI Author" end -// Rule unit: CBI.3 -rule "CBI.3.0: Redacted because Section contains a vertebrate" - when - $section: Section(!hasTables(), 
hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.0", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("vertebrate") - ); - }); - end - -rule "CBI.3.1: Redacted because table row contains a vertebrate" - when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.1", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) - ); - }); - end - -rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" - when - $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> entity.skip("CBI.3.2", "No vertebrate found")); - end - -rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" - when - $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - then - $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> entity.skip("CBI.3.3", "No vertebrate found")); - end - - -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - 
hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.0", - "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") - ); - }); - end - -rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.1", - "Vertebrate but a no redaction indicator found", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.5 -rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("redaction_indicator"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.0", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - Stream.concat( - $section.getEntitiesOfType("redaction_indicator").stream(), - $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() - ); - }); - end - -rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.1", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - // Rule unit: CBI.6 rule "CBI.6.0: Do not redact Names and Addresses if vertebrate but also published_information is found in Section" when @@ -313,91 +221,6 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe end -// Rule unit: CBI.7 -rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); - end - - -// Rule unit: CBI.8 -rule "CBI.8.0: Redacted because Section contains must_redact entity" - when - $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.0", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("must_redact") - ); - }); - end - -rule "CBI.8.1: Redacted because table row contains must_redact entity" - when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); - }); - end - - // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" @@ -517,19 +340,19 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study end -// Rule unit: CBI.12 +// Rule unit: CBI.12 - table rules remains rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" agenda-group "LOCAL_DICTIONARY_ADDS" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y", "yes"), !containsAnyString("Y/N"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + entityCreationService.bySemanticNode($rowCell, "redaction_indicator", EntityType.HINT) .ifPresent(yesEntity -> { - yesEntity.skip("CBI.12.0", "must_redact"); + yesEntity.skip("CBI.12.0", "redaction_indicator"); }); end @@ -540,7 +363,7 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate 
study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(!header, containsAnyString("Yes", "Y", "yes"), !containsAnyString("Y/N"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) @@ -548,7 +371,6 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); - end rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" @@ -583,7 +405,137 @@ rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'V end -// Rule unit: CBI.13 +//from CBI.3.3 +rule "CBI.12.4: Redacted because table row contains a redaction_indicator" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.4", + "Redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + + +//from CBI.3.1 +rule "CBI.12.5: Redacted because table row contains a vertebrate" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.5", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + ); + }); + end + + +rule "CBI.12.6: Skip Addresses on TableCell with header 'Owner'" + salience -1 + when + $table: Table() + then + $table.streamTableCellsWithHeader("Owner") + .flatMap(tc -> tc.getEntitiesOfType("CBI_address").stream()) + .forEach(adrEntity -> adrEntity.skip("CBI.12.6", "Not redacted because CBI_address in Owner column is never redacted")); + end + +rule "CBI.12.11: Redacted because table row contains a must_redact in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.11", + "Must redact found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); + end + +rule "CBI.12.12: Not redacted because table row contains a redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.12.12", + "redaction_indicator found", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + +rule "CBI.12.13: Redacted because table row contains a vertebrate and a redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of("redaction_indicator", "vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.13", + "Vertebrate and redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + +rule "CBI.12.14: Not redacted because table row contains a vertebrate but also a no_redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("no_redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of("no_redaction_indicator", "vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.12.14", + "Vertebrate and no_redaction_indicator found", + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity) + ); + }); + end + +rule "CBI.12.15: Redacted because table row contains a vertebrate, a no_redaction_indicator and a redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of("redaction_indicator", "vertebrate", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.15", + "Vertebrate and redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + + +// Rule unit: CBI.13 - section rules rule "CBI.13.0: Ignore CBI Address recommendations" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -593,6 +545,88 @@ rule "CBI.13.0: Ignore CBI Address recommendations" retract($entity) end +// from CBI.3.0 +rule "CBI.13.1: Redacted because Section contains a vertebrate" + when + $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.1", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $section.getEntitiesOfType("vertebrate") + ); + }); + end + +//from CBI.3.2 +rule "CBI.13.2: Do not redact because Section does not contain a vertebrate" + when + $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> entity.skip("CBI.13.2", "No vertebrate found")); + end + + +// from CBI.4.0 +rule "CBI.13.3: Do not redact Names and Addresses if vertebrate and no_redaction_indicator is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.13.3", + "Vertebrate but a no redaction indicator found", + $section.getEntitiesOfType("no_redaction_indicator") + ); + }); + end + + +// from CBI.5.0 +rule "CBI.13.4: Redact Names and Addresses if vertebrate and no_redaction_indicator but also redaction_indicator 
is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("redaction_indicator"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.4", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( + $section.getEntitiesOfType("redaction_indicator").stream(), + $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() + ); + }); + end + +// From CBI.8.0 +rule "CBI.13.5: Redacted because Section contains must_redact entity" + when + $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.5", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 63 (2b)", + $section.getEntitiesOfType("must_redact") + ); + }); + end // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" @@ -625,7 +659,8 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv then entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - + entityCreationService.byString($residueKeyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -646,7 +681,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio then entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); - + entityCreationService.byString($residueKeyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); @@ -706,7 +742,6 @@ rule "CBI.16.3: Do not redact PII if published information found in same table r $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); end - // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" when @@ -765,9 +800,9 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC when $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -812,7 +847,43 @@ rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC // Rule unit: CBI.21 -rule "CBI.21.0: Redact short Authors section (non vertebrate study)" +// from CBI.6 +rule "CBI.21.0: Do not redact Names and Addresses if published_information is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.21.0", + "Published_information found", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.21.1: Do not redact Names and Addresses if published_information is found in table row" + when + $table: Table(hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $tableCell: TableCell($row: row) from $table.streamTableCells().toList() + TextEntity(type() == "published_information", active(), entityType == EntityType.HINT) from $tableCell.getEntities() + then + $table.streamRow($row) + .flatMap(tc -> tc.getEntitiesOfType("CBI_author", "CBI_address").stream()) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.21.1", + "Published_information found", + $table.getEntitiesOfTypeInSameRow("published_information", entity) + ); + }); + end + + +rule "CBI.21.2: Redact short Authors section (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule @@ -820,11 +891,11 @@ rule "CBI.21.0: Redact short Authors section (non vertebrate study)" then 
entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) .forEach(entity -> { - entity.redact("CBI.21.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.21.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); }); end -rule "CBI.21.1: Redact short Authors section (vertebrate study)" +rule "CBI.21.3: Redact short Authors section (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule @@ -832,7 +903,7 @@ rule "CBI.21.1: Redact short Authors section (vertebrate study)" then entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) .forEach(entity -> { - entity.redact("CBI.21.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.21.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); }); end @@ -855,7 +926,7 @@ rule "CBI.23.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (no not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION 
DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -864,7 +935,7 @@ rule "CBI.23.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ve FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -876,7 +947,7 @@ rule "PII.0.0: Redact all PII" when $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); end rule "PII.0.1: Redact all PII (non vertebrate study)" @@ -977,7 +1048,7 @@ rule "PII.2.0: Redact Phone and Fax by RegEx" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -987,7 +1058,6 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -995,7 +1065,7 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1005,7 +1075,6 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" $section: Section(containsString("Contact") || 
containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -1013,7 +1082,7 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -1029,7 +1098,7 @@ rule "PII.2.3: Redact phone numbers without indicators" // Rule unit: PII.3 rule "PII.3.0: Redact telephone numbers by RegEx" when - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); @@ -1038,7 +1107,7 @@ rule "PII.3.0: Redact telephone numbers by RegEx" rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); @@ -1047,12 +1116,38 @@ rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.3.3: Redact telephone numbers by RegEx" + when + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.3", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.3.4: Redact telephone numbers by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.4", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.3.5: Redact telephone numbers by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.5", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords" @@ -1061,8 +1156,6 @@ rule "PII.4.0: Redact line after contact information keywords" "Contact:", "Alternative contact:", "European contact:", - "No:", - "Contact:", "Tel.:", "Tel:", "Telephone number:", @@ -1109,62 +1202,6 @@ rule "PII.4.1: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end -rule "PII.4.2: Redact line after contact information keywords (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.4.3: Redact line after contact information keywords (Vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.3", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1264,12 +1301,7 @@ rule "PII.7.0: Redact contact information if applicant is found" containsString("Applicant") || containsString("Telephone number:")) then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone 
number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) + entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "Tel.:", "Tel:", "Telephone number:", "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section) .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -1365,12 +1397,13 @@ rule "PII.8.2: Redact contact information if producer is found (vertebrate study end +// UPDATED WITH LIMIT // Rule unit: PII.9 rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end @@ -1402,8 +1435,9 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" // Rule unit: PII.10 -rule "PII.10.0: Redact study director abbreviation" +rule "PII.10.0: Redact study director abbreviation (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) then entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) @@ -1411,6 +1445,16 @@ rule "PII.10.0: Redact study director abbreviation" end +rule "PII.10.1: Redact study director abbreviation (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) + then + entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.10.1", "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" when @@ -1431,6 +1475,8 @@ rule "PII.12.0: Expand PII entities with salutation prefix" .ifPresent(expandedEntity -> expandedEntity.apply("PII.12.0", "Expanded PII with salutation prefix", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end + +// Rule unit: PII.12 rule "PII.12.1: Expand PII entities with salutation prefix" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -1535,7 +1581,7 @@ rule "ETC.3.2: Redact logos (vertebrate study)" FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 
178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end rule "ETC.3.3: Redact logos" @@ -1545,6 +1591,14 @@ rule "ETC.3.3: Redact logos" $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end +// from preGFL Knoell +rule "ETC.3.4: Skip logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.skip("ETC.3.4", "No Logos in preGFL documents"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" @@ -1689,6 +1743,21 @@ rule "ETC.12.1: Redact dossier_redaction (Vertebrate study)" $dossierRedaction.redact("ETC.12.1", "Dossier dictionary entry found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.12.2: Skip dossier_redaction (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.12.2", "Dossier dictionary entry found"); + end + +rule "ETC.12.3: Skip dossier_redaction (Vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.12.3", "Dossier dictionary entry found"); + end //------------------------------------ AI rules ------------------------------------ @@ -1816,7 +1885,7 @@ rule "AI.7.0: Add all NER Entities of type Address" end -//------------------------------------ Manual changes rules ------------------------------------ +//------------------------------------ Manual redaction rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -1975,12 +2044,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), 
$entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -1989,10 +2069,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT 
when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end @@ -2124,18 +2215,41 @@ rule "X.11.0: Remove dictionary entity which intersects with a manual entity" end +rule "X.11.1: Remove non manual entity which intersects with a manual entity" + salience 64 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) + then + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); + end + + //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when 
$dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 335e791b..77d6bf13 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1388,12 +1388,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + 
$contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -1402,10 +1413,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove 
Entity of type HINT when contained by FALSE_POSITIVE"); retract($entity) end @@ -1527,15 +1549,15 @@ rule "X.11.0: Remove dictionary entity which intersects with a manual entity" //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl index 1dfcc9c0..a3fd859f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/efsa_sanitisation.drl @@ -85,23 +85,60 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" 
//------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" +rule "CBI.0.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.skip("CBI.0.0", "Author found by \"et al\" regex"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.3: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.3", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" +rule "CBI.0.4: Redact CBI Authors (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.4", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end - // Rule unit: CBI.1 rule "CBI.1.0: Do not redact CBI Address (non vertebrate Study)" when @@ -130,34 +167,6 @@ rule "CBI.2.0: Do not redact genitive CBI Author" end -// Rule unit: CBI.7 -rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - - // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" @@ -305,7 +314,7 @@ rule "CBI.23.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (no not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -314,7 +323,7 @@ 
rule "CBI.23.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ve FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -374,7 +383,6 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -382,7 +390,7 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -392,7 
+400,6 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -400,7 +407,7 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -417,7 +424,7 @@ rule "PII.2.3: Redact phone numbers without indicators" rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); @@ -426,13 +433,86 @@ rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value 
soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.3.4: Redact telephone numbers by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.4", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.3.5: Redact telephone numbers by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.5", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.5 +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + 
$contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.6 +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: 
Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + // Rule unit: PII.7 rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when @@ -510,8 +590,9 @@ rule "PII.8.2: Redact contact information if producer is found (vertebrate study // Rule unit: PII.10 -rule "PII.10.0: Redact study director abbreviation" +rule "PII.10.0: Redact study director abbreviation (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) then entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) @@ -519,6 +600,16 @@ rule "PII.10.0: Redact study director abbreviation" end +rule "PII.10.1: Redact study director abbreviation (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) + then + entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.10.1", "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" when @@ -593,7 +684,7 @@ rule "ETC.3.2: Redact logos (vertebrate study)" FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -617,6 +708,22 @@ rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confi retract($dossierRedaction); end +// Rule unit: ETC.12 +rule "ETC.12.2: Skip dossier_redaction (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.12.2", "Dossier dictionary entry found"); + end + +rule "ETC.12.3: Skip dossier_redaction (Vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.12.3", "Dossier dictionary entry found"); + end //------------------------------------ AI rules ------------------------------------ @@ -880,12 +987,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); 
+ retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -894,10 +1012,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type HINT when contained by FALSE_POSITIVE"); retract($entity) end @@ -1018,29 +1147,41 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which 
intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines 
contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 5d626852..03612b29 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -149,7 +149,7 @@ rule "AI.7.0: Add all NER Entities of type Address" end -//------------------------------------ Manual changes rules ------------------------------------ +//------------------------------------ Manual redaction rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -304,7 +304,90 @@ rule "MAN.4.1: Apply legal basis change" //------------------------------------ Entity merging rules ------------------------------------ +// Rule unit: X.0 +rule "X.0.0: Remove Entity contained by Entity of same type" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + 
$contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.getIntersectingNodes().forEach(node -> update(node)); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); + end + + +// Rule unit: X.2 +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("X.2.1", "remove Entity of type HINT when contained by FALSE_POSITIVE"); + retract($entity) + end + + +// Rule unit: X.3 +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + salience 64 + when + $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then 
+ $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); + retract($recommendation); + end + + +// Rule unit: X.4 +rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY with same type" + salience 256 + when + $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $entity.addEngines($recommendation.getEngines()); + $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); + retract($recommendation); + end + + // Rule unit: X.5 +rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" + salience 256 + when + $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); + retract($recommendation); + end + rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATION" salience 256 when @@ -316,6 +399,30 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI end +// Rule unit: X.6 +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" + salience 32 + when + $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) + then + $lowerRank.getIntersectingNodes().forEach(node -> update(node)); + $lowerRank.remove("X.6.0", "remove Entity of 
lower rank, when contained by entity of type ENTITY or HINT"); + retract($lowerRank); + end + +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" + salience 32 + when + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) + then + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); + end + + // Rule unit: X.8 rule "X.8.0: Remove Entity when text range and type equals to imported Entity" salience 257 @@ -361,29 +468,54 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + 
$manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); + end + + +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +rule "FA.1.0: Remove duplicate FileAttributes" + salience 64 + when + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) + then + retract($duplicate); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index fb754b3c..a74133d3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -85,151 +85,20 @@ rule "SYN.0.0: Redact if CTL/* or BL/* was found (Non Vertebrate Study)" //------------------------------------ CBI rules ------------------------------------ - -// Rule unit: CBI.3 -rule "CBI.3.0: Redacted because Section contains a vertebrate" - when - $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.0", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("vertebrate") - ); - }); - end - -rule "CBI.3.1: Redacted because table row contains a vertebrate" - when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.1", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) - ); - }); - end - -rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" - when - $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> entity.skip("CBI.3.2", "No vertebrate found")); - end - -rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" - when - $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - then - $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> entity.skip("CBI.3.3", "No vertebrate found")); - end - - -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.0", - "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") - ); - }); - end - -rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - 
.forEach(entity -> { - entity.skipWithReferences( - "CBI.4.1", - "Vertebrate but a no redaction indicator found", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.5 -rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("redaction_indicator"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.0", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - Stream.concat( - $section.getEntitiesOfType("redaction_indicator").stream(), - $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() - ); - }); - end - -rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.1", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.7 -rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" +// Rule unit: CBI.0 +rule "CBI.0.0: Add CBI_author with \"et al.\" RegEx" agenda-group "LOCAL_DICTIONARY_ADDS" when $section: Section(containsString("et al.")) then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + entity.skip("CBI.0.0", "Author found by \"et al\" regex"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -237,12 +106,12 @@ rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -250,56 +119,11 @@ rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); - end - - -// Rule unit: CBI.8 -rule "CBI.8.0: Redacted because Section contains must_redact entity" - when - $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.0", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("must_redact") - ); - }); - end - -rule "CBI.8.1: Redacted because table row contains must_redact entity" - when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); - }); - end - // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" @@ -353,19 +177,19 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study end -// Rule unit: CBI.12 +// Rule unit: CBI.12 - table rules remains rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" agenda-group "LOCAL_DICTIONARY_ADDS" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y", "yes"), !containsAnyString("Y/N"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + entityCreationService.bySemanticNode($rowCell, "redaction_indicator", EntityType.HINT) .ifPresent(yesEntity -> { - yesEntity.skip("CBI.12.0", "must_redact"); + yesEntity.skip("CBI.12.0", "redaction_indicator"); }); end @@ -376,7 +200,7 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from 
$table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(!header, containsAnyString("Yes", "Y", "yes"), !containsAnyString("Y/N"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) @@ -384,7 +208,6 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); - end rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" @@ -419,6 +242,126 @@ rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'V end +//from CBI.3.3 +rule "CBI.12.4: Redacted because table row contains a redaction_indicator" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.4", + "Redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + + +//from CBI.3.1 +rule "CBI.12.5: Redacted because table row contains a vertebrate" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.5", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + ); + }); + end + + +// Rule unit: CBI.13 - section rules +// from CBI.3.0 +rule "CBI.13.1: Redacted because Section contains a vertebrate" + when + $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.1", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $section.getEntitiesOfType("vertebrate") + ); + }); + end + +//from CBI.3.2 +rule "CBI.13.2: Do not redact because Section does not contain a vertebrate" + when + $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> entity.skip("CBI.13.2", "No vertebrate found")); + end + + +// from CBI.4.0 +rule "CBI.13.3: Do not redact Names and Addresses if vertebrate and no_redaction_indicator is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.13.3", + "Vertebrate but a no redaction indicator found", + $section.getEntitiesOfType("no_redaction_indicator") + ); + }); + end + + +// from CBI.5.0 +rule "CBI.13.4: Redact Names and Addresses if vertebrate and no_redaction_indicator but also redaction_indicator is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("redaction_indicator"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.4", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + Stream.concat( + $section.getEntitiesOfType("redaction_indicator").stream(), + $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() + ); + }); + end + +// From CBI.8.0 +rule "CBI.13.5: Redacted because Section contains must_redact entity" + when + $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.5", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 63 (2b)", + $section.getEntitiesOfType("must_redact") + ); + }); + end + // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when @@ -450,7 +393,8 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv then entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - + entityCreationService.byString($residueKeyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -471,7 +415,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio then entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); - + entityCreationService.byString($residueKeyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); @@ -536,9 +481,9 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC when $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -588,7 +533,7 @@ rule "CBI.23.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (no not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -597,7 +542,7 @@ rule "CBI.23.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ve FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -609,7 +554,7 @@ 
rule "PII.0.0: Redact all PII" when $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end rule "PII.0.1: Redact all PII (non vertebrate study)" @@ -687,8 +632,6 @@ rule "PII.4.0: Redact line after contact information keywords" "Contact:", "Alternative contact:", "European contact:", - "No:", - "Contact:", "Tel.:", "Tel:", "Telephone number:", @@ -735,62 +678,6 @@ rule "PII.4.1: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end -rule "PII.4.2: Redact line after contact information keywords (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.4.3: Redact line after contact information keywords (Vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - 
"Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.3", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.6 rule "PII.6.0: Redact line between contact keywords" @@ -850,12 +737,7 @@ rule "PII.7.0: Redact contact information if applicant is found" containsString("Applicant") || containsString("Telephone number:")) then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) + entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "Tel.:", "Tel:", "Telephone number:", "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section) .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end @@ -956,7 +838,7 @@ rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -1064,7 +946,7 @@ rule "ETC.3.2: Redact logos (vertebrate study)" FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end rule "ETC.3.3: Redact logos" @@ -1251,7 +1133,7 @@ rule "AI.7.0: Add all NER Entities of type Address" end -//------------------------------------ Manual changes rules ------------------------------------ +//------------------------------------ Manual redaction rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" @@ -1410,12 +1292,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: 
TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -1424,10 +1317,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: 
TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end @@ -1548,29 +1452,41 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule 
"DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index d9bf6a88..fa34d1fd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -72,12 +72,46 @@ query "getFileAttributes" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( 
[A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.3: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.3", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "CBI.0.4: Redact CBI Authors (vertebrate Study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $entity: TextEntity(type() == "CBI_author", dictionaryEntry) + then + $entity.redact("CBI.0.4", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -332,12 +366,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, 
!hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -346,10 +391,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) 
+ $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end @@ -470,29 +526,41 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" 
+rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl index a0ca7b1f..840ce18e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -458,12 +458,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == 
EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -472,10 +483,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove 
Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end @@ -607,29 +629,41 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines 
contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl index 7b125c31..699c4182 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl @@ -358,12 +358,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) 
+ then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -372,14 +383,49 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +// Rule unit: X.2 +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: 
TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + // Rule unit: X.3 rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 @@ -507,29 +553,41 @@ rule "X.10.0: remove false positives of ai" // Rule unit: X.11 -rule "X.11.0: Remove dictionary entity which intersects with a manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" salience 64 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) - $dictionaryEntity: TextEntity(intersects($manualEntity), dictionaryEntry, engines not contains Engine.MANUAL) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) then - $dictionaryEntity.remove("X.11.0", "remove dictionary entity which intersects with a manual entity"); - retract($dictionaryEntity); + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: 
TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); end //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 6336f279..afd36897 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -13,52 +13,56 @@ import java.util.stream.Stream; import java.util.Optional; import com.iqser.red.service.redaction.v1.server.logger.RulesLogger; -import com.iqser.red.service.redaction.v1.server.document.graph.*; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.*; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.*; -import com.iqser.red.service.redaction.v1.server.document.graph.textblock.*; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType; +import com.iqser.red.service.redaction.v1.server.model.document.*; +import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule 
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; +import com.iqser.red.service.redaction.v1.server.model.NerEntities; +import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import 
com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; +import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; + import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; -import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; -import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; -import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity; -import com.iqser.red.service.redaction.v1.server.document.graph.TextRange; -import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; -import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; -import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility -import 
com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType -import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType -import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntity -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity -import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType + +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto.LayoutEngine; global Document document global EntityCreationService entityCreationService global ManualChangesApplicationService manualChangesApplicationService -global NerEntitiesAdapter nerEntitiesAdapter global Dictionary dictionary global RulesLogger logger - //------------------------------------ queries ------------------------------------ query "getFileAttributes" @@ -81,27 +85,53 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (Non Vertebrate Study)" +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.3: Redact CBI Authors (non vertebrate Study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then $entity.apply("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (Vertebrate Study)" +rule "CBI.0.4: Redact CBI Authors (vertebrate Study)" when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.apply("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.4", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: CBI.1 -rule "CBI.1.0: Don't redact CBI Address (Non Vertebrate Study)" +rule "CBI.1.0: Do not redact CBI Address (non vertebrate Study)" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + not 
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_address", dictionaryEntry) then $entity.skip("CBI.1.0", "Address found for Non Vertebrate Study"); @@ -112,101 +142,73 @@ rule "CBI.1.1: Redact CBI Address (Vertebrate Study)" FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") $entity: TextEntity(type() == "CBI_address", dictionaryEntry) then - $entity.apply("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.1.1", "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: CBI.2 -rule "CBI.2.0: Don't redact genitive CBI_author" +rule "CBI.2.0: Do not redact genitive CBI Author" when - $entity: TextEntity(type() == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), applied()) + $entity: TextEntity(type() == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s")) then entityCreationService.byTextRange($entity.getTextRange(), "CBI_author", EntityType.FALSE_POSITIVE, document) .ifPresent(falsePositive -> falsePositive.skip("CBI.2.0", "Genitive Author found")); end -// Rule unit: CBI.7 -rule "CBI.7.0: Do not redact Names and Addresses if published information found in section without tables" - when - $section: Section(!hasTables(), - hasEntitiesOfType("published_information"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(redactionEntity -> { - redactionEntity.skipWithReferences( - "CBI.7.0", - "Published Information found in section", - $section.getEntitiesOfType("published_information") - ); - }); - end - -rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" - when - $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author")) - $cellsWithPublishedInformation: TableCell() from 
$table.streamTableCellsWhichContainType("published_information").toList() - $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() - $author: TextEntity(type() == "CBI_author", active()) from $tableCell.getEntities() - then - $author.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $author)); - end - - // Rule unit: CBI.9 -rule "CBI.9.0: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)" +rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasHeader("Author(s)")) then $table.streamTableCellsWithHeader("Author(s)") .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.0", "Author(s) found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "CBI.9.1: Redact all Cell's with Header Author as CBI_author (non vertebrate study)" +rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasHeader("Author")) then $table.streamTableCellsWithHeader("Author") .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) 
.map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: CBI.10 -rule "CBI.10.0: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)" +rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasHeader("Author(s)")) then $table.streamTableCellsWithHeader("Author(s)") .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.0", "Author(s) found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "CBI.10.1: Redact all Cell's with Header Author as CBI_author (vertebrate study)" +rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasHeader("Author")) then $table.streamTableCellsWithHeader("Author") .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.apply("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + 
.forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -217,35 +219,35 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study when $table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N")) then - $table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); + $table.getEntitiesOfType("CBI_author").stream().filter(IEntity::applied).forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity)); end // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("et al.")) + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.16.0", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" +rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row" when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(containsString("et al.")) + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false); - }); + $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end @@ -268,33 +270,53 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - not FileAttribute(label == "Vertebrate Study", value == "Yes") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when - FileAttribute(label == "Vertebrate Study", value == "Yes") + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(!hasTables(), 
containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end +// Rule unit: CBI.23 +rule "CBI.23.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) + .forEach(authorEntity -> authorEntity.redact("CBI.23.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "CBI.23.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) + .forEach(authorEntity -> authorEntity.redact("CBI.23.1", "AUTHOR(S) was found", "Article 
39(e)(2) of Regulation (EC) No 178/2002")); + end + + //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 @@ -334,15 +356,22 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.3: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + // Rule unit: PII.2 -ule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -350,7 +379,7 @@ ule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> 
contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -360,7 +389,6 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -368,46 +396,227 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -// Rule unit: PII.9 -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +// Rule unit: PII.3 +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was 
found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"COMPLETION DATE\" (non vertebrate study)" + +rule "PII.3.4: Redact telephone numbers by RegEx (Non vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION 
DATE:"), !containsString("STUDY COMPLETION DATE:")) + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.3", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.4", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.4: Redact between \"AUTHOR(S)\" and \"STUDY COMPLETION DATE\" (non vertebrate study)" +rule "PII.3.5: Redact telephone numbers by RegEx (vertebrate study)" when - FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes") - $section: Section(!hasTables(), containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:")) + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) then - entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section) - .forEach(authorEntity -> authorEntity.apply("PII.9.4", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.5", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + 
+// Rule unit: PII.5 +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.6 +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of 
Regulation (EC) No 178/2002")); + end + +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.7 +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" + when + 
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.8 +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + 
Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.10 +rule "PII.10.0: Redact study director abbreviation (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) + then + entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) 
+ .forEach(entity -> entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.10.1: Redact study director abbreviation (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) + then + entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.10.1", "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.11 +rule "PII.11.0: Redact On behalf of Sequani Ltd.:" + when + $section: Section(!hasTables(), containsString("On behalf of Sequani Ltd.: Name Title")) + then + entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section) + .forEach(authorEntity -> authorEntity.redact("PII.11.0", "On behalf of Sequani Ltd.: Name Title was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + + +// Rule unit: PII.12 +rule "PII.12.0: Expand PII entities with salutation prefix" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $entityToExpand: TextEntity(type() == "PII", anyMatch(textBefore, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + then + entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") + .ifPresent(expandedEntity -> expandedEntity.apply("PII.12.0", "Expanded PII with salutation prefix", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.12.1: Expand PII entities with salutation prefix" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $entityToExpand: TextEntity(type() == "PII", anyMatch(textBefore, 
"\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*")) + then + entityCreationService.byPrefixExpansionRegex($entityToExpand, "\\b(Mrs?|Ms|Miss|Sir|Madame?|Mme)\\s?\\.?\\s*") + .ifPresent(expandedEntity -> expandedEntity.apply("PII.12.1", "Expanded PII with salutation prefix", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -418,7 +627,7 @@ rule "ETC.0.0: Purity Hint" when $section: Section(containsStringIgnoreCase("purity")) then - entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.ENTITY, 1, $section) + entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section) .forEach(hint -> hint.skip("ETC.0.0", "hint only")); end @@ -426,84 +635,90 @@ rule "ETC.0.0: Purity Hint" // Rule unit: ETC.2 rule "ETC.2.1: Redact signatures (non vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value == "Yes") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.apply("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "ETC.2.2: Redact signatures (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 +rule "ETC.3.1: Skip logos (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $logo: Image(imageType == ImageType.LOGO) + then + $logo.skip("ETC.3.1", "Logo Found"); + end + rule 
"ETC.3.2: Redact logos (vertebrate study)" when - not FileAttribute(label == "Vertebrate Study", value == "Yes") + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.apply("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - end - -rule "ETC.3.3: Redact logos (non vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value == "Yes") - $logo: Image(imageType == ImageType.LOGO) - then - $logo.apply("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end -// Rule unit: ETC.5 -rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" +// Rule unit: ETC.12 +rule "ETC.12.2: Skip dossier_redaction (Non vertebrate study)" when - FileAttribute(label == "Confidentiality", value == "confidential") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.skip("ETC.12.2", "Dossier dictionary entry found"); end -rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" - salience 256 +rule "ETC.12.3: Skip dossier_redaction (Vertebrate study)" when - not FileAttribute(label == "Confidentiality", value == "confidential") + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); - retract($dossierRedaction); + $dossierRedaction.skip("ETC.12.3", "Dossier dictionary entry found"); end 
//------------------------------------ AI rules ------------------------------------ // Rule unit: AI.0 -rule "AI.0.0: add all NER Entities of type CBI_author" +rule "AI.0.0: Add all NER Entities of type CBI_author" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("CBI_author")) then nerEntities.streamEntitiesOfType("CBI_author") + .filter(entity -> entity.value().length() > 3) + .filter(entity -> entity.value().length() < 40) .forEach(nerEntity -> entityCreationService.optionalByNerEntity(nerEntity, EntityType.RECOMMENDATION, document)); end // Rule unit: AI.1 -rule "AI.1.0: combine and add NER Entities as CBI_address" +rule "AI.1.0: Combine and add NER Entities as CBI_address" salience 999 when nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY")) then - nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) - .map(boundary -> entityCreationService.forceByBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document)) - .forEach(entity -> entity.addEngine(Engine.NER)); + entityCreationService.combineNerEntitiesToCbiAddressDefaults(nerEntities, "CBI_address", EntityType.RECOMMENDATION, document).toList(); end -//------------------------------------ Manual redaction rules ------------------------------------ +//------------------------------------ Manual changes rules ------------------------------------ // Rule unit: MAN.0 rule "MAN.0.0: Apply manual resize redaction" salience 128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) + $resizeRedaction: ManualResizeRedaction($id: annotationId, $requestDate: requestDate) + not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) $entityToBeResized: TextEntity(matchesAnnotationId($id)) then manualChangesApplicationService.resize($entityToBeResized, $resizeRedaction); @@ -515,7 +730,8 @@ rule "MAN.0.0: Apply manual resize redaction" rule "MAN.0.1: Apply manual resize redaction" salience 
128 when - $resizeRedaction: ManualResizeRedaction($id: annotationId) + $resizeRedaction: ManualResizeRedaction($id: annotationId, $requestDate: requestDate) + not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) $imageToBeResized: Image(id == $id) then manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); @@ -529,7 +745,7 @@ rule "MAN.0.1: Apply manual resize redaction" rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128 when - $idRemoval: IdRemoval($id: annotationId) + $idRemoval: IdRemoval($id: annotationId, !removeFromDictionary, !removeFromAllDossiers) $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) then $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); @@ -553,7 +769,6 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to // Rule unit: MAN.2 rule "MAN.2.0: Apply force redaction" - no-loop true salience 128 when $force: ManualForceRedaction($id: annotationId) @@ -562,10 +777,10 @@ rule "MAN.2.0: Apply force redaction" $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); end rule "MAN.2.1: Apply force redaction to images" - no-loop true salience 128 when $force: ManualForceRedaction($id: annotationId) @@ -574,6 +789,7 @@ rule "MAN.2.1: Apply force redaction to images" $imageToForce.getManualOverwrite().addChange($force); update($imageToForce); update($imageToForce.getParent()); + retract($force); end @@ -581,19 +797,32 @@ rule "MAN.2.1: Apply force redaction to images" rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualRecategorization($id: annotationId) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + $recategorization: ManualRecategorization($id: annotationId, $type: type, $requestDate: requestDate) + not ManualRecategorization($id == 
annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() != $type) then $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); - manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); - retract($recategorization); + $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); update($entityToBeRecategorized); + retract($recategorization); end -rule "MAN.3.1: Apply image recategorization" +rule "MAN.3.1: Apply entity recategorization of same type" salience 128 when - $recategorization: ManualRecategorization($id: annotationId) + $recategorization: ManualRecategorization($id: annotationId, $type: type, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() == $type) + then + $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); + retract($recategorization); + end + +rule "MAN.3.2: Apply image recategorization" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) $imageToBeRecategorized: Image($id == id) then manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); @@ -602,14 +831,25 @@ rule "MAN.3.1: Apply image recategorization" retract($recategorization); end +rule "MAN.3.3: Apply recategorization entities by default" + salience 128 + when + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + then + $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); + end + + // Rule unit: MAN.4 rule "MAN.4.0: Apply legal basis change" salience 128 when - $legalbasisChange: ManualLegalBasisChange($id: annotationId) + 
$legalBasisChange: ManualLegalBasisChange($id: annotationId) $imageToBeRecategorized: Image($id == id) then - $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); + $imageToBeRecategorized.getManualOverwrite().addChange($legalBasisChange); + update($imageToBeRecategorized) + retract($legalBasisChange) end rule "MAN.4.1: Apply legal basis change" @@ -619,59 +859,67 @@ rule "MAN.4.1: Apply legal basis change" $entityToBeChanged: TextEntity(matchesAnnotationId($id)) then $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + update($entityToBeChanged) + retract($legalBasisChange) end //------------------------------------ Entity merging rules ------------------------------------ // Rule unit: X.0 -rule "X.0.0: remove Entity contained by Entity of same type" +rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !resized(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then - $contained.getIntersectingNodes().forEach(node -> update(node)); $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); end - -// Rule unit: X.1 -rule "X.1.0: merge intersecting Entities of same type" - salience 64 +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 when - $first: TextEntity($type: type(), $entityType: entityType, !resized(), active()) - $second: TextEntity(intersects($first), type() 
== $type, entityType == $entityType, this != $first, !resized(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then - TextEntity mergedEntity = entityCreationService.mergeEntitiesOfSameType(List.of($first, $second), $type, $entityType, document); - $first.remove("X.1.0", "merge intersecting Entities of same type"); - $second.remove("X.1.0", "merge intersecting Entities of same type"); - retract($first); - retract($second); - mergedEntity.getIntersectingNodes().forEach(node -> update(node)); + $contained.getIntersectingNodes().forEach(node -> update(node)); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end // Rule unit: X.2 -rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, entityType == EntityType.ENTITY, !resized(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) then - $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("X.2.1", 
"remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + // Rule unit: X.3 -rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -679,47 +927,151 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM // Rule unit: X.4 -rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" +rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY with same type" salience 256 when - $entity: TextEntity($type: type(), entityType == EntityType.ENTITY, active()) - $recommendation: TextEntity(intersects($entity), type() == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) + $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); - $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); + $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); retract($recommendation); end // Rule unit: 
X.5 -rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" +rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when - $entity: TextEntity(entityType == EntityType.ENTITY, active()) - $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) + $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then - $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); + $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); + retract($recommendation); + end + +rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATION" + salience 256 + when + $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); end // Rule unit: X.6 -rule "X.6.0: remove Entity of lower rank, when intersected by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when - $higherRank: TextEntity($type: type(), entityType == EntityType.ENTITY, active()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !resized(), active()) + $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < 
dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when intersected by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" + salience 32 + when + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) + then + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); + end + + +// Rule unit: X.8 +rule "X.8.0: Remove Entity when text range and type equals to imported Entity" + salience 257 + when + $entity: TextEntity($type: type(), engines contains Engine.IMPORTED, active()) + $other: TextEntity(getTextRange().equals($entity.getTextRange()), this != $entity, type() == $type, engines not contains Engine.IMPORTED) + then + $other.remove("X.8.0", "remove Entity when text range and type equals to imported Entity"); + $entity.addEngines($other.getEngines()); + retract($other); + end + +rule "X.8.1: Remove Entity when intersected by imported Entity" + salience 256 + when + $entity: TextEntity(engines contains Engine.IMPORTED, active()) + $other: TextEntity(intersects($entity), this != $entity, engines not contains Engine.IMPORTED) + then + $other.remove("X.8.1", "remove Entity when intersected by imported Entity"); + retract($other); + end + + +// Rule unit: X.9 +rule "X.9.0: Merge mostly contained signatures" + when + $aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains 
LayoutEngine.AI) + $signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8)) + then + $aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature"); + $signature.addEngine(LayoutEngine.AI); + end + + +// Rule unit: X.10 +rule "X.10.0: remove false positives of ai" + when + $anyImage: Image(engines contains LayoutEngine.ALGORITHM) + $aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8)) + then + $aiSignature.remove("X.10.0", "Removed because false positive"); + end + + +// Rule unit: X.11 +rule "X.11.1: Remove non manual entity which intersects with a manual entity" + salience 64 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) + then + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" + salience 70 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) + $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) + then + $manualEntity.addEngines($nonManualEntity.getEngines()); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); + retract($nonManualEntity); + end + + +//------------------------------------ Dictionary merging rules ------------------------------------ + +// Rule unit: DICT.0 +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" + salience 64 + when + $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains 
Engine.DOSSIER_DICTIONARY) + $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); + end + //------------------------------------ File attributes rules ------------------------------------ // Rule unit: FA.1 -rule "FA.1.0: remove duplicate FileAttributes" +rule "FA.1.0: Remove duplicate FileAttributes" salience 64 when $fileAttribute: FileAttribute($label: label, $value: value) @@ -736,7 +1088,11 @@ rule "LDS.0.0: Run local dictionary search" agenda-group "LOCAL_DICTIONARY_ADDS" salience -999 when - DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() then - entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList(); + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getMatchedRulesForLocalDictionaryEntry(entity.getValue()); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); + }); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 2e7944f0..7d841391 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ 
b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -63,7 +63,6 @@ global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary global RulesLogger logger - //------------------------------------ queries ------------------------------------ query "getFileAttributes" @@ -99,20 +98,58 @@ rule "SYN.1.0: Recommend CTL/BL laboratory that start with BL or CTL" //------------------------------------ CBI rules ------------------------------------ // Rule unit: CBI.0 -rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" +rule "CBI.0.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.skip("CBI.0.0", "Author found by \"et al\" regex"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.0.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.0.3: Redact CBI Authors (non vertebrate Study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.0", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.3", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "CBI.0.1: Redact CBI Authors (vertebrate Study)" +rule "CBI.0.4: Redact CBI Authors (vertebrate Study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $entity: TextEntity(type() == "CBI_author", dictionaryEntry) then - $entity.redact("CBI.0.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $entity.redact("CBI.0.4", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end @@ -144,136 +181,6 @@ rule "CBI.2.0: Do not redact genitive CBI Author" end -// Rule unit: CBI.3 -rule "CBI.3.0: Redacted because Section contains a vertebrate" - when - $section: Section(!hasTables(), 
hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.0", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("vertebrate") - ); - }); - end - -rule "CBI.3.1: Redacted because table row contains a vertebrate" - when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.3.1", - "Vertebrate found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) - ); - }); - end - -rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" - when - $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> entity.skip("CBI.3.2", "No vertebrate found")); - end - -rule "CBI.3.3: Do not redact because table row does not contain a vertebrate" - when - $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - then - $table.streamEntitiesWhereRowContainsNoEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> entity.skip("CBI.3.3", "No vertebrate found")); - end - - -// Rule unit: CBI.4 -rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("vertebrate"), - 
hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.0", - "Vertebrate but a no redaction indicator found", - $section.getEntitiesOfType("no_redaction_indicator") - ); - }); - end - -rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( - "CBI.4.1", - "Vertebrate but a no redaction indicator found", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - -// Rule unit: CBI.5 -rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in Section" - when - $section: Section(!hasTables(), - hasEntitiesOfType("redaction_indicator"), - hasEntitiesOfType("no_redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.0", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - Stream.concat( - $section.getEntitiesOfType("redaction_indicator").stream(), - $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() - ); - }); - end - -rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" - when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.5.1", - "no_redaction_indicator but also redaction_indicator found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() - ); - }); - end - - // Rule unit: CBI.6 rule "CBI.6.0: Do not redact Names and Addresses if vertebrate but also published_information is found in Section" when @@ -314,90 +221,6 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe end -// Rule unit: CBI.7 -rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); - dictionary.recommendEverywhere(entity); - }); - end - -rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx" - agenda-group "LOCAL_DICTIONARY_ADDS" - when - $section: Section(containsString("et al.")) - then - entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) - .forEach(entity -> { - entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); - dictionary.recommendEverywhere(entity); - }); - end - -// Rule unit: CBI.8 -rule "CBI.8.0: Redacted because Section contains must_redact entity" - when - $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.0", - "must_redact entity found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $section.getEntitiesOfType("must_redact") - ); - }); - end - -rule "CBI.8.1: Redacted because table row contains must_redact entity" - when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) - then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) - ); - }); - end - - // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" @@ -516,19 +339,19 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study end -// Rule unit: CBI.12 +// Rule unit: CBI.12 - table rules remains rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" agenda-group "LOCAL_DICTIONARY_ADDS" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y", "yes"), !containsAnyString("Y/N"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + entityCreationService.bySemanticNode($rowCell, "redaction_indicator", EntityType.HINT) .ifPresent(yesEntity -> { - yesEntity.skip("CBI.12.0", "must_redact"); + yesEntity.skip("CBI.12.0", "redaction_indicator"); }); end @@ -539,7 +362,7 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() TableCell(header, containsStringIgnoreCase("Vertebrate 
study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() - TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(!header, containsAnyString("Yes", "Y", "yes"), !containsAnyString("Y/N"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) @@ -547,7 +370,6 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); - end rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" @@ -582,7 +404,137 @@ rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'V end -// Rule unit: CBI.13 +//from CBI.3.3 +rule "CBI.12.4: Redacted because table row contains a redaction_indicator" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.4", + "Redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + + +//from CBI.3.1 +rule "CBI.12.5: Redacted because table row contains a vertebrate" + when + $table: Table(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.5", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + ); + }); + end + + +rule "CBI.12.6: Skip Addresses on TableCell with header 'Owner'" + salience -1 + when + $table: Table() + then + $table.streamTableCellsWithHeader("Owner") + .flatMap(tc -> tc.getEntitiesOfType("CBI_address").stream()) + .forEach(adrEntity -> adrEntity.skip("CBI.12.6", "Not redacted because CBI_address in Owner column is never redacted")); + end + +rule "CBI.12.11: Redacted because table row contains a must_redact in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.11", + "Must redact found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); + end + +rule "CBI.12.12: Not redacted because table row contains a redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.12.12", + "redaction_indicator found", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + +rule "CBI.12.13: Redacted because table row contains a vertebrate and a redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of("redaction_indicator", "vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.13", + "Vertebrate and redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + +rule "CBI.12.14: Not redacted because table row contains a vertebrate but also a no_redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("no_redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of("no_redaction_indicator", "vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.12.14", + "Vertebrate and no_redaction_indicator found", + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity) + ); + }); + end + +rule "CBI.12.15: Redacted because table row contains a vertebrate, a no_redaction_indicator and a redaction_indicator in table without header 'Vertebrate study Y/N'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(!hasHeaderIgnoreCase("Vertebrate Study Y/N"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of("redaction_indicator", "vertebrate", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.12.15", + "Vertebrate and redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity) + ); + }); + end + + +// Rule unit: CBI.13 - section rules rule "CBI.13.0: Ignore CBI Address recommendations" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -592,6 +544,88 @@ rule "CBI.13.0: Ignore CBI Address recommendations" retract($entity) end +// from CBI.3.0 +rule "CBI.13.1: Redacted because Section contains a vertebrate" + when + $section: Section(!hasTables(), hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.1", + "Vertebrate found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + $section.getEntitiesOfType("vertebrate") + ); + }); + end + +//from CBI.3.2 +rule "CBI.13.2: Do not redact because Section does not contain a vertebrate" + when + $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> entity.skip("CBI.13.2", "No vertebrate found")); + end + + +// from CBI.4.0 +rule "CBI.13.3: Do not redact Names and Addresses if vertebrate and no_redaction_indicator is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.13.3", + "Vertebrate but a no redaction indicator found", + $section.getEntitiesOfType("no_redaction_indicator") + ); + }); + end + + +// from CBI.5.0 +rule "CBI.13.4: Redact Names and Addresses if vertebrate and no_redaction_indicator but also redaction_indicator 
is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("vertebrate"), + hasEntitiesOfType("redaction_indicator"), + hasEntitiesOfType("no_redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.4", + "no_redaction_indicator but also redaction_indicator found", + "Reg (EC) No 1107/2009 Art. 63 (2g)", + Stream.concat( + $section.getEntitiesOfType("redaction_indicator").stream(), + $section.getEntitiesOfType("no_redaction_indicator").stream()).toList() + ); + }); + end + +// From CBI.8.0 +rule "CBI.13.5: Redacted because Section contains must_redact entity" + when + $section: Section(!hasTables(), hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.13.5", + "must_redact entity found", + "Reg (EC) No 1107/2009 Art. 63 (2b)", + $section.getEntitiesOfType("must_redact") + ); + }); + end // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" @@ -624,7 +658,8 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv then entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - + entityCreationService.byString($residueKeyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -645,7 +680,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio then entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); - + entityCreationService.byString($residueKeyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.1", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)")); @@ -764,9 +800,9 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC when $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) then - entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -811,7 +847,43 @@ rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC // Rule unit: CBI.21 -rule "CBI.21.0: Redact short Authors section (non vertebrate study)" +// from CBI.6 +rule "CBI.21.0: Do not redact Names and Addresses if published_information is found in Section" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + then + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.21.0", + "Published_information found", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.21.1: Do not redact Names and Addresses if published_information is found in table row" + when + $table: Table(hasEntitiesOfType("published_information"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $tableCell: TableCell($row: row) from $table.streamTableCells().toList() + TextEntity(type() == "published_information", active(), entityType == EntityType.HINT) from $tableCell.getEntities() + then + $table.streamRow($row) + .flatMap(tc -> tc.getEntitiesOfType("CBI_author", "CBI_address").stream()) + .forEach(entity -> { + entity.skipWithReferences( + "CBI.21.1", + "Published_information found", + $table.getEntitiesOfTypeInSameRow("published_information", entity) + ); + }); + end + + +rule "CBI.21.2: Redact short Authors section (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule @@ -819,11 +891,11 @@ rule "CBI.21.0: Redact short Authors section (non vertebrate study)" then 
entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) .forEach(entity -> { - entity.redact("CBI.21.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.21.2", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); }); end -rule "CBI.21.1: Redact short Authors section (vertebrate study)" +rule "CBI.21.3: Redact short Authors section (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsAnyStringIgnoreCase("author(s)", "author", "authors"), length() < 50, getTreeId().get(0) <= 20) //TODO: evaluate the reason of this rule @@ -831,7 +903,7 @@ rule "CBI.21.1: Redact short Authors section (vertebrate study)" then entityCreationService.byRegexIgnoreCase("(?<=author\\(?s\\)?\\s\\n?)([\\p{Lu}\\p{L} ]{5,15}(,|\\n)?){1,3}", "CBI_author", EntityType.ENTITY, $section) .forEach(entity -> { - entity.redact("CBI.21.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.21.3", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); }); end @@ -854,7 +926,7 @@ rule "CBI.23.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (no not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION 
DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -863,7 +935,7 @@ rule "CBI.23.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ve FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "CBI_author", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("CBI.23.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -875,7 +947,7 @@ rule "PII.0.0: Redact all PII" when $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); end rule "PII.0.1: Redact all PII (non vertebrate study)" @@ -976,7 +1048,7 @@ rule "PII.2.0: Redact Phone and Fax by RegEx" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -986,7 +1058,6 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" $section: Section(containsString("Contact") || containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -994,7 +1065,7 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1004,7 +1075,6 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" $section: Section(containsString("Contact") || 
containsString("Telephone") || containsString("Phone") || - containsString("Ph.") || containsString("Fax") || containsString("Tel") || containsString("Ter") || @@ -1012,7 +1082,7 @@ rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fel") || containsString("Fer")) then - entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^m]|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter[^\\w]|mobile|fel[^\\w]|fer[^\\w])[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -1028,7 +1098,7 @@ rule "PII.2.3: Redact phone numbers without indicators" // Rule unit: PII.3 rule "PII.3.0: Redact telephone numbers by RegEx" when - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); @@ -1037,7 +1107,7 @@ rule "PII.3.0: Redact telephone numbers by RegEx" rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); @@ -1046,12 +1116,39 @@ rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $section: Section(matchesRegex("[+]\\d{1,}")) + $section: Section(!hasTables(), matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.3.3: Redact telephone numbers by RegEx" + when + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.3", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.3.4: Redact telephone numbers by RegEx (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.4", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.3.5: Redact telephone numbers by RegEx (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $rowCell: TableCell(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $rowCell) + .forEach(entity -> entity.redact("PII.3.5", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords" when @@ -1059,8 +1156,6 @@ rule "PII.4.0: Redact line after contact information keywords" "Contact:", "Alternative contact:", "European contact:", - "No:", - "Contact:", "Tel.:", "Tel:", "Telephone number:", @@ -1107,62 +1202,6 @@ rule "PII.4.1: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); end -rule "PII.4.2: Redact line after contact information keywords (Non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); - end - -rule "PII.4.3: Redact line after contact information keywords (Vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.3", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); - end - // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1262,12 +1301,7 @@ rule "PII.7.0: Redact contact information if applicant is found" containsString("Applicant") || containsString("Telephone number:")) then - Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone 
number:", - "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), - Stream.concat( - entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), - entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) - )) + entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "Tel.:", "Tel:", "Telephone number:", "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section) .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end @@ -1363,12 +1397,13 @@ rule "PII.8.2: Redact contact information if producer is found (vertebrate study end +// UPDATED WITH LIMIT // Rule unit: PII.9 rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end @@ -1401,8 +1436,9 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" // Rule unit: PII.10 -rule "PII.10.0: Redact study director abbreviation" +rule "PII.10.0: Redact study director abbreviation (non vertebrate study)" when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) then entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) @@ -1410,6 +1446,16 @@ rule "PII.10.0: Redact study director abbreviation" end +rule "PII.10.1: Redact study director abbreviation (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsString("KATH") || containsString("BECH") || containsString("KML")) + then + entityCreationService.byRegexIgnoreCase("((KATH)|(BECH)|(KML)) ?(\\d{4})","PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.10.1", "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + end + + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" when @@ -1441,6 +1487,7 @@ rule "PII.12.1: Expand PII entities with salutation prefix" .ifPresent(expandedEntity -> expandedEntity.apply("PII.12.1", "Expanded PII with salutation prefix", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end + // Rule unit: PII.13 rule "PII.13.0: Add recommendation for PII after Contact Person" when @@ -1535,7 +1582,7 @@ rule "ETC.3.2: Redact logos (vertebrate study)" FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 
178/2002"); end rule "ETC.3.3: Redact logos" @@ -1545,6 +1592,14 @@ rule "ETC.3.3: Redact logos" $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); end +// from preGFL Knoell +rule "ETC.3.4: Skip logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.skip("ETC.3.4", "No Logos in preGFL documents"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" @@ -1632,6 +1687,7 @@ rule "ETC.8.1: Redact formulas (non vertebrate study)" $logo.redact("ETC.8.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end + // Rule unit: ETC.9 rule "ETC.9.0: Redact skipped impurities" when @@ -1649,6 +1705,7 @@ rule "ETC.9.1: Redact impurities" $skippedImpurities.redact("ETC.9.1", "Impurity found", "Article 63(2)(b) of Regulation (EC) No 1107/2009"); end + // Rule unit: ETC.10 rule "ETC.10.0: Redact Product Composition Information" when @@ -1657,6 +1714,7 @@ rule "ETC.10.0: Redact Product Composition Information" $compositionInformation.redact("ETC.10.0", "Product Composition Information found", "Article 63(2)(d) of Regulation (EC) No 1107/2009"); end + // Rule unit: ETC.11 rule "ETC.11.0: Recommend first line in table cell with name and address of owner" when @@ -1686,6 +1744,22 @@ rule "ETC.12.1: Redact dossier_redaction (Vertebrate study)" $dossierRedaction.redact("ETC.12.1", "Dossier dictionary entry found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.12.2: Skip dossier_redaction (Non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.12.2", "Dossier dictionary entry found"); + end + +rule "ETC.12.3: Skip dossier_redaction (Vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $dossierRedaction: 
TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.12.3", "Dossier dictionary entry found"); + end + //------------------------------------ AI rules ------------------------------------ // Rule unit: AI.0 @@ -1734,8 +1808,6 @@ rule "AI.3.0: Recommend authors from AI as PII" end - - // Rule unit: AI.4 rule "AI.4.0: Add all NER Entities of type Person" salience 999 @@ -1944,6 +2016,7 @@ rule "MAN.3.3: Apply recategorization entities by default" $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end + // Rule unit: MAN.4 rule "MAN.4.0: Apply legal basis change" salience 128 @@ -1974,25 +2047,55 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then - $contained.getIntersectingNodes().forEach(node -> update(node)); $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + retract($contained); end +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + 
$contained.getIntersectingNodes().forEach(node -> update(node)); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); + end +rule "X.0.4: Remove Entity contained by Entity of same type" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + then + $contained.remove("X.0.4", "remove Entity contained by Entity of same type"); + update($contained); + end // Rule unit: X.2 rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end @@ -2090,6 +2193,7 @@ rule "X.8.1: Remove Entity when intersected by imported Entity" retract($other); end + // Rule unit: X.9 rule "X.9.0: Merge mostly contained 
signatures" when @@ -2100,6 +2204,7 @@ rule "X.9.0: Merge mostly contained signatures" $signature.addEngine(LayoutEngine.AI); end + // Rule unit: X.10 rule "X.10.0: remove false positives of ai" when @@ -2122,14 +2227,25 @@ rule "X.11.0: Remove dictionary entity which intersects with a manual entity" end -rule "X.11.1: Remove non manual entity which are equal to manual entity" +rule "X.11.1: Remove non manual entity which intersects with a manual entity" + salience 64 + when + $manualEntity: TextEntity(engines contains Engine.MANUAL, active()) + $nonManualEntity: TextEntity(intersects($manualEntity), engines not contains Engine.MANUAL) + then + $nonManualEntity.remove("X.11.1", "remove entity which intersects with a manual entity"); + retract($nonManualEntity); + end + + +rule "X.11.2: Remove non manual entity which are equal to manual entity" salience 70 when $manualEntity: TextEntity(engines contains Engine.MANUAL, active(), $type: type()) $nonManualEntity: TextEntity(getTextRange().equals($manualEntity.getTextRange()), type() == $type, entityType == EntityType.ENTITY, !hasManualChanges(), engines not contains Engine.MANUAL) then $manualEntity.addEngines($nonManualEntity.getEngines()); - $nonManualEntity.remove("X.11.1", "remove non manual entity which are equal to manual entity"); + $nonManualEntity.remove("X.11.2", "remove non manual entity which are equal to manual entity"); retract($nonManualEntity); end @@ -2137,15 +2253,15 @@ rule "X.11.1: Remove non manual entity which are equal to manual entity" //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains 
Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end //------------------------------------ File attributes rules ------------------------------------ diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 871c748a..851e61cb 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -1454,12 +1454,23 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, !removed()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), !removed()) + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), !hasManualChanges()) + not TextEntity(getTextRange().equals($larger.getTextRange()), type() == $type, entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY, !hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == 
$entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +rule "X.0.1: Remove Entity contained by Entity of same type with manual changes" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.getIntersectingNodes().forEach(node -> update(node)); - $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); - update($contained); + $contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes"); + retract($contained); end @@ -1468,10 +1479,21 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY), !hasManualChanges()) + then + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + + +rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + $entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); retract($entity) end 
@@ -1593,15 +1615,15 @@ rule "X.11.0: Remove dictionary entity which intersects with a manual entity" //------------------------------------ Dictionary merging rules ------------------------------------ // Rule unit: DICT.0 -rule "DICT.0.0: Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" +rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL" salience 64 when $dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY) $entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) - not TextEntity(containedBy($entity), engines contains Engine.DOSSIER_DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); - $entity.ignore("DICT.0.0", "Ignore Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL"); + $entity.addEngine(Engine.DOSSIER_DICTIONARY); end diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java index d0354ab4..b6b85cef 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/factory/RuleFileFactoryTest.java @@ -97,7 +97,7 @@ class RuleFileFactoryTest { 
void createFileFromIdentifiersForRedactionService() { // This is exactly the string used for the current rules.drl in the redaction-service - String identifiers = "CBI.5.*, CBI.9.*, CBI.11.*, AI.1.*, PII.4.*, ETC.8.*, PII.0.*, ETC.6.*, SYN.0.*, CBI.3.*, ETC.4.*, ETC.3.*, PII.12.*, ETC.1.*, PII.9.*, PII.7.*, CBI.12.*, X.*.*, CBI.14.*, CBI.16.*, CBI.18.*, CBI.4.*, AI.0.*, CBI.8.*, PII.1.*, ETC.7.*, LDS.*.*, MAN.*.*, ETC.5.*, PII.11.*, ETC.2.*, CBI.20.*, FA.*.*, PII.8.*, PII.6.*, CBI.15.*, CBI.17.*, CBI.19.*"; + String identifiers = "CBI.9.*, CBI.11.*, AI.1.*, PII.4.*, ETC.8.*, PII.0.*, ETC.6.*, SYN.0.*, ETC.4.*, ETC.3.*, PII.12.*, ETC.1.*, PII.9.*, PII.7.*, CBI.12.*, X.*.*, CBI.14.*, CBI.16.*, CBI.18.*, AI.0.*, PII.1.*, ETC.7.*, LDS.*.*, MAN.*.*, ETC.5.*, PII.11.*, ETC.2.*, CBI.20.*, FA.*.*, PII.8.*, PII.6.*, CBI.15.*, CBI.17.*, CBI.19.*"; String result = RuleFileFactory.createFileFromIdentifiers(identifiers, ApplicationType.RM); System.out.println(result); try (var out = new FileOutputStream(