From 80fdff803dcc3c27eb7f7904e58ff6d2f1a310b9 Mon Sep 17 00:00:00 2001 From: Maverick Studer Date: Thu, 22 Feb 2024 16:15:32 +0100 Subject: [PATCH 01/21] RED-8607: Higher rank hint removed if overlaps lower rank redaction --- .../resources/drools/acceptance_rules.drl | 32 ++++++++-------- .../drools/all_redact_manager_rules.drl | 34 ++++++++--------- .../test/resources/drools/documine_flora.drl | 14 +++---- .../drools/manual_redaction_rules.drl | 2 +- .../src/test/resources/drools/rules.drl | 32 ++++++++-------- .../src/test/resources/drools/rules_v2.drl | 32 ++++++++-------- .../src/test/resources/drools/table_demo.drl | 32 ++++++++-------- .../src/test/resources/drools/test_rules.drl | 32 ++++++++-------- .../resources/all_redact_manager_rules.drl | 37 +++++++++---------- .../src/main/resources/all_rules_documine.drl | 14 +++---- .../management/RuleFileMigrationTest.java | 6 +-- 11 files changed, 132 insertions(+), 135 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 85083ed2..ff854a43 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -882,8 +882,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove 
Entity contained by Entity of same type"); retract($contained); @@ -911,7 +911,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -924,7 +924,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -936,7 +936,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then 
$entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -949,7 +949,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -959,7 +959,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -967,26 +967,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: 
TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index ce417ea6..95e4bf27 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -744,7 +744,7 @@ rule "PII.1.2: Redact typoed Emails with indicator" when $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then - entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1452,8 +1452,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1481,7 +1481,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: 
type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1494,7 +1494,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1506,7 +1506,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1519,7 +1519,7 @@ rule "X.5.0: Remove Entity of type 
RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -1529,7 +1529,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -1537,26 +1537,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - 
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 983a5e95..228989b6 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1312,8 +1312,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1325,7 +1325,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1338,7 +1338,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), 
type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1350,7 +1350,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1363,7 +1363,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -1373,7 +1373,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, 
!hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 932bfdc2..3cb50691 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -225,7 +225,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index ade6e8f6..fdd704b7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -1042,8 +1042,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), 
active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1071,7 +1071,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1084,7 +1084,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1096,7 +1096,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, 
entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1109,7 +1109,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -1119,7 +1119,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -1127,26 +1127,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || 
entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + 
$inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index f270a177..2984605c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -248,8 +248,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -277,7 +277,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -290,7 +290,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by 
FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -302,7 +302,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -315,7 +315,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -325,7 +325,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 
when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -333,26 +333,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), 
!hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl index db7b1379..61cf03cd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -374,8 +374,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, 
!hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -387,7 +387,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -400,7 +400,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -412,7 +412,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == 
EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -425,7 +425,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -435,7 +435,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -443,26 +443,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), 
!hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl index fb460944..17a85e9b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl @@ -274,8 +274,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -303,7 +303,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -316,7 +316,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - 
$recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -328,7 +328,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -341,7 +341,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -351,7 +351,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: 
TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -359,26 +359,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == 
EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + $outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index c29f78a0..ec55a2e6 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -743,7 +743,7 @@ rule "PII.1.2: Redact typoed Emails with indicator" when $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then - entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, $section) + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. 
\\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end @@ -1473,8 +1473,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1502,7 +1502,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1515,7 +1515,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: 
TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1527,7 +1527,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1540,19 +1540,17 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); end - -// Rule unit: X.5 rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATION" salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != 
$type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); @@ -1560,27 +1558,26 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI // Rule unit: X.6 -rule "X.6.0: Remove Entity of lower rank, when contained by by entity of type ENTITY" +rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTITY or HINT" salience 32 when $higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges(), active()) + $lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges()) then $lowerRank.getIntersectingNodes().forEach(node -> update(node)); - $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY"); + $lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT"); retract($lowerRank); end - -rule "X.6.1: remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity" +rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or HINT with larger text range" salience 32 when - $higherRank: TextEntity($type: type(), $value: value, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active(), !hasManualChanges()) - $lowerRank: TextEntity(intersects($higherRank), type() != $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), active(), $lowerRank.getValue().length() > $value.length()) + 
$outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges()) then - $higherRank.getIntersectingNodes().forEach(node -> update(node)); - $higherRank.remove("X.6.1", "remove Entity of higher rank, when intersected by entity of type ENTITY and length of lower rank Entity is bigger than the higher rank Entity"); - retract($higherRank); + $inner.getIntersectingNodes().forEach(node -> update(node)); + $inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range"); + retract($inner); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 0be20320..2bf9e43e 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -1458,8 +1458,8 @@ rule "MAN.4.1: Apply legal basis change" rule "X.0.0: Remove Entity contained by Entity of same type" salience 65 when - $larger: TextEntity($type: type(), $entityType: entityType, active() || skipped()) - $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -1471,7 +1471,7 @@ rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) - $entity: 
TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -1484,7 +1484,7 @@ rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -1496,7 +1496,7 @@ rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY salience 256 when $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); @@ -1509,7 +1509,7 @@ rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" salience 256 when $entity: 
TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) - $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); retract($recommendation); @@ -1521,7 +1521,7 @@ rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATI salience 256 when $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) then $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); retract($recommendation); diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java index abb7fcb6..41e292f1 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java @@ -25,9 +25,9 @@ public class RuleFileMigrationTest { // Put your redaction service drools paths and dossier-templates paths both RM and DM here static final List ruleFileDirs = List.of( - "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", - 
"/home/kschuettler/iqser/fforesight/dossier-templates-v2/", - "/home/kschuettler/iqser/redaction/dossier-templates-v2/"); + "/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", + "/Users/maverickstuder/Documents/DocuMine/dossier-templates-v2/", + "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2/"); @Test -- 2.47.2 From ac38a966c5dbc85b1524e9a3b20db8271bb87f0a Mon Sep 17 00:00:00 2001 From: Corina Olariu Date: Fri, 23 Feb 2024 13:22:53 +0100 Subject: [PATCH 02/21] RED-8589 - Add "MANUAL" engine to all annotations that has entries in... --- .../service/EntityLogCreatorService.java | 35 ++++++++---- .../ManualChangesEnd2EndTest.java | 56 ++++++++++++++++++- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index f630a968..6cd69627 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -11,6 +11,7 @@ import java.util.stream.Collectors; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; @@ -26,6 +27,7 @@ import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; +import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; @@ -213,11 +215,13 @@ public class EntityLogCreatorService { .positions(List.of(new Position(image.getPosition(), image.getPage().getNumber()))) .containingNodeId(image.getTreeId()) .closestHeadline(image.getHeadline().getTextBlock().getSearchText()) - .section(image.getManualOverwrite().getSection().orElse(image.getParent().toString())) + .section(image.getManualOverwrite().getSection() + .orElse(image.getParent().toString())) .imageHasTransparency(image.isTransparent()) .manualChanges(ManualChangeFactory.toManualChangeList(image.getManualOverwrite().getManualChangeLog(), isHint)) .state(buildEntryState(image)) .entryType(isHint ? 
EntryType.IMAGE_HINT : EntryType.IMAGE) + .engines(getEngines(null, image.getManualOverwrite())) .build(); } @@ -245,13 +249,12 @@ public class EntityLogCreatorService { .textBefore("") .startOffset(-1) .endOffset(-1) - .positions(precursorEntity.getManualOverwrite() - .getPositions() - .orElse(precursorEntity.getEntityPosition()) - .stream() - .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) - .toList()) - .engines(precursorEntity.getEngines()) + .positions(precursorEntity.getManualOverwrite().getPositions() + .orElse(precursorEntity.getEntityPosition()) + .stream() + .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .toList()) + .engines(getEngines(precursorEntity.getEngines(), precursorEntity.getManualOverwrite())) //imported is no longer used, frontend should check engines //(was .imported(precursorEntity.getEngines() != null && precursorEntity.getEngines().contains(Engine.IMPORTED))) .imported(false) @@ -271,7 +274,8 @@ public class EntityLogCreatorService { .legalBasis(entity.legalBasis()) .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.type()) - .section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())) + .section(entity.getManualOverwrite().getSection() + .orElse(entity.getDeepestFullyContainingNode().toString())) .containingNodeId(entity.getDeepestFullyContainingNode().getTreeId()) .closestHeadline(entity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) @@ -281,7 +285,7 @@ public class EntityLogCreatorService { .startOffset(entity.getTextRange().start()) .endOffset(entity.getTextRange().end()) .dossierDictionaryEntry(entity.isDossierDictionaryEntry()) - .engines(entity.getEngines() != null ? 
entity.getEngines() : Collections.emptySet()) + .engines(getEngines(entity.getEngines(), entity.getManualOverwrite())) //imported is no longer used, frontend should check engines //(was .imported(entity.getEngines() != null && entity.getEngines().contains(Engine.IMPORTED))) .imported(false) @@ -293,6 +297,17 @@ public class EntityLogCreatorService { } + private Set getEngines(Set currentEngines, ManualChangeOverwrite manualChangeOverwrite) { + + Set engines = currentEngines != null ? new HashSet<>(currentEngines) : new HashSet<>(); + + if (manualChangeOverwrite != null && !manualChangeOverwrite.getManualChangeLog().isEmpty()) { + engines.add(Engine.MANUAL); + } + return engines; + } + + private boolean isHint(EntityType entityType) { return entityType.equals(EntityType.HINT); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java index 128849f7..c483a49a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java @@ -38,6 +38,7 @@ import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; @@ -58,7 +59,6 @@ import com.iqser.red.service.redaction.v1.server.Application; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; @@ -318,6 +318,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { .filter(entry -> entry.getValue().equals("Oxford University Press")) .findFirst() .get(); + assertFalse(oxfordUniversityPress.getEngines().contains(Engine.MANUAL)); var asyaLyon = redactionLog.getEntityLogEntry() .stream() @@ -364,6 +365,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { assertEquals(asyaLyon2.getState(), EntryState.APPLIED); assertEquals(1, oxfordUniversityPressRecategorized.getManualChanges().size()); + assertTrue(oxfordUniversityPressRecategorized.getEngines().contains(Engine.MANUAL)); } @@ -401,10 +403,58 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { analyzeService.reanalyze(request); EntityLog entityLog = redactionStorageService.getEntityLog(request.getDossierId(), request.getFileId()); - EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry().stream().filter(entry -> entry.getId().equals(annotationId)).findFirst().orElseThrow(); + EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> 
entry.getId().equals(annotationId)) + .findFirst() + .orElseThrow(); assertEquals("Expand to Hint", entityLogEntry.getValue()); assertEquals(1, entityLogEntry.getPositions().size()); - assertEquals(ManualRedactionType.RESIZE, entityLogEntry.getManualChanges().get(entityLogEntry.getManualChanges().size() - 1).getManualRedactionType()); + assertEquals(ManualRedactionType.RESIZE, + entityLogEntry.getManualChanges() + .get(entityLogEntry.getManualChanges().size() - 1).getManualRedactionType()); + assertTrue(entityLogEntry.getEngines().contains(Engine.MANUAL)); + } + + + @Test + @SneakyThrows + public void testAddEngineManualToResizeDictionaryEntry() { + + String filePath = "files/new/crafted document.pdf"; + AnalyzeRequest request = uploadFileToStorage(filePath); + analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); + AnalyzeResult result = analyzeService.analyze(request); + ManualRedactions manualRedactions = new ManualRedactions(); + + EntityLog entityLog = redactionStorageService.getEntityLog(request.getDossierId(), request.getFileId()); + var dictionaryEntry = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.isDictionaryEntry() || entry.isDossierDictionaryEntry()) + .findFirst() + .get(); + ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() + .annotationId(dictionaryEntry.getId()) + .requestDate(OffsetDateTime.now()) + .value("Image") + .positions(List.of(new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 1))) + .updateDictionary(true) + .build(); + manualRedactions.setResizeRedactions(Set.of(manualResizeRedaction)); + request.setManualRedactions(manualRedactions); + + analyzeService.reanalyze(request); + + entityLog = redactionStorageService.getEntityLog(request.getDossierId(), request.getFileId()); + EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getId().equals(dictionaryEntry.getId())) + .findFirst() + .orElseThrow(); + 
assertEquals(ManualRedactionType.RESIZE_IN_DICTIONARY, + entityLogEntry.getManualChanges() + .get(entityLogEntry.getManualChanges().size() - 1).getManualRedactionType()); + assertTrue(entityLogEntry.getEngines().contains(Engine.MANUAL)); } } -- 2.47.2 From 12a22bbb47a3f745ff6535e14a30012e582d4b55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Mon, 26 Feb 2024 17:55:40 +0100 Subject: [PATCH 03/21] Hotfix bp --- .../build.gradle.kts | 3 +- .../v1/server/model/document/nodes/Table.java | 117 +++++++++---- .../v1/server/document/graph/TableTest.java | 160 ++++++++++++++++++ .../utils/EntityVisualizationUtility.java | 61 +++++++ .../utils/LayoutParsingRequestProvider.java | 4 +- .../drools/all_redact_manager_rules.drl | 67 ++++---- .../src/test/resources/drools/rules.drl | 67 ++++---- .../files/Minimal Examples/BasicTable.pdf | Bin 0 -> 19603 bytes .../resources/all_redact_manager_rules.drl | 68 ++++---- .../management/RuleFileMigrationTest.java | 12 +- 10 files changed, 413 insertions(+), 146 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index a45fbb68..d4a1009b 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,7 +12,7 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.86.0" +val layoutParserVersion = "0.91.0" val jacksonVersion = "2.15.2" val droolsVersion 
= "9.44.0.Final" val pdfBoxVersion = "3.0.0" @@ -65,6 +65,7 @@ dependencies { testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}") testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}") + testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}") testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") { exclude( group = "com.iqser.red.service", diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index 100f3fe9..4d56c729 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -64,8 +65,7 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsStringsIgnoreCase(List strings) { - return IntStream.range(0, numberOfRows) - .boxed() + return IntStream.range(0, numberOfRows).boxed() .filter(row -> rowContainsStringsIgnoreCase(row, strings)) .flatMap(this::streamRow) .map(TableCell::getEntities) @@ -82,8 +82,11 @@ public class Table implements SemanticNode { */ public boolean rowContainsStringsIgnoreCase(Integer row, List strings) { - String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); - return 
strings.stream().map(String::toLowerCase).allMatch(rowText::contains); + String rowText = streamRow(row).map(TableCell::getTextBlock) + .collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); + return strings.stream() + .map(String::toLowerCase) + .allMatch(rowText::contains); } @@ -96,9 +99,13 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowHasHeaderAndValue(String header, String value) { - List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); + List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)) + .map(TableCell::getCol) + .toList(); return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream() - .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))).map(TableCell::getEntities).flatMap(Collection::stream); + .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -111,9 +118,13 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List values) { - List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); + List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)) + .map(TableCell::getCol) + .toList(); return streamTableCells().filter(tableCellNode -> colsWithHeader.stream() - .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))).map(TableCell::getEntities).flatMap(Collection::stream); + .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -126,16 +137,33 @@ 
public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsEntitiesOfType(List types) { - List rowsWithEntityOfType = getEntities().stream() - .filter(TextEntity::active) - .filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.type()))) - .map(TextEntity::getIntersectingNodes) - .filter(node -> node instanceof TableCell) - .map(node -> (TableCell) node) - .map(TableCell::getRow) - .toList(); + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .anyMatch(types::contains)) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); + } - return rowsWithEntityOfType.stream().flatMap(this::streamRow).map(TableCell::getEntities).flatMap(Collection::stream); + + /** + * Streams all entities in this table, that appear in a row, which contains at least one entity of each of the provided types. + * Ignores Entity with ignored == true or removed == true. + * + * @param types type strings to check whether a row contains an entity like them + * @return Stream of all entities in this table, that appear in a row, which contains at least one entity of each of the provided types. 
+ */ + public Stream streamEntitiesWhereRowContainsEntitiesOfEachType(List types) { + + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> { + Set entityTypes = streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .collect(Collectors.toSet()); + return entityTypes.containsAll(types); + }) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -148,18 +176,43 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsNoEntitiesOfType(List types) { - return IntStream.range(0, numberOfRows) - .boxed() - .filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities) - .flatMap(Collection::stream) - .filter(TextEntity::active) - .noneMatch(entity -> types.contains(entity.type()))) + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .noneMatch(types::contains)) .flatMap(this::streamRow) .map(TableCell::getEntities) .flatMap(Collection::stream); } + /** + * Streams all active Entities in the cells of the given row. + * + * @param rowNumber the row number to look for + * @return stream of active TextEntities occurring in the row + */ + public Stream streamTextEntitiesInRow(int rowNumber) { + + return streamRow(rowNumber).map(TableCell::getEntities) + .flatMap(Collection::stream) + .filter(TextEntity::active); + } + + + /** + * Streams all active Entities in the cells of the given column. + * + * @param colNumber the column number to look for + * @return stream of active TextEntities occurring in the column + */ + public Stream streamTextEntitiesInCol(int colNumber) { + + return streamCol(colNumber).map(TableCell::getEntities) + .flatMap(Collection::stream) + .filter(TextEntity::active); + } + + /** * Returns a TableCell at the provided row and column location.
* @@ -173,7 +226,8 @@ public class Table implements SemanticNode { throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols)); } int idx = row * numberOfCols + col; - return (TableCell) documentTree.getEntryById(treeId).getChildren().get(idx).getNode(); + return (TableCell) documentTree.getEntryById(treeId).getChildren() + .get(idx).getNode(); } @@ -196,7 +250,7 @@ public class Table implements SemanticNode { */ public Stream streamTableCellsWhichContainType(String type) { - return streamTableCells().filter(tableCell -> tableCell.getEntities().stream().filter(TextEntity::active).anyMatch(entity -> entity.type().equals(type))); + return streamTableCells().filter(tableCell -> tableCell.hasEntitiesOfType(type)); } @@ -222,7 +276,8 @@ public class Table implements SemanticNode { */ public Stream streamCol(int col) { - return IntStream.range(0, numberOfRows).boxed().map(row -> getCell(row, col)); + return IntStream.range(0, numberOfRows).boxed() + .map(row -> getCell(row, col)); } @@ -234,7 +289,8 @@ public class Table implements SemanticNode { */ public Stream streamRow(int row) { - return IntStream.range(0, numberOfCols).boxed().map(col -> getCell(row, col)); + return IntStream.range(0, numberOfCols).boxed() + .map(col -> getCell(row, col)); } @@ -258,7 +314,8 @@ public class Table implements SemanticNode { */ public Stream streamHeadersForCell(int row, int col) { - return Stream.concat(streamRow(row), streamCol(col)).filter(TableCell::isHeader); + return Stream.concat(streamRow(row), streamCol(col)) + .filter(TableCell::isHeader); } @@ -348,7 +405,9 @@ public class Table implements SemanticNode { public TextBlock getTextBlock() { if (textBlock == null) { - textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector()); + textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf) + 
.map(SemanticNode::getLeafTextBlock) + .collect(new TextBlockCollector()); } return textBlock; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java new file mode 100644 index 00000000..ac33da46 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -0,0 +1,160 @@ +package com.iqser.red.service.redaction.v1.server.document.graph; + +import static com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility.ENTITY_LAYER; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.awt.Color; +import java.io.File; +import java.nio.file.Path; +import java.util.List; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import 
com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility; +import com.knecon.fforesight.service.viewerdoc.model.Visualizations; +import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService; +import com.knecon.fforesight.tenantcommons.TenantContext; + +import lombok.SneakyThrows; + +public class TableTest extends BuildDocumentIntegrationTest { + + private static final boolean DRAW_FILE = false; + + @Autowired + private EntityEnrichmentService entityEnrichmentService; + + private EntityCreationService entityCreationService; + + private static final String TYPE_1 = "type1"; + private static final String TYPE_2 = "type2"; + private static final String TYPE_3 = "type3"; + private static final String TYPE_4 = "type4"; + + private Table table; + + private Set entities; + + + @SneakyThrows + @BeforeEach + public void createTable() { + + entityCreationService = new EntityCreationService(entityEnrichmentService); + + String fileName = "files/Minimal Examples/BasicTable.pdf"; + + Document document = buildGraph(fileName); + + table = (Table) document.streamAllSubNodesOfType(NodeType.TABLE) + .findAny() + .orElseThrow(); + + entities = List.of(// + entityCreationService.byString("Cell11", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell21", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell31", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell41", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell51", TYPE_1, EntityType.ENTITY, document), + + entityCreationService.byString("Cell12", TYPE_2, EntityType.ENTITY, document), + entityCreationService.byString("Cell32", TYPE_2, EntityType.ENTITY, document), + entityCreationService.byString("Cell42", TYPE_2, EntityType.ENTITY, document), + + entityCreationService.byString("Cell23", TYPE_3, EntityType.ENTITY, document), + entityCreationService.byString("Cell53", TYPE_3, EntityType.ENTITY, 
document), + + entityCreationService.byString("Cell14", TYPE_4, EntityType.ENTITY, document), + entityCreationService.byString("Cell34", TYPE_4, EntityType.ENTITY, document)) + .stream() + .flatMap(Function.identity()) + .collect(Collectors.toSet()); + + if (DRAW_FILE) { + File file = new File("/tmp/" + Path.of(fileName).getFileName().toString()); + storageService.downloadTo(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.VIEWER_DOCUMENT), + file); + ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); + + var visualizationsOnPage = EntityVisualizationUtility.createVisualizationsOnPage(document.getEntities(), Color.MAGENTA); + + viewerDocumentService.addVisualizationsOnPage(file, + file, + Visualizations.builder() + .layer(ENTITY_LAYER) + .visualizationsOnPages(visualizationsOnPage) + .layerVisibilityDefaultValue(true) + .build()); + } + + } + + + @Test + public void testStreamEntitiesWhereRowContainsEntitiesOfType() { + + int type_2_count = table.getEntitiesOfType(TYPE_2).size(); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfType(List.of(TYPE_1)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfType(List.of(TYPE_1, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(2, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(0, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_3)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(0, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_3, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + 
assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of()) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(3, + table.streamTextEntitiesInRow(1) + .count()); + + assertEquals(2, + table.streamTextEntitiesInRow(4) + .count()); + + assertEquals(5, + table.streamTextEntitiesInCol(1) + .count()); + + assertEquals(3, + table.streamTextEntitiesInRow(3) + .count()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java new file mode 100644 index 00000000..7fae90bc --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java @@ -0,0 +1,61 @@ +package com.iqser.red.service.redaction.v1.server.utils; + +import java.awt.Color; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.pdfbox.cos.COSName; + +import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.knecon.fforesight.service.viewerdoc.ContentStreams; +import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle; +import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class EntityVisualizationUtility { + + public static final ContentStreams.Identifier ENTITY_LAYER = new ContentStreams.Identifier("Entities", COSName.getPDFName("KNECON_ENTITIES"), true); + + + public Map 
createVisualizationsOnPage(Collection entity, Color color) { + + Map visualizations = new HashMap<>(); + Set pages = entity.stream() + .map(TextEntity::getPages) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + + pages.forEach(page -> visualizations.put(page.getNumber() - 1, buildVisualizationsOnPage(color, page))); + + return visualizations; + } + + + private static VisualizationsOnPage buildVisualizationsOnPage(Color color, Page page) { + + return VisualizationsOnPage.builder().coloredRectangles(getEntityRectangles(color, page)).build(); + } + + + private static List getEntityRectangles(Color color, Page page) { + + return page.getEntities() + .stream() + .map(TextEntity::getPositionsOnPagePerPage) + .flatMap(Collection::stream) + .filter(p -> p.getPage().equals(page)) + .map(PositionOnPage::getRectanglePerLine) + .flatMap(Collection::stream) + .map(r -> new ColoredRectangle(r, color, 1)) + .toList(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java index f9eaa926..43c9f983 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java @@ -33,7 +33,9 @@ public class LayoutParsingRequestProvider { .textBlockFileStorageId(textBlockFileStorageId) .positionBlockFileStorageId(positionBlockFileStorageId) .pageFileStorageId(pageFileStorageId) - .simplifiedTextStorageId(simplifiedTextStorageId).viewerDocumentStorageId(viewerDocumentStorageId) + .simplifiedTextStorageId(simplifiedTextStorageId) + .viewerDocumentStorageId(viewerDocumentStorageId) + 
.visualLayoutParsingFileId(Optional.empty()) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 95e4bf27..b1fbf077 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -157,18 +157,17 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() + $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) ); - }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -207,23 +206,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( + $authorOrAddress.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() ); - }); end @@ -250,22 +247,20 @@ rule "CBI.5.0: Redact 
Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() ); - }); end @@ -355,18 +350,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() + $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.8.1", - "must_redact entity found", + "Must_redact found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) + $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) ); - }); end @@ -448,7 +442,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index fdd704b7..aed445b2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -100,18 +100,17 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() + $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || 
entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) ); - }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -150,23 +149,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( + $authorOrAddress.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + 
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() ); - }); end @@ -193,22 +190,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() ); - }); end @@ -230,18 +225,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() + $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.8.1", - "must_redact entity found", + "Must_redact found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) + $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) ); - }); end @@ -295,7 +289,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f692fccb1d60b7f4f0f57be31f8f7eb6c9c3d338 GIT binary patch literal 19603 zcmagF19T=qwC|fqCi-IAwylY6J6~*@6Hjd0ww;M>+jb__n>qKKd+vSrzV&*oUbSo2 zuIldUU-jy${x_L|h!`CcJv)G`zp%fvzqvmLz)Z+UXlG~%;N@kIF|jpsHYa5H>rrA5 zv#@qHabys)20ELFm>Ag^oAC1koSYp^fHna4%%~*Uh;>H9kn2x0?x)qV$pONrS7LSk(zj;EYOM+ zyUk;_v!{22m(KT-Aty6V%X3xOLa+6Td-aFUfwAu{pP!8+TTh)N!pXQE057?e%o2$( z&`4HJW;axuM@OUnO1 zO{#JBdgoUSZm;-#|L8AntMLX}3@>?q<*FY94WI)os_rNO#XR^w+mw2>QZ|2Vskn=) z4SV7DE&)UmiUhY*pEZFeHmB>8rjs7Z+2(ux!_B?g&-3Bi>#Ye!hb~OgA~B3jXvFU` zsJ~9uK=u9*bvApOdHiH=8tf>IXuim9IfX6V=mLKVlL_idr{&fdfIpFXAOG~+I2`WT3`#R(gvvXlRBk;aif03A)k}k!3xog z-P7CW>ri%@(=^}H)_wbNCXXw38->^(f4`J>hfAgi22t<18j3B5hCXp^@l;UGds8uo`^`J zSL#OEy^kB&pMR3X7s@YJq7C21y4}?eMjr5>t$U&_hdxJ=!aN5}V2Mn4n_v7T{4N*} zx_>sBaO$N<4Je;dwwWN8@Dn{!#MceHzPWV=UCsy9_WuHVM`GX$pst&&1r$kD%^scp zc>#x*GN)$Mu4L~pN_J`Mfo_+|^afx%;(0)ueV{s|=vY%1gAi<@USx;( zM0h7~i7YW+1NZssLT{OHJ>qylcoI_yRO;TJH2GQY zBvLMPn$p!26%yNxNYlN-*7$37EFMrUuSVhbhun*F7l#uGuq+M~>{o;xH_;$tQyWv& 
zugGv8Eey?B2mRj7spbqz;|s{bk2Nw62VAc7qa>FJW=^f6E(w0dB&**Wdzv(HY9MH4ZB(DRb1dlz5kUN=A%0aJYm8yXoaC6e|(|$={Tw%m_CUfe{`? zgnKr3Vp`N=FXUeGMxq`G9EejlNc@2eqG2sX!TFtWumndeWY6I;!l5jnO0N1ps3a2m zP|57y9qV;;`o_)lEdJb40LO>mgY4}7ggj4(qjm@Ng(I(x42vluMd#{-ZQRMG^=@V7 z6*JyJ6|{FQZY|bzhC4E+mk29_Dwt+-Fahq>K*3WzWTnNi@<%aJL?ttbXJzb-#FYyg zN-0$oyUCSzYzlRa*|L}mw6YZEsb2KrTpWK9&=KIYUy_QuhK&DFv}3KWBIy^izd&4P zWp#EM(nwCbwwajdIpgDz!XHoc!pUGZ*{eKfm#u2JdXJA)M0e8jJ(2%Dc@d zd~EnE)DbyrV+amc37hN-XMVZw`kIveL4(#Gr{C@3hTRYTbVReoF~=z`y# zIAu&M{C31fkFO<_H1&%M>q~;6BmK-ri&)54@!}@lvd;RqvckJ!Vc-t**F(`I5|and z=5*J*dcu~HrHZ7@)nW`Z7T9OMT+MiaW~fiP9NW9JV^Bnm_9N>^Fk!Z#*y#dSg;PlR z-Surox*2YpLv(o3#$tu=4&0&+^WgLDJehN>CqJ=d>q4##GOeMYxkPTheee7{e4Z$c zU~_jWwGT(bTkgrc2B8>LB*960@m8)>EJj&pAy*B*YHTOt$dIz$UMSriGc1_Hxo zC5`-B^SUs-Jm6bcH)n(k0tju7vWxaex7cQh?6ij+rJ`;Rw2oExw8-n>muw!@6OGL8 zV?1}0qY6Mb=9-K4so>B1wAtNwr$8FGFA$=p2D_t6+-0y)$A4)cQL($%5luBh<6ui& zdrJ`-$39Y11~hlbf#z|_!b29GlalOmTI3)P?X>kiq`Vyj0(ajyXCY$4E z`lQnrX`65lhqO~_pkh|^H-OU{(S~5+=gBv1o^X5Vy9VDq+&z9|I<&rA=7BDKe{7l| zr~a5a2)2I5-N>+6ChR4{MHD!5%_uP|<0fm)LK9h7Ah+it`g6YX!!ZM4%{-eLTF@!YVM(r@JiljgR~ii=Mw$! 
z^U-Q+ZpnKrZES_#c=4Q)GU`8q z3i-n>P1H}BG$czz# zSIWzdJ&1&s6xl&{xrrPAz3Vv|mz^?z4 zTp_d$%!T`o6S+!g6o|BTVAH$$b7hBiknq%bERCPIJU77-IQDLwob?8dokQKLK>|~A zH$_!ep&JA+n1=CbQc$FaIz$uNYH5g$!^xAmjb1?iY*tB3IK2?Iye1NjZ!%kXm;{l_t=xh@EoFLZjS z0!|v#niw-8Gv^@CLN{tsOIcU_JWwj6-m1Y}TDsFo1-)VM$TV$P{EO)(usl5YyxRM- zTftiU0U^ZAXTBwuzOC*f9?s3y;wIpz8JKb!h=WbgUB0Bcu!7clRC}D{t6y99_YM4P zY%xXZGxAP_vYJF~jyCZK4Yd?TbIDZw#O2jMqGAbaW;+T;c;Gb_kJ;6{HgK9gJa(e2 zHH15Dq`R!UU2CBRImznJ65EfcqM<#ubj6UtWYC{*yHgc4QEcyTJq#6EYe#(Ma#w~| zm6SgFQIjZ;YAap%ZS3w zT0w2n6lRM>*58Ze`5G~GILy-`2MgX~&JuUdf|^I?uOTZ`a@JbS>Z+=4cl@c!`Pkz+ zne;87fERE6ol%_k7C^pNjpAb0r*ZeqE0S~6f~a^d?e1Yr zGuSk=@6=giMW{$I%B*UYJ_RwXnNr6TgJoIu+|pT87gASTm!Vy*W6Q}x^j$|JycFe^ z_q5AYl6uXLObJ&ELsciLrjPU4!M!$RuXQfJCumCMR5}eyoL8Ugtwz zX^Kg@)RCEs)(>wkgAQrQy0gFz`lgDfQz3A?NEQ zrpNp9^w#a`RFq2nb*9HJ&@)wbD9B0@n5SE^Tybfc;>B({vKk^}t#N6NQz5zmP1`~> z;3&R%lZ~V23}(w7jB{4!kb*O3`>P$;fP=lMa30cTYd>eZ@i6g+FNWtM5vRECH>$5^ z{>eLm$wb;qy|HPZb>~5-bdh&koevE8#JJlv1RoB4O^#Fwo=uixcjEo8lOEh9B&|C< zqP&Ge3sp=zXvO+5gI`UVOTQzyIr9PwxLc(mAzfmZ1XUoNTnv`{g+e_Nlwi*pUCszA z19RcO<3*kl7W{j(Isb6}+lg_u+^dnDR3(#Gat__TLTo#7{0x>l6VW7yP%?+UgwfVN zKVXLHR&S~qEfz4=9$Z@jnFPE03Y}JAqFzBa{F6b!6zyQ^jM=dp*5Oj}Pd9F;*q$sn z-WzbTo%^d67L;xtJMz-gvH1iKL$aqgZtQH`x8PzrzI7e^!hu&hvpvK7CdD@lM}CHR zym3R2h|hN#go8gdOR-ry50~K>t+EGU73~`ygTiv$^HXonbkr=-Gd!ciu>mGU_SlU6 zP<%o-)orNENeXr|DT7bSB*|1j92#@fr(~mj5e|aG-0|o%Q}OMpxL8_mXBOk?3Ja_| zUawrH{BTTU14ay+z-lTqxR1-xq#hZ4`q!C4Z)Yd4t7fF3wb^KL@8Sc<>7f4Dzylnf zy1=XGJGJip(@<;kvgNfJggHa8H<4xd`)zF2HvN0Wvd(Yk; zFm*|F5RfNSiB|_oc($sc?*Nb6yTWcrIT+vO;fW?|{+m~V2^q22U6WVy?A|Loqzfxg zeoT*=!LE`GxTsn$lqkP(^&qVvMRm)1-D=MO_=oGTgY`USVDtWqAW68b=49{TE zhxHR~xrIZljRZBvmKjNz7eMe*Vbf|-@RE>9&SGw_(NnPkzuBVIZKEMp-l`F8ip<$? 
z$svXi%A5-R*uh;y(C#H8Uz7|&ITuB|{=87R#(sEu;)52onhLz)xPpMA?Dle=ss1BE z`cVnalpeJ048c7k0Vg$o&$eP+cO(9U42PIPxpblf*bxQ|G|V{DxK@(dL_I{Jc)#Xk zC00QcTADr=pK^8)ml86_ZOm$(GOB^4)kSK!58J#GkjZfb#{$U~HNsx6+-emLCdOuz z{&Skc4qsDeDS0aU?MeXmv?7OxfdAzqh~jNI)Mqt&ULG~9do!T(%G58iNss9i13NN( z&k($3nFrO{Lh?4VlM6@KHDdNxwGE?Zq{be0lt_Y^D$R@rx*>E36o&>NtB5cW6m)%> z!dh{(*t1QK?4ds2sVTM8g3B&xFRxFd0~tG4ZbIMvvr7B>87QsQU_QED$5%Apsx{E@ zPXZgs)7Dle`x?91sN2sCd*$QZp=!&=Vf_x@Ju&gYXwnroovv`z4Xo9p_4{y+b2Umg zSYuw-S^RNEKRl)CA6Nt^5Oj2%CA1Bw{h3C{wCblx&ywhCm!%yD6smFY1Z1pB}RF(l#~o@5A6 z!8EI_lLFH$Of7Qmhp?$SG#7@5U+7GDXL(Zac!oHmnckwA9*u^{eci10mW=+sQwBDi zotesd_hAk^ZM$-xpJmEy4oqjdm|GDq`C$$_vqsynmMzj7I<0PGYql<%YO0batWMa% zrg2f_LbFPQ5kQ2TQUu&?g!yq|bkXCFk^PSkG7@}Hu+?lL&&eO$mY`8@jab7~!WR8-bz>eEE~aX#<+ z!!Ew)Z)dtfnW+bQe9?dCq^hv2ffo1nqM0v720({60|K?cUa&!zSRobR*_~!a1nD51 zxEP&scXn_guN~~u12MQbfB#m5zuMa&gwz)l_}tE``RdUzI57O9ihF}=pZF=teM}e% zju{`ym0R|5U9|pn{~l>posPFY&2yzfspRPtO8II?y<}Pg*Pq}P2(9AQ-%MxvNS&_# z#1uO4E~PQ&|FD_?pr9ga68Q-kJnEP;ZK^h@b*d$Fba>hD#bhFUTDWV<4I^ELB8+GMW~p-!Yi5%Oc_Qn^|H$+^}Z*wcY<+LnIq@PD$eR-Gl&6IoQz5dcMTa*FrNJY z<0L{auSn0LQV670>O%kus3`8Gk)ORIN9YxLwsY<_+QDG@dn>uqXP}psoWZ}|mZ)5Y zq!T-&%-O-E-eIT;y9K>SjJ>-r{TV3Cv`6SqW~k1#bHhdv*GqM`v$k0gpnid97Q2pYW$% z`Q-Z8`E6Dg_u06_)cD+gio<%`*6+IHJT1aF?W?9#cZ94uUHap=Q6S)VDn)`t4EE$1 z;Zl;uKtYR@K+c@V03tMp4}gRU$5=n)8OPFo9oB+>hq*x~MCUZ|j|AVU>#YSxMu$4O z8Aq63=Dfr&zR_I6Etvd`>q2umcafU+VzC`UmZUyS%WwVAW)Q=z8un5%0xG%~ft?$o zJ3%=`d&=bHJ&}NtBMkglHeGziMXRid?Rcqy5%BA`@TGc#jq>7$>eij%+RbLcsdKod+%QsGh? 
zy-KetX!h&CKY4OonF?29E4@ya(!eqS23cQta$+Gk?u$*M3O)VGD{fI3$}1QCRNPs~|i z4TP0M+SrPG(=K9P-8g`zN)+j@98_gocLWdVvZ2@lX8D(geEFgnsjuRSi3OSH}tKrr^5P4aolPU^==f6D$7+Ur z%92;ck8!$!jq_0%UjHhRx7#^GlLv8bjW#62exKr&F|7W)y)^^ za_U&qW7Ue~>%MIxk01C+;j0JHkX?7zAA{SeTD4lJ3a96Hzo>%I5V=Z$;4^Lg!p~B4m_4MK2Eqv18_n33#A1~zc(C*N`5l`IUEn4T9#U+rT`?Vr-8KyBHni?`K=ly2VJsg`U`&qD4u%SSpb6qF$c z{ljoKyGvHuFO*w}uZAavs1jd?Pl5XSZ@ffdU!%oH)6gRIe2N$*86zgjBKE&~9Jo{; zf+)Ah1_2WQ89pi_UE>B$Tp)`V4>a-C0I$^1z8BaQ|3E}7*mYB6kQ4DHM5lHlLxRZ) z`Q4^}>!5^F!<|w+);h%qM5GgsL(ByUp7X;o5R7-4wl|0Lp68MLu`yrxmSF6mJm{>& zvf>r}p@vOx(!z$&uGzNg%45^(RrIydmv%eLkM#=y91pytlvUIiZ7FX~9Y6Ycg#Ejo z!55Z;VGRyMv2nMvl<$Vy3aCe7y7M?=ghzvZ2%3ab1T2jtf17%U>cEd~ihfH+G%2#* z*oGleSWA1WgADy1j39zoOL40c4a#5O+N1AMA@`Da6>+BtJ%-9<2?TP{vOI(xp{-ONZR2 zwj}u_1MxKTJ#<4Jj5E|I`dfF@r+Mmib5@W>Xkh*EIS8^^k7ZZ+- zT_%q!D$vz#CvTstyD@vXvtRY^&t(UXAB^*`DY1k2?$X)=SLg6s$DMRs8b&9%ewe?rt$H9$VRQ!D<3329?0`Z*z zF>ccbf$~GH55;qJq{Lp3&yQoxQv8^b!w_@c#)M;b+1M*5q*>8LdtjdufMua6_9yrO<-_;x?5%2iUwzU+ zaig*=U9golCjkK`*qAP)L=ywTZj6!*92)WNj)XcD`RiVKezI`dn4@{VxJF86KiaH| z%kV!Bm*NQz6!r^pjkDcb&It4y1k$%ShHuZZx1S@u1xd&>r5UyhRNL!05<^c)_%uqo zqZ-yA7!e{QJYH{}^PJl9q*$jyQF&{g9ZD)BrBM7xSf%D`36T_rBc~>l=hG;}f${9h zx7}T>^%8uD5u$@=ahF|n{J23dPlNmFzwVre6GqHgHAO!``Wz}Sm) zQ1kU|so(`tWd1Vm5iQtUyCK_7HrE$)1eXCN@t$j1N25(a4|H)QL!yLQxyvW%_JCgB z%7D*;w96P{%c8S}mJ#pRRFIX8WiYI-52Y2FLp zM{3^coV}pv}Mq}M-InWr&>E6 zc9lw&o?A5?6UD8|r+*}(*lXuQjaKjxJ2jsjhB%1hnLU55(Fsi%7BlXEond<%(+M{k zGx2eoHw&-k`Dt~NC@?z4vV2sQO{{VrU-VYTA#2frxE(6+cBs1)qZPNbMs;1_ro})J5TpUB|l7#7_Cg@=vkTDj6@cuC?`rOARO>*Gj0utp`2IdJYRdp?+U;9JO| zXiCpFmkH}=EA+!iHG3#0bcQ&HG~LUF9pKxOcRW9z$-Oy7FWFXxIG_WuBW@sGz2Q;& z958c!s#E;T`3dG1xU+3=GrudJCO%h@ zU_hLNpzIEoARzl4Kk_=7;kL73Pv)&gy>nA;D%aSoeCqo&Z1!lTV)nP@51Xa4-J$2Y z^16YQot3h>+qwp$^xtNoIVPW{SUJNtk;hm=PMr_W{`=O?NQ}ID9zoE)Ebp?W&)*n% zcRYHZK)k+ulCs~@H?!?Fl+UnwjxYEkpXIfl4sgJxfBfw{8ii1t=!S&sVKvPiWYqDW z@(5e}o!Yn2`+vKH_nxo~(vF}KBe^!qE%55sy~Jur+4Pe;-RUYf?_y$rJXp_#>Vh+ zN6S^#pu42VBW!r)!s3ywXYF0?et5Ec%G4do4=e^e8>Uvspqqj4`-Z6hD&3T;RaDU? 
zRETT!(99GFc6&(U8*Bo^kiiKnE@Lgk+>ca28p>T#*ce+|8X9dkCB8NbT``o6bboE; zL7P40l~%HXb~53$7VAq)@Mq7Hp>;b7-{p207j1F!3qaMl2oj=*0phnQHcPTk1?nXF z0*u5YqE5)1LTh;}Vz)w4+O#tY6DkSdFR58U1#)Bwn_jcDl~5=L+?^9VDq{YDDQ1gU zPHr-JaY$(dz>3>D23kL3*#AXi^G`kVPgujr#m4qOMK&D&EwWMZus2~4v$J&;F>x|- zw6J%!`>PrIo0S9Fm@o(n{@Z9|EDTK?fzB3ow#p`s7N&rIqB1ilLe{^=-$)@LJ9lk5 zHdYowIu=G2LMCPoPC_nDW?cqJXP~u(k)W-awFx03fI-m7$i&u}kduoE!0_*2|1xyU zOzZ##VW7Q)iG`WD^WS=Z3zeNsY}5!j|5o|e{9hfL2{Yk8S&$OougK@WbV8i}eF^_1 zdt>ML{}H|E#>?6UG9rpSW%i9bBl+8ah6TkcA(NtiC|V%=n204t3xw6QfAch*Q#&I* zS^YG9Pjt{wDH(f|em)aZ?k8^FYKA|$J%arOqvM4mLAJI<`-J8*IU^%@Vs-AKXHaU* zVXBGP{nFs!P_cr$ehZ2q_`IiO-_;d9DX#eD;*(L#fZ0n=47k&2oO$7wpZv1{i>S2N zZ295PQ>m?ghvBYIff&9NuR5{P$IAwxV^o7^te6;4NqY}s{dO%wL*M3n)ZoBR?ZX=G z>TsorMB!yT>HQ-*OgXBPL=DNY?@cAc(Kod7*yqg2Ky5^PDsd!c!t5ZR?FYX@_7&ef z{{zE|-o(f+7WWk6ukV`&oBG5Jev$C;A1KbT7dhl37deH0cl@|j{k#n0uhiFup5o7o znp?lvi<&ZlH>Q^7I3&du~{|k#;|9#K?Cw2_VE{4wk7*ug|G5HU#5YWlw-;MwOrq9CB$ywMO==hH+ z8Q_1~EX)7~bqix>b0=*^LUtxbLN-Q5LS_yQLKaRgLT0xAVYB|F^q-%F<$t9A74t9s z51Wk<@Gt!r_iyK5{)^s!mi?FiUt8?#?Ejnpe~I~z`nvyv-Txj_g#U(|5`aO)PSw`p zpBR(yAHe?`(SJ|*|E=U?V)_^Kf1&!nwQr1EjEw)Q_U+Ka%3XOm=|t=Jgl^q)#gh=| zd=O~UD<3(WQT z(~%kg&Im{REI!mP`IY=&1+qg1(in{^DebrF_GB7R0}53WMUD{Vx7XLL4^~PKYM#T- z>T3OGx!q0@1l$Njw5raW6LgwD3w~!THnNFH2-Y;l!s1;Sn;|Bur5fBGW2V)K zmI>LnY>)Z2!)?CRlh@mgnXZ#&#yx##PQ;y;TL1l6e&!kW=dCewoKz2pKpJGJ&hMjy zgxQ0WA-{L}D4uTv^nFAhbrA;zAyMFpMN8%`o`H3_wIjyO8WGYvUnSB$0P9!J`-#5TmPPApBGHOGQ z_@V6P-oIxG(RrV_D$PcqX-FW4aBf2GnmqruS=u?oRSl%+>KN^V3IAz} z`Am|BF$w^~4KT77aJZN5Iz*@m^!-#Pwc_Cw|3&`{lBu=xOy`J~|Lq9F-tFL3`*%plBJU$Yd6S4d92Ed*v%@NirZyv5ljke6Z2Bag|03RD7iA@gp`T{^mav0 z6o%vz3tALjof1F6rC6mifGo;H(rGE$Ca1*kmjFcXa#eV#v=*+;!2WUoKV_l+vNY}n zwQ1EtvV{P0h6PMq6sHwy;|QIiuq@jIcI0<#=&{P!goai_8pj_Rv&{}_Ai6}#(%H?y zBx&8c{>^GF%Q%%~S7WH7Ro(!t){)3!vVue6B2E@O{8-ZphH3AEd@T5oasaU-X(vN0 zQvU!CN%Q2yyXufqk|tG)sT%%QUIK57Y5Cz&vkY3o+GbB~$yLNuzPCz>Y~uN0uQU%RIgGB}-n~&EU&vV7+m&nJEmwG~Nc)_5>#O zSH%4^|ISpSA89I6s9`HpY3tV3b{5rE6Gtr4*m{vhD&wCT$;WCpFFF`jA3sZMN?KIh 
z@Y?0^zMEcA0o+1CQ%`k5j^v%(H_4l2Rd5!eUz0jh(<>0G6VAS-{(Pj$wjGe5ELf@@ z>LRuUzXTAcWC#k+xF?WFGFU`=9J~&(Dn!>5Fb*1#H5bvsq_JGU!NXe7Srj%Ild)I` zP(R3@JlACb)U;|Afii}vne1(Jh3`DbPVpdNpqrXWG(nsGkv&Nuej1}1>()Ma4ne|+ zweB?Xrp0n{rcwt-!57R8YkOedrt)h|q_mh3u@zgHU2ME{C2d&pmNKDbk~?W`V4sg_ zrr0H}O1x)4eyfRD+vQPLm}fwXh_PhH@(7*VssLKMu>4|~aTCF232Au3XT~aQ^>79C zXga_)!5p8e^Ljl8V>iB|lu+Ash(_Yt9}g&eGj%@XrXp+|Bt%MwCTc<<djUzmDFZK6t_QS@ncc0`RRNsB+3&nwW$;Gfj+X>f zwuXqYL97Df`Q|xtQH{I}l*q}T0P8ycEG6MD#=tu@sf5lX*jZF#SjR>#qr?oX?y%hq zM|Ze>t1^+<=mCD&w(D`iil1HI80aKNA8M|!?$>FR1udyrjtCYN1}LG=T%6(QLLB^@ z{AbN)&z#W(DeKZiyD$OhCx7xqL4cf4YCUl0Hs;MlD>vbu=;L!+xr3k(ft@s^Y%6Srb_zy;@mqwcdB`1a)VG#oa_WYq$k31K#0R0@X^OX9 zmJj?tj0HGfWZ^Y{femsVOzP|hxVM@djCKlI0q_|u^-w0GbG2w0wRQep%dKb;O`_&B zb#F_RC}WJ?gl{pATL>==>W|(>VfzRd(Wgr8#Ip8GIEqXaM$Z|)(P&GbbPK;R`qMaxMrRT3@CE`FrL|cYg#i{u<#80 zla`OZ*d;id%(RC^lFh!?i1ZeD!eI!H!kj<+OFhax%?O{>Faha2(!Btd;srb+3+S7u z++7MW3dZY4X@y5-6q-3XDYy8PIf=Ue_|h`?^BL5qB1vea$01Bh_kvmhts>b&r#wqA z1!lsuzdCmn{xM5Mex?}J4Q#u4Yk@ZYcW-F$Z_)b(DDj$~`JD+Vz1UR!+c zhXodN+WWO1t~Ts^u|6XFTKzix7oqx=Uab122QjFssr)+RDw^nAI%EmgW!Ph?rpQ6* zAfN|?;8xVC5@qFPsU9FwxF?AhBjp-1)OKG~-3`&m)pl>FH`)N_YKOvrMqKoz0f?(% z#zk0tI2ci-d?E~LI)2D|a~mUJQ4>14u%>P{_zW3BRYW@IbX4EZjlpafmr1m{gJK?t zH#2yu_$BnNW2c3r&zvCw2cZ9ZTul@F;@^pDQIRU-bD-nwqa($!o z@O1GJb%vK$w{oxF@Hp?%aH67GinOkrs=*M9}8!JKl8Pnw2&z?mhJ1JjhaR|Ju4<{Vmb*PO3s-<=LP-+dTGZj^8T~cckJHpc(htj6;+bli|JJ-b$wr?+?1H2~57 zIcXZ`j~%{A#C3)ju+4PId5L(8=f37JBd1mQ(yqB6rs1KCW1JKtPeVuccD6(qix3GY76uIL)I(M zJ4@-(99Ml$2iOYNh;+mO9j`HHQbZ0bW$v<6g@OB*wP`4uZ2KaH&_Zu zib*-#OscL1^`j_t9F(;RsP^XVoy7d}Vv+)wm&9`J!!>WCwHEAjoynu;Iet6^9g~TY zoP)yuImj@z>0x^{>9np}PKAC~iB;3^^oG#dZ080Y-ImkVth@nfuD>Sk&=QDob$e}Z z9$|1gXK3S)X3cN~w`PNmayemea{-HvIYF(ikEr4rs~A5^t4wO{6<;SmG|p^l%u8bQ z_vyvFH15~PN5vh^4RJ5OTYQGAxC=fh>`lJ677?EYH6II-(C38NyYW?gUMVaWx=P~)9E z@kgH>M8RyOV#T5cW%?p}$H(5gNspb2FvW@@NXM$4-E~9gg{0Sz@1pN!HAfg(@|Ho^ zs5w3J{G&C?PglJNt)w)aeEhTVWt@t}-Hs84EnN(?5w(q=M#2>c?}8gW{5PTDwTF!K 
zDQgewEA8Q!eXzdit7x;YQ{nTmW4#G=JRXiebE~J4E&`a8!Zhz>=uJoqJN9ScN`hHx zl#<}VWcJdWl0~c9YE32BWmrafayGA!VyE`|VYeasYy8tc2gOPAT@Jg=XgCu7s=1aF_&ziw{6HkKdC82J3A zpFimT&YITR55~>*^`9q#9}Vc?c_=Za-s!f)3jN`5mh#j$vOX3!D*C!P zuBW4M>fLsIN~b^V7QVWkp9%1NH8G{cj`(FAE=yW{!(J_@{=8KrbK}y_=9=X&kMlkn zQp$nL?tIJpIVB($HSfcEvS42N3YU%poV-?wfQNz^{3+#U1~h@hfr%jh3aRs;v${QF z@V)!=yOU6pTl#djY&S}vAog~-D%;qId8+HrkV6$$bB`1&FRG1c16jjJ?3`UO6ZrgcjML?9U&EwchrVL*PpicbWc-oi%G zHL_`yfkkb-+(>tWd7;TV>Ro#5c;la}r-$XQhvDnZxA|fIml-@&n~yjI?BVOqo9R`9 z<<~8|b+?@~_3jwR+GDnj=hgCymK?j(P=aM2tE<-%ewQBseu3^4>g?Ib@rJ0kmRPP9V9X9% zCpj)EKM6iMLj|=;j>3mnm!6NthpKEad2w4M-M-q2L6Kp}56DCA$MHb>)b@fAvy|O| zfGRKa3$gr-f?m*40~LSKwRHrV+HlhCl3q*7AbCp0LtOrnUM)^|eMJMjM1um*^E{{y z)Rmto?xCPKz_OdYz8dSMz9+O+L~PLnPz--*Ym=pll^U?)0Wg5f_7`;^C`_($m68F{ zQuz{PEkLcz5Ksw`as~un>IE!vfiLydsBWto;8iMoMSz|I%U;_-ae_G5zya1l5A8YB z_TMh%r2#rnp$|`#_DHNJejC^VsPe-x;v`C>nefsiVFz`i&v_bJT1@pyTFO1`dS?=( z@KK^g$pgmBOjvTmC@4)K$=ah!5+9`bESc5G?c`#XUVD%QTv`@{+*kxQa*?9VMPm6V zaVLc;B%9nm=Ng88yz&#MLNt{-+M917z$r?B-y>T0sHkX*DEi}0+}-cwsF*w(xaN%i zDDWml%ONxK6TvOPFG%_9Iy)271yc~uU4ai}6%^dYQHn>4P|O2~)rgjp3@eg_Gu+unoLB4^r6Q@I%N`GniJo0S!&u?_e5 zR(PfI=%INgC0=}4$*$P~B|k%6-rJY8Oe)k2ML;DgfxH}fv^|cM5=p}B$#*Ik@jl3X zqgz(Vh+v9-5>zIDwm0b@iI9J6{BBr!bJ?f}vsCid?5tB@EbN3ROA;oJP&(Fw2rD)a zd-Q^2mdTKK&?symYa(ZMfD{W`a@Z1GlRrIG0>w%QAWnS1DN4h{%rc-9mrF>zRk5MXlC!O>;PL!pZlfVN31w28OuGD`?sRH2S{ffCNp9j@89FEMTZv=v zemoQN~cU8W{hI|6^);17zR@{QU z@0j!zKk<|HDJLTZ{os1Hbpdn|yWWOvGpZ%YL0D)qAZYzOJV7RGm~YZ7^4}60#$lVxtW07Iu?}?tp9CscW_gTL{Yhb4GqhaO8#}gJ{E*@v!LG-oa`=o_9lz^Dua3ULroZ{#ycWGsLfp^ppYRqt(PS_NuVmeSY zZpS0zCixb3`NYtiguB~t%53I}1YBh>n@;+bvlwcouE2J&X&bU5;?`DnW$#5?MenKc?42bsbngk# z48$&iy?=6%^(5G8H{tTt`t;$h6Sr~G?{-Ku`vDMD-C^$C{z--lsYKXCaqO^Pp(J1A$8 z+h=W|7&Oux_u&+|=aCdf$U=6~M;8df*kNC8#&l-{P@M3~xBOz_{Z8K(oQn|(L~Qwu z{%3w2;n(CJ+dL+(TCa0ob(GszsqH z0g-s5sP&-#29OXL?KCr+&Axg6d;h-w{kxO--aKK`?2~(RJnwRr!EpKNul9aZY98!Z zJ1(O%y`Afsb$jeh;gX&f|G)eb;@tG>&XixR@|zTQb$81hpP6O*R$qUrJ@h25?HcyxNWS=YMX@JwI4=K0RJ 
zFYVUVrua1#w7)3NbGY8SFzLzB{IVLoL??NY{rvd1!BZ>JG!C2C-1yd4spYc{*@yDA z+PtFFI<0Q~s`j9#0qFsIV|sQs=20Gdy|y+SyRM#aC3Ak}?_RT99P8_+F7GUU8LzFk zcy{)AbCO-ti_A8kbqU-xp*mixAxT$h6VU7Y#zGqA)68}D8{N@d?MiUs58@ZLmkG&yLsjT(Ww+YNFumxIC%LqY?ku=*HX(=t^93yXaYZ;^q>r zM(~Ls->KEm{I}C-?3;7E2m5omi?gP0*V*l<`#y~SRbch>Rhm&ZYw~!XZr6EUeYxUr zMS-9vR{tIMpuZt*vfsmp5oyI|wN9TuRNLH0I^jJg*YXNayMM!u@s4A!<=pp6X~1J= zOMhUOy4d>PT$thT*wEFTTvF1#^~Q@g-P=wm6qW&z7p<;qPsO+u&^gJc^J|iq)@54i z=V(~D^Q*8J%aJ*O(PeY&oqlfG5Iyp8;jwiqtq)2%W_&FzDVed{LcMs(+{bHYc!~<} zMZtxVy(#j^2ZH9Njc*Myxa^)_y*>NPqTB@k$)(V_ZakxCB4+QCHp)FEEm?k{_{X-e z$`iMmePcR$N8~oHI5iBiGZH9cj|LJMY|?<3G#z6|oT?pvV}G zLkJ90{|h67f#mQU*$CN&Vz+@V6ebN+5Y8~n&6KkBljPJsk}MVQCy#b2Jdm}-;8W;1 z;>hOl#S<@4W8|St3vSF?zM=L=dxm4v0gWoQ%l4PWUL6xsbXGCk%i7;!Y@;g1au(*m z8?DQ?v?O#b%NDP$y8O}k^F2XJT{=Q;?b#Sw*SX>#cFVbAD|yE%(XKS}WQHbD(>+F9 z=6&Hn$v4To#`T@IR3v|1i6*?Q$99)y$19?mZ7mcZ5A#Ss;VS=)`YwxX$;Ej)ih_1- zAbNYyhHTYguQ`q(!Ld7)RmIby>QecQIoRECs*Gw`%QT0&9AS%{UU@e$GPlyTW&8!J zYu#Cn@vo;=-q&<`9o^NKG`%tUP5Pu7_l9krTVA)v{}k3sH!ke*^4SNjEG(2SmD?=! z-LA^~!!7j{9VzZn3*yTjfj`>~`CIZ7N0M?1hRZR05PZJPruT5+_b1h`a2N%ocn}T` zz*~Z21jG#JWuPttiWxA>z+(m+li(nL&Hyw9kk<%zO<=DP0>2A;jr(H=?L~}Dc}xo! 
z5R*ORhLR7>(198bkf971hm1pk`9P&2JV>SvVS}zf>_xJ8_3|MO%%U17 zjgqkgrAnsmqR*K`rc!8>;W8D3FfJr)KUqYylqu(6P&7dkvp_)yjgeqCQ6vsgI5LZ( zXp)1GBu$`TYYM<@fH@V!JP#x$)fwh1%J^2X*yavdgC2o_|KJh=v7R3tp$dVFWSqGa zC^Sq+Oq?U;sYj6tVc)*A&47<9UL@8b=i)6tNLbHIS{tv_<2#kOtp*|Tx;Uq{5 z<^#$5W#BggGSo~4UJhn5gaVT`lYzU!%=KWJ1oc8@_GprR#|KT*AghM>MPM8=_kqA9 zlgvZ)NCE|`X)XiX-dslFAa{o9Q3!Y>n9DHIe10HS@A{xf6zp;{dlY!Hy_KnzQc&Nd zWV&|+MJLF>anDsAS3o;A< literal 0 HcmV?d00001 diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index ec55a2e6..57124c27 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -157,20 +157,20 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() + $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) ); - }); end + rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" when $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) @@ -207,23 +207,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( + $authorOrAddress.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), + 
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() ); - }); end @@ -250,22 +248,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() ); - }); end @@ -355,18 +351,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() + $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.8.1", - "must_redact entity found", + "Must_redact found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) + $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) ); - }); end @@ -448,7 +443,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); diff --git a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java index 41e292f1..5d78c6ad 100644 --- a/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java +++ b/redaction-service-v1/rules-management/src/test/java/com/knecon/fforesight/utility/rules/management/RuleFileMigrationTest.java @@ -25,9 +25,9 @@ public class RuleFileMigrationTest { // Put your redaction service drools paths and dossier-templates paths both RM and DM here static final List ruleFileDirs = List.of( - "/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", - "/Users/maverickstuder/Documents/DocuMine/dossier-templates-v2/", - "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2/"); + "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", + "/home/kschuettler/iqser/fforesight/dossier-templates-v2", + 
"/home/kschuettler/iqser/redaction/dossier-templates-v2"); @Test @@ -36,7 +36,11 @@ public class RuleFileMigrationTest { void migrateAllEntityRules() { for (String ruleFileDir : ruleFileDirs) { - Files.walk(Path.of(ruleFileDir)).filter(this::isEntityRuleFile).map(Path::toFile).peek(System.out::println).forEach(RuleFileMigrator::migrateFile); + Files.walk(Path.of(ruleFileDir)) + .filter(this::isEntityRuleFile) + .map(Path::toFile) + .peek(System.out::println) + .forEach(RuleFileMigrator::migrateFile); } } -- 2.47.2 From 8175f6d012eb59a46950bc1b5fdb629909723ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Tue, 27 Feb 2024 10:04:38 +0100 Subject: [PATCH 04/21] RED-7384: fixes for migration backport --- .../build.gradle.kts | 5 +- .../LegacyRedactionLogMergeService.java | 6 +- .../v1/server/migration/MigrationMapper.java | 94 +++ .../migration/MigrationMessageReceiver.java | 21 +- ...dactionLogToEntityLogMigrationService.java | 135 ++--- .../v1/server/model/MigratedEntityLog.java | 4 +- .../v1/server/model/MigrationEntity.java | 333 ++++++----- .../v1/server/model/PrecursorEntity.java | 6 +- .../model/document/nodes/ImageType.java | 14 +- .../service/EntityLogCreatorService.java | 103 ++-- .../service/UnprocessedChangesService.java | 138 ++--- .../EntityFromPrecursorCreationService.java | 61 +- .../v1/server/utils/MigratedIdsCollector.java | 3 +- .../v1/server/MigrationIntegrationTest.java | 34 +- .../v1/server/RedactionIntegrationTest.java | 545 +++++++++++------- .../utils/LayoutParsingRequestProvider.java | 4 +- 16 files changed, 889 insertions(+), 617 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index a45fbb68..a527a019 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,11 +12,11 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.86.0" +val layoutParserVersion = "0.91.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.338.0" +val persistenceServiceVersion = "2.359.0" val springBootStarterVersion = "3.1.5" configurations { @@ -65,6 +65,7 @@ dependencies { testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}") testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}") + testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}") testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") { exclude( group = "com.iqser.red.service", diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java index aaa8a1be..0d141c87 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/LegacyRedactionLogMergeService.java @@ -45,7 +45,7 @@ public class LegacyRedactionLogMergeService { public RedactionLog addManualAddEntriesAndRemoveSkippedImported(RedactionLog redactionLog, ManualRedactions manualRedactions, String dossierTemplateId) { Set skippedImportedRedactions = new HashSet<>(); - log.info("Merging Redaction log with manual redactions"); + log.info("Adding manual add Entries and removing 
skipped or imported entries"); if (manualRedactions != null) { var manualRedactionLogEntries = addManualAddEntries(manualRedactions.getEntriesToAdd(), redactionLog.getAnalysisNumber()); @@ -92,6 +92,10 @@ public class LegacyRedactionLogMergeService { return redactionLog; } + public long getNumberOfAffectedAnnotations(ManualRedactions manualRedactions) { + + return createManualRedactionWrappers(manualRedactions).stream().map(ManualRedactionWrapper::getId).distinct().count(); + } private List createManualRedactionWrappers(ManualRedactions manualRedactions) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java new file mode 100644 index 00000000..5b049564 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java @@ -0,0 +1,94 @@ +package com.iqser.red.service.redaction.v1.server.migration; + +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Change; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.model.MigrationEntity; + +public class MigrationMapper { + + public static 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change toEntityLogChanges(Change change) { + + return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change(change.getAnalysisNumber(), + toEntityLogType(change.getType()), + change.getDateTime()); + } + + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange toEntityLogManualChanges(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange manualChange) { + + return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange(toManualRedactionType(manualChange.getManualRedactionType()), + manualChange.getProcessedDate(), + manualChange.getRequestedDate(), + manualChange.getUserId(), + manualChange.getPropertyChanges()); + } + + + public static ChangeType toEntityLogType(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType type) { + + return switch (type) { + case ADDED -> ChangeType.ADDED; + case REMOVED -> ChangeType.REMOVED; + case CHANGED -> ChangeType.CHANGED; + }; + } + + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType toManualRedactionType(ManualRedactionType manualRedactionType) { + + return switch (manualRedactionType) { + case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_LOCALLY; + case ADD_TO_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_TO_DICTIONARY; + case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY; + case REMOVE_FROM_DICTIONARY -> 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_FROM_DICTIONARY; + case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_REDACT; + case FORCE_HINT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_HINT; + case RECATEGORIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RECATEGORIZE; + case LEGAL_BASIS_CHANGE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.LEGAL_BASIS_CHANGE; + case RESIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RESIZE; + }; + } + + + public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine toEntityLogEngine(Engine engine) { + + return switch (engine) { + case DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.DICTIONARY; + case NER -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.NER; + case RULE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.RULE; + }; + } + + + public static Set getMigratedEngines(RedactionLogEntry entry) { + + if (entry.getEngines() == null) { + return Collections.emptySet(); + } + return entry.getEngines() + .stream() + .map(MigrationMapper::toEntityLogEngine) + .collect(Collectors.toSet()); + } + + + public List migrateManualChanges(List manualChanges) { + + if (manualChanges == null) { + return Collections.emptyList(); + } + return manualChanges.stream() + .map(MigrationMapper::toEntityLogManualChanges) + .toList(); + } + +} diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java index 66e8db83..740d4116 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java @@ -58,19 +58,28 @@ public class MigrationMessageReceiver { if (redactionLog.getAnalysisVersion() == 0) { redactionLog = legacyVersion0MigrationService.mergeDuplicateAnnotationIds(redactionLog); } else if (migrationRequest.getManualRedactions() != null) { - redactionLog = legacyRedactionLogMergeService.addManualAddEntriesAndRemoveSkippedImported(redactionLog, migrationRequest.getManualRedactions(), migrationRequest.getDossierTemplateId()); + redactionLog = legacyRedactionLogMergeService.addManualAddEntriesAndRemoveSkippedImported(redactionLog, + migrationRequest.getManualRedactions(), + migrationRequest.getDossierTemplateId()); } - MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(redactionLog, document, migrationRequest.getDossierTemplateId(), migrationRequest.getManualRedactions()); + MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(redactionLog, + document, + migrationRequest.getDossierTemplateId(), + migrationRequest.getManualRedactions(), + migrationRequest.getFileId()); + log.info("Storing migrated entityLog and ids to migrate in DB for file {}", migrationRequest.getFileId()); redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.ENTITY_LOG, migratedEntityLog.getEntityLog()); 
redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.MIGRATED_IDS, migratedEntityLog.getMigratedIds()); sendFinished(MigrationResponse.builder().dossierId(migrationRequest.getDossierId()).fileId(migrationRequest.getFileId()).build()); - log.info("Migrated {} redactionLog entries for dossierId {} and fileId {}", - migratedEntityLog.getEntityLog().getEntityLogEntry().size(), - migrationRequest.getDossierId(), - migrationRequest.getFileId()); + log.info("Migrated {} redactionLog entries, found {} annotation ids for migration in the db, {} new manual entries, for dossierId {} and fileId {}", + migratedEntityLog.getEntityLog().getEntityLogEntry().size(), + migratedEntityLog.getMigratedIds().getMappings().size(), + migratedEntityLog.getMigratedIds().getManualRedactionEntriesToAdd().size(), + migrationRequest.getDossierId(), + migrationRequest.getFileId()); log.info(""); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java index 938109d4..a83069a1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java @@ -19,29 +19,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogLegalBasis; -import com.iqser.red.service.redaction.v1.model.MigrationRequest; -import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.MigratedEntityLog; import com.iqser.red.service.redaction.v1.server.model.MigrationEntity; +import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage; -import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import 
com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; +import com.iqser.red.service.redaction.v1.server.service.document.EntityFromPrecursorCreationService; import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.utils.MigratedIdsCollector; @@ -64,13 +59,16 @@ public class RedactionLogToEntityLogMigrationService { ManualChangesApplicationService manualChangesApplicationService; - public MigratedEntityLog migrate(RedactionLog redactionLog, Document document, String dossierTemplateId, ManualRedactions manualRedactions) { + public MigratedEntityLog migrate(RedactionLog redactionLog, Document document, String dossierTemplateId, ManualRedactions manualRedactions, String fileId) { + + log.info("Migrating entities for file {}", fileId); + List entitiesToMigrate = calculateMigrationEntitiesFromRedactionLog(redactionLog, document, dossierTemplateId, fileId); - List entitiesToMigrate = calculateMigrationEntitiesFromRedactionLog(redactionLog, document, dossierTemplateId); MigratedIds migratedIds = entitiesToMigrate.stream() .collect(new MigratedIdsCollector()); applyManualChanges(entitiesToMigrate, manualRedactions); + log.info("applying manual changes to migrated entities for file {}", fileId); EntityLog entityLog = new EntityLog(); entityLog.setAnalysisNumber(redactionLog.getAnalysisNumber()); @@ -85,6 +83,8 @@ public class RedactionLogToEntityLogMigrationService { .toList()); Map oldToNewIDMapping = migratedIds.buildOldToNewMapping(); + + log.info("Writing migrated entities to entityLog for file {}", fileId); entityLog.setEntityLogEntry(entitiesToMigrate.stream() .map(migrationEntity -> 
migrationEntity.toEntityLogEntry(oldToNewIDMapping)) .toList()); @@ -102,6 +102,13 @@ public class RedactionLogToEntityLogMigrationService { .filter(m -> !m.getOldId().equals(m.getNewId())) .collect(new MigratedIdsCollector()); + List manualRedactionEntriesToAdd = entitiesToMigrate.stream() + .filter(MigrationEntity::needsManualEntry) + .map(MigrationEntity::buildManualRedactionEntry) + .toList(); + + idsToMigrateInDb.setManualRedactionEntriesToAdd(manualRedactionEntriesToAdd); + return new MigratedEntityLog(idsToMigrateInDb, entityLog); } @@ -117,27 +124,14 @@ public class RedactionLogToEntityLogMigrationService { manualRedactions.getForceRedactions(), manualRedactions.getResizeRedactions(), manualRedactions.getLegalBasisChanges(), - manualRedactions.getRecategorizations(), - manualRedactions.getLegalBasisChanges()) + manualRedactions.getRecategorizations()) .flatMap(Collection::stream) .collect(Collectors.groupingBy(BaseAnnotation::getAnnotationId)); - entitiesToMigrate.forEach(migrationEntity -> manualChangesPerAnnotationId.getOrDefault(migrationEntity.getOldId(), Collections.emptyList()) - .forEach(manualChange -> { - if (manualChange instanceof ManualResizeRedaction manualResizeRedaction && migrationEntity.getMigratedEntity() instanceof TextEntity textEntity) { - ManualResizeRedaction migratedManualResizeRedaction = ManualResizeRedaction.builder() - .positions(manualResizeRedaction.getPositions()) - .annotationId(migrationEntity.getNewId()) - .updateDictionary(manualResizeRedaction.getUpdateDictionary()) - .addToAllDossiers(manualResizeRedaction.isAddToAllDossiers()) - .textAfter(manualResizeRedaction.getTextAfter()) - .textBefore(manualResizeRedaction.getTextBefore()) - .build(); - manualChangesApplicationService.resize(textEntity, migratedManualResizeRedaction); - } else { - migrationEntity.getMigratedEntity().getManualOverwrite().addChange(manualChange); - } - })); + entitiesToMigrate.forEach(migrationEntity -> 
migrationEntity.applyManualChanges(manualChangesPerAnnotationId.getOrDefault(migrationEntity.getOldId(), + Collections.emptyList()), + manualChangesApplicationService)); + } @@ -147,10 +141,10 @@ public class RedactionLogToEntityLogMigrationService { } - private List calculateMigrationEntitiesFromRedactionLog(RedactionLog redactionLog, Document document, String dossierTemplateId) { + private List calculateMigrationEntitiesFromRedactionLog(RedactionLog redactionLog, Document document, String dossierTemplateId, String fileId) { - List images = getImageBasedMigrationEntities(redactionLog, document, dossierTemplateId); - List textMigrationEntities = getTextBasedMigrationEntities(redactionLog, document, dossierTemplateId); + List images = getImageBasedMigrationEntities(redactionLog, document, fileId); + List textMigrationEntities = getTextBasedMigrationEntities(redactionLog, document, dossierTemplateId, fileId); return Stream.of(textMigrationEntities.stream(), images.stream()) .flatMap(Function.identity()) .toList(); @@ -163,7 +157,7 @@ public class RedactionLogToEntityLogMigrationService { } - private List getImageBasedMigrationEntities(RedactionLog redactionLog, Document document, String dossierTemplateId) { + private List getImageBasedMigrationEntities(RedactionLog redactionLog, Document document, String fileId) { List images = document.streamAllImages() .collect(Collectors.toList()); @@ -195,7 +189,8 @@ public class RedactionLogToEntityLogMigrationService { } String ruleIdentifier; - String reason = Optional.ofNullable(redactionLogImage.getReason()).orElse(""); + String reason = Optional.ofNullable(redactionLogImage.getReason()) + .orElse(""); if (redactionLogImage.getMatchedRule().isBlank() || redactionLogImage.getMatchedRule() == null) { ruleIdentifier = "OLDIMG.0.0"; } else { @@ -209,7 +204,7 @@ public class RedactionLogToEntityLogMigrationService { } else { closestImage.skip(ruleIdentifier, reason); } - migrationEntities.add(new MigrationEntity(null, 
redactionLogImage, closestImage, redactionLogImage.getId(), closestImage.getId())); + migrationEntities.add(MigrationEntity.fromRedactionLogImage(redactionLogImage, closestImage, fileId)); } return migrationEntities; } @@ -250,40 +245,20 @@ public class RedactionLogToEntityLogMigrationService { } - private List getTextBasedMigrationEntities(RedactionLog redactionLog, Document document, String dossierTemplateId) { + private List getTextBasedMigrationEntities(RedactionLog redactionLog, Document document, String dossierTemplateId, String fileId) { List entitiesToMigrate = redactionLog.getRedactionLogEntry() .stream() .filter(redactionLogEntry -> !redactionLogEntry.isImage()) - .map(entry -> MigrationEntity.fromRedactionLogEntry(entry, dictionaryService.isHint(entry.getType(), dossierTemplateId))) - .peek(migrationEntity -> { - if (migrationEntity.getPrecursorEntity().getEntityType().equals(EntityType.HINT) &&// - !migrationEntity.getRedactionLogEntry().isHint() &&// - !migrationEntity.getRedactionLogEntry().isRedacted()) { - migrationEntity.getPrecursorEntity().ignore(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (migrationEntity.getRedactionLogEntry().lastChangeIsRemoved()) { - migrationEntity.getPrecursorEntity().remove(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (lastManualChangeIsRemove(migrationEntity)) { - migrationEntity.getPrecursorEntity().ignore(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (migrationEntity.getPrecursorEntity().isApplied() && migrationEntity.getRedactionLogEntry().isRecommendation()) { - migrationEntity.getPrecursorEntity() - .skip(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } else if (migrationEntity.getPrecursorEntity().isApplied()) { - 
migrationEntity.getPrecursorEntity() - .apply(migrationEntity.getPrecursorEntity().getRuleIdentifier(), - migrationEntity.getPrecursorEntity().getReason(), - migrationEntity.getPrecursorEntity().getLegalBasis()); - } else { - migrationEntity.getPrecursorEntity() - .skip(migrationEntity.getPrecursorEntity().getRuleIdentifier(), migrationEntity.getPrecursorEntity().getReason()); - } - }) + .map(entry -> MigrationEntity.fromRedactionLogEntry(entry, dictionaryService.isHint(entry.getType(), dossierTemplateId), fileId)) .toList(); - Map> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(document, - entitiesToMigrate.stream() - .map(MigrationEntity::getPrecursorEntity) - .toList()); + List precursorEntities = entitiesToMigrate.stream() + .map(MigrationEntity::getPrecursorEntity) + .toList(); + + log.info("Finding all possible entities"); + Map> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(document, precursorEntities); for (MigrationEntity migrationEntity : entitiesToMigrate) { Optional optionalTextEntity = entityFindingUtility.findClosestEntityAndReturnEmptyIfNotFound(migrationEntity.getPrecursorEntity(), @@ -297,45 +272,19 @@ public class RedactionLogToEntityLogMigrationService { continue; } - TextEntity entity = createCorrectEntity(migrationEntity.getPrecursorEntity(), document, optionalTextEntity.get().getTextRange()); - migrationEntity.setMigratedEntity(entity); - migrationEntity.setOldId(migrationEntity.getPrecursorEntity().getId()); - migrationEntity.setNewId(entity.getId()); // Can only be on one page, since redactionLogEntries can only be on one page + TextEntity migratedEntity = EntityFromPrecursorCreationService.createCorrectEntity(migrationEntity.getPrecursorEntity(), optionalTextEntity.get(), true); + migrationEntity.setMigratedEntity(migratedEntity); + migrationEntity.setOldId(migrationEntity.getPrecursorEntity().getId()); + migrationEntity.setNewId(migratedEntity.getId()); } 
tempEntitiesByValue.values() .stream() .flatMap(Collection::stream) .forEach(TextEntity::removeFromGraph); + return entitiesToMigrate; } - - private static boolean lastManualChangeIsRemove(MigrationEntity migrationEntity) { - - if (migrationEntity.getRedactionLogEntry().getManualChanges() == null) { - return false; - } - - return migrationEntity.getRedactionLogEntry().getManualChanges() - .stream() - .reduce((a, b) -> b) - .map(m -> m.getManualRedactionType().equals(ManualRedactionType.REMOVE_LOCALLY)) - .orElse(false); - } - - - private TextEntity createCorrectEntity(PrecursorEntity precursorEntity, SemanticNode node, TextRange closestTextRange) { - - EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - TextEntity correctEntity = entityCreationService.forceByTextRange(closestTextRange, precursorEntity.getType(), precursorEntity.getEntityType(), node); - - correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); - correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); - correctEntity.setDossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()); - correctEntity.getManualOverwrite().addChanges(precursorEntity.getManualOverwrite().getManualChangeLog()); - return correctEntity; - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java index aff967e5..31a9e30d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigratedEntityLog.java @@ -1,7 +1,10 @@ package com.iqser.red.service.redaction.v1.server.model; +import java.util.List; + import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import lombok.AllArgsConstructor; import lombok.Builder; @@ -16,5 +19,4 @@ public class MigratedEntityLog { MigratedIds migratedIds; EntityLog entityLog; - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index bd948b08..975bcc8b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -1,34 +1,45 @@ package com.iqser.red.service.redaction.v1.server.model; +import static com.iqser.red.service.redaction.v1.server.service.EntityLogCreatorService.buildEntryState; +import static com.iqser.red.service.redaction.v1.server.service.EntityLogCreatorService.buildEntryType; + +import java.awt.geom.Rectangle2D; import java.util.Collections; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Change; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.migration.MigrationMapper; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.service.ManualChangeFactory; +import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Data; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +@Slf4j @Data +@Builder @AllArgsConstructor @RequiredArgsConstructor public final class MigrationEntity { @@ -38,28 +49,73 @@ public final class MigrationEntity { 
private IEntity migratedEntity; private String oldId; private String newId; + private String fileId; + + @Builder.Default + List manualChanges = new LinkedList<>(); - public static MigrationEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry, boolean hint) { + public static MigrationEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry, boolean hint, String fileId) { - return new MigrationEntity(createPrecursorEntity(redactionLogEntry, hint), redactionLogEntry); + PrecursorEntity precursorEntity = createPrecursorEntity(redactionLogEntry, hint); + + if (precursorEntity.getEntityType().equals(EntityType.HINT) && !redactionLogEntry.isHint() && !redactionLogEntry.isRedacted()) { + precursorEntity.ignore(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (redactionLogEntry.lastChangeIsRemoved()) { + precursorEntity.remove(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (lastManualChangeIsRemove(redactionLogEntry)) { + precursorEntity.ignore(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (precursorEntity.isApplied() && redactionLogEntry.isRecommendation()) { + precursorEntity.skip(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } else if (precursorEntity.isApplied()) { + precursorEntity.apply(precursorEntity.getRuleIdentifier(), precursorEntity.getReason(), precursorEntity.getLegalBasis()); + } else { + precursorEntity.skip(precursorEntity.getRuleIdentifier(), precursorEntity.getReason()); + } + + return MigrationEntity.builder().precursorEntity(precursorEntity).redactionLogEntry(redactionLogEntry).oldId(redactionLogEntry.getId()).fileId(fileId).build(); + } + + + public static MigrationEntity fromRedactionLogImage(RedactionLogEntry redactionLogImage, Image image, String fileId) { + + return 
MigrationEntity.builder().redactionLogEntry(redactionLogImage).migratedEntity(image).oldId(redactionLogImage.getId()).newId(image.getId()).fileId(fileId).build(); + } + + + private static boolean lastManualChangeIsRemove(RedactionLogEntry redactionLogEntry) { + + if (redactionLogEntry.getManualChanges() == null) { + return false; + } + + return redactionLogEntry.getManualChanges() + .stream() + .reduce((a, b) -> b) + .map(m -> m.getManualRedactionType().equals(ManualRedactionType.REMOVE_LOCALLY)) + .orElse(false); } public static PrecursorEntity createPrecursorEntity(RedactionLogEntry redactionLogEntry, boolean hint) { String ruleIdentifier = buildRuleIdentifier(redactionLogEntry); - List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); + List rectangleWithPages = redactionLogEntry.getPositions() + .stream() + .map(RectangleWithPage::fromRedactionLogRectangle) + .toList(); EntityType entityType = getEntityType(redactionLogEntry, hint); return PrecursorEntity.builder() .id(redactionLogEntry.getId()) .value(redactionLogEntry.getValue()) .entityPosition(rectangleWithPages) .ruleIdentifier(ruleIdentifier) - .reason(Optional.ofNullable(redactionLogEntry.getReason()).orElse("")) + .reason(Optional.ofNullable(redactionLogEntry.getReason()) + .orElse("")) .legalBasis(redactionLogEntry.getLegalBasis()) .type(redactionLogEntry.getType()) .section(redactionLogEntry.getSection()) + .engines(MigrationMapper.getMigratedEngines(redactionLogEntry)) .entityType(entityType) .applied(redactionLogEntry.isRedacted()) .isDictionaryEntry(redactionLogEntry.isDictionaryEntry()) @@ -100,14 +156,6 @@ public final class MigrationEntity { } - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change toEntityLogChanges(Change change) { - - return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change(change.getAnalysisNumber(), - 
toEntityLogType(change.getType()), - change.getDateTime()); - } - - private static EntryType getEntryType(EntityType entityType) { return switch (entityType) { @@ -120,42 +168,6 @@ public final class MigrationEntity { } - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange toEntityLogManualChanges(ManualChange manualChange) { - - return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange(toManualRedactionType(manualChange.getManualRedactionType()), - manualChange.getProcessedDate(), - manualChange.getRequestedDate(), - manualChange.getUserId(), - manualChange.getPropertyChanges()); - } - - - private static ChangeType toEntityLogType(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType type) { - - return switch (type) { - case ADDED -> ChangeType.ADDED; - case REMOVED -> ChangeType.REMOVED; - case CHANGED -> ChangeType.CHANGED; - }; - } - - - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType toManualRedactionType(ManualRedactionType manualRedactionType) { - - return switch (manualRedactionType) { - case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_LOCALLY; - case ADD_TO_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_TO_DICTIONARY; - case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY; - case REMOVE_FROM_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_FROM_DICTIONARY; - case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_REDACT; - case FORCE_HINT -> 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_HINT; - case RECATEGORIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RECATEGORIZE; - case LEGAL_BASIS_CHANGE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.LEGAL_BASIS_CHANGE; - case RESIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RESIZE; - }; - } - - public EntityLogEntry toEntityLogEntry(Map oldToNewIdMapping) { EntityLogEntry entityLogEntry; @@ -171,10 +183,13 @@ public final class MigrationEntity { entityLogEntry.setManualChanges(ManualChangeFactory.toManualChangeList(migratedEntity.getManualOverwrite().getManualChangeLog(), redactionLogEntry.isHint())); entityLogEntry.setColor(redactionLogEntry.getColor()); - entityLogEntry.setChanges(redactionLogEntry.getChanges().stream().map(MigrationEntity::toEntityLogChanges).toList()); + entityLogEntry.setChanges(redactionLogEntry.getChanges() + .stream() + .map(MigrationMapper::toEntityLogChanges) + .toList()); entityLogEntry.setReference(migrateSetOfIds(redactionLogEntry.getReference(), oldToNewIdMapping)); entityLogEntry.setImportedRedactionIntersections(migrateSetOfIds(redactionLogEntry.getImportedRedactionIntersections(), oldToNewIdMapping)); - entityLogEntry.setEngines(getMigratedEngines(redactionLogEntry)); + entityLogEntry.setEngines(MigrationMapper.getMigratedEngines(redactionLogEntry)); if (redactionLogEntry.getLegalBasis() != null) { entityLogEntry.setLegalBasis(redactionLogEntry.getLegalBasis()); } @@ -198,47 +213,21 @@ public final class MigrationEntity { } - private List migrateManualChanges(List manualChanges) { - - if (manualChanges == null) { - return Collections.emptyList(); - } - return manualChanges.stream().map(MigrationEntity::toEntityLogManualChanges).toList(); - } - - - private static Set 
getMigratedEngines(RedactionLogEntry entry) { - - if (entry.getEngines() == null) { - return Collections.emptySet(); - } - return entry.getEngines().stream().map(MigrationEntity::toEntityLogEngine).collect(Collectors.toSet()); - } - - private Set migrateSetOfIds(Set ids, Map oldToNewIdMapping) { if (ids == null) { return Collections.emptySet(); } - return ids.stream().map(oldToNewIdMapping::get).collect(Collectors.toSet()); - } - - - private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine toEntityLogEngine(Engine engine) { - - return switch (engine) { - case DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.DICTIONARY; - case NER -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.NER; - case RULE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.RULE; - }; + return ids.stream() + .map(oldToNewIdMapping::get) + .collect(Collectors.toSet()); } public EntityLogEntry createEntityLogEntry(Image image) { List positions = getPositionsFromOverride(image).orElse(List.of(new Position(image.getPosition(), image.getPage().getNumber()))); - return EntityLogEntry.builder() + return EntityLogEntry.builder() .id(image.getId()) .value(image.value()) .type(image.type()) @@ -249,7 +238,8 @@ public final class MigrationEntity { .positions(positions) .containingNodeId(image.getTreeId()) .closestHeadline(image.getHeadline().getTextBlock().getSearchText()) - .section(redactionLogEntry.getSection()) + .section(image.getManualOverwrite().getSection() + .orElse(redactionLogEntry.getSection())) .textAfter(redactionLogEntry.getTextAfter()) .textBefore(redactionLogEntry.getTextBefore()) .imageHasTransparency(image.isTransparent()) @@ -270,7 +260,8 @@ public final class MigrationEntity { .type(precursorEntity.type()) .state(buildEntryState(precursorEntity)) .entryType(buildEntryType(precursorEntity)) - 
.section(redactionLogEntry.getSection()) + .section(precursorEntity.getManualOverwrite().getSection() + .orElse(redactionLogEntry.getSection())) .textAfter(redactionLogEntry.getTextAfter()) .textBefore(redactionLogEntry.getTextBefore()) .containingNodeId(Collections.emptyList()) @@ -280,12 +271,11 @@ public final class MigrationEntity { .dossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()) .startOffset(-1) .endOffset(-1) - .positions(precursorEntity.getManualOverwrite() - .getPositions() - .orElse(precursorEntity.getEntityPosition()) - .stream() - .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) - .toList()) + .positions(precursorEntity.getManualOverwrite().getPositions() + .orElse(precursorEntity.getEntityPosition()) + .stream() + .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .toList()) .engines(Collections.emptySet()) .build(); } @@ -300,11 +290,13 @@ public final class MigrationEntity { .positions(rectanglesPerLine) .reason(entity.buildReasonWithManualChangeDescriptions()) .legalBasis(entity.legalBasis()) - .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) + .value(entity.getManualOverwrite().getValue() + .orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? 
entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.type()) - .section(redactionLogEntry.getSection()) - .textAfter(redactionLogEntry.getTextAfter()) - .textBefore(redactionLogEntry.getTextBefore()) + .section(entity.getManualOverwrite().getSection() + .orElse(redactionLogEntry.getSection())) + .textAfter(entity.getTextAfter()) + .textBefore(entity.getTextBefore()) .containingNodeId(entity.getDeepestFullyContainingNode().getTreeId()) .closestHeadline(entity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) @@ -322,54 +314,129 @@ public final class MigrationEntity { private static List getRectanglesPerLine(TextEntity entity) { return getPositionsFromOverride(entity).orElse(entity.getPositionsOnPagePerPage() - .get(0) - .getRectanglePerLine() - .stream() - .map(rectangle2D -> new Position(rectangle2D, entity.getPositionsOnPagePerPage().get(0).getPage().getNumber())) - .toList()); + .get(0).getRectanglePerLine() + .stream() + .map(rectangle2D -> new Position(rectangle2D, + entity.getPositionsOnPagePerPage() + .get(0).getPage().getNumber())) + .toList()); } private static Optional> getPositionsFromOverride(IEntity entity) { - return entity.getManualOverwrite().getPositions().map(rects -> rects.stream().map(r -> new Position(r.rectangle2D(), r.pageNumber())).toList()); - } - - - private EntryState buildEntryState(IEntity entity) { - - if (entity.applied() && entity.active()) { - return EntryState.APPLIED; - } else if (entity.skipped() && entity.active()) { - return EntryState.SKIPPED; - } else if (entity.ignored()) { - return EntryState.IGNORED; - } else { - return EntryState.REMOVED; - } - } - - - private EntryType buildEntryType(IEntity entity) { - - if (entity instanceof TextEntity textEntity) { - return getEntryType(textEntity.getEntityType()); - } else if (entity instanceof PrecursorEntity precursorEntity) { - if (precursorEntity.isRectangle()) { - return 
EntryType.AREA; - } - return getEntryType(precursorEntity.getEntityType()); - } else if (entity instanceof Image) { - return EntryType.IMAGE; - } - throw new UnsupportedOperationException(String.format("Entity subclass %s is not implemented!", entity.getClass())); + return entity.getManualOverwrite().getPositions() + .map(rects -> rects.stream() + .map(r -> new Position(r.rectangle2D(), r.pageNumber())) + .toList()); } public boolean hasManualChangesOrComments() { return !(redactionLogEntry.getManualChanges() == null || redactionLogEntry.getManualChanges().isEmpty()) || // - !(redactionLogEntry.getComments() == null || redactionLogEntry.getComments().isEmpty()); + !(redactionLogEntry.getComments() == null || redactionLogEntry.getComments().isEmpty()) // + || hasManualChanges(); + } + + + public boolean hasManualChanges() { + + return !manualChanges.isEmpty(); + } + + + public void applyManualChanges(List manualChangesToApply, ManualChangesApplicationService manualChangesApplicationService) { + + manualChanges.addAll(manualChangesToApply); + manualChangesToApply.forEach(manualChange -> { + if (manualChange instanceof ManualResizeRedaction manualResizeRedaction && migratedEntity instanceof TextEntity textEntity) { + // Due to the value in the old redaction log already being resized, there is no way to find the original entity ID and therefore to migrate the resize annotation correctly. + // Instead, we add an add_locally change to the db. 
+ ManualResizeRedaction migratedManualResizeRedaction = ManualResizeRedaction.builder() + .positions(manualResizeRedaction.getPositions()) + .annotationId(getNewId()) + .updateDictionary(manualResizeRedaction.getUpdateDictionary()) + .addToAllDossiers(manualResizeRedaction.isAddToAllDossiers()) + .textAfter(manualResizeRedaction.getTextAfter()) + .textBefore(manualResizeRedaction.getTextBefore()) + .build(); + manualChangesApplicationService.resize(textEntity, migratedManualResizeRedaction); + } else { + migratedEntity.getManualOverwrite().addChange(manualChange); + } + }); + } + + + public ManualRedactionEntry buildManualRedactionEntry() { + + assert hasManualChanges(); + + // currently we need to insert a manual redaction entry, whenever an entity has been resized. + String user = manualChanges.stream() + .filter(mc -> mc instanceof ManualResizeRedaction) + .findFirst() + .orElse(manualChanges.get(0)).getUser(); + + return ManualRedactionEntry.builder() + .annotationId(newId) + .fileId(fileId) + .type(redactionLogEntry.getType()) + .value(redactionLogEntry.getValue()) + .reason(redactionLogEntry.getReason()) + .legalBasis(redactionLogEntry.getLegalBasis()) + .section(redactionLogEntry.getSection()) + .addToDictionary(false) + .addToDossierDictionary(false) + .rectangle(false) + .positions(buildPositions(migratedEntity)) + .user(user) + .build(); + } + + + private List buildPositions(IEntity entity) { + + if (entity instanceof TextEntity textEntity) { + + var positionsOnPage = textEntity.getPositionsOnPagePerPage() + .get(0); + return positionsOnPage.getRectanglePerLine() + .stream() + .map(p -> new Rectangle((float) p.getX(), (float) p.getY(), (float) p.getWidth(), (float) p.getHeight(), positionsOnPage.getPage().getNumber())) + .toList(); + } + if (entity instanceof PrecursorEntity pEntity) { + + return pEntity.getManualOverwrite().getPositions() + .orElse(pEntity.getEntityPosition()) + .stream() + .map(p -> new Rectangle((float) p.rectangle2D().getX(), + 
(float) p.rectangle2D().getY(), + (float) p.rectangle2D().getWidth(), + (float) p.rectangle2D().getHeight(), + p.pageNumber())) + .toList(); + } + if (entity instanceof Image image) { + + Rectangle2D position = image.getManualOverwrite().getPositions() + .map(p -> p.get(0).rectangle2D()) + .orElse(image.getPosition()); + + return List.of(new Rectangle((float) position.getX(), (float) position.getY(), (float) position.getWidth(), (float) position.getHeight(), image.getPage().getNumber())); + + } else { + throw new UnsupportedOperationException(); + } + } + + + public boolean needsManualEntry() { + + return manualChanges.stream() + .anyMatch(mc -> mc instanceof ManualResizeRedaction && !((ManualResizeRedaction) mc).getUpdateDictionary()) && !(migratedEntity instanceof Image); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java index 22416c0e..3058c5d7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java @@ -43,7 +43,6 @@ public class PrecursorEntity implements IEntity { String type; String section; EntityType entityType; - EntryType entryType; boolean applied; boolean isDictionaryEntry; boolean isDossierDictionaryEntry; @@ -61,8 +60,8 @@ public class PrecursorEntity implements IEntity { .stream() .map(RectangleWithPage::fromAnnotationRectangle) .toList(); + var entityType = hint ? EntityType.HINT : EntityType.ENTITY; - var entryType = hint ? EntryType.HINT : (manualRedactionEntry.isRectangle() ? 
EntryType.AREA : EntryType.ENTITY); ManualChangeOverwrite manualChangeOverwrite = new ManualChangeOverwrite(entityType); manualChangeOverwrite.addChange(manualRedactionEntry); return PrecursorEntity.builder() @@ -75,7 +74,6 @@ public class PrecursorEntity implements IEntity { .type(manualRedactionEntry.getType()) .section(manualRedactionEntry.getSection()) .entityType(entityType) - .entryType(entryType) .applied(true) .isDictionaryEntry(false) .isDossierDictionaryEntry(false) @@ -103,7 +101,6 @@ public class PrecursorEntity implements IEntity { .type(entityLogEntry.getType()) .section(entityLogEntry.getSection()) .entityType(entityType) - .entryType(entityLogEntry.getEntryType()) .isDictionaryEntry(entityLogEntry.isDictionaryEntry()) .isDossierDictionaryEntry(entityLogEntry.isDossierDictionaryEntry()) .manualOverwrite(new ManualChangeOverwrite(entityType)) @@ -134,7 +131,6 @@ public class PrecursorEntity implements IEntity { .type(Optional.ofNullable(importedRedaction.getType()) .orElse(IMPORTED_REDACTION_TYPE)) .entityType(entityType) - .entryType(entryType) .isDictionaryEntry(false) .isDossierDictionaryEntry(false) .rectangle(value.isBlank() || entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java index 397d7b11..e5e025f0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java @@ -6,10 +6,22 @@ public enum ImageType { LOGO, FORMULA, SIGNATURE, - OTHER, + OTHER { + @Override 
+ public String toString() { + + return "image"; + } + }, OCR; + public String toString() { + + return name().toLowerCase(Locale.ENGLISH); + } + + public static ImageType fromString(String imageType) { return switch (imageType.toLowerCase(Locale.ROOT)) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index 6cd69627..c77f7a32 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -4,7 +4,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; -import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; @@ -19,7 +18,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; @@ -32,7 +30,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionO import 
com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import lombok.AccessLevel; @@ -68,20 +65,19 @@ public class EntityLogCreatorService { List entityLogEntries = createEntityLogEntries(document, analyzeRequest, notFoundEntities); List legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); - EntityLog entityLog = new EntityLog(redactionServiceSettings.getAnalysisVersion(), - analyzeRequest.getAnalysisNumber(), - entityLogEntries, - toEntityLogLegalBasis(legalBasis), - dictionaryVersion.getDossierTemplateVersion(), - dictionaryVersion.getDossierVersion(), - rulesVersion, - legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); List previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getManualRedactions(), analyzeRequest.getAnalysisNumber()); - return entityLog; + return new EntityLog(redactionServiceSettings.getAnalysisVersion(), + analyzeRequest.getAnalysisNumber(), + entityLogEntries, + toEntityLogLegalBasis(legalBasis), + dictionaryVersion.getDossierTemplateVersion(), + dictionaryVersion.getDossierVersion(), + rulesVersion, + legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); } @@ -116,21 +112,24 @@ public class EntityLogCreatorService { DictionaryVersion dictionaryVersion) { List newEntityLogEntries = createEntityLogEntries(document, analyzeRequest, notFoundEntries).stream() - .filter(entry -> entry.getContainingNodeId().isEmpty() || 
sectionsToReanalyseIds.contains(entry.getContainingNodeId().get(0))) + .filter(entry -> entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() + .get(0))) .collect(Collectors.toList()); - Set newEntityIds = newEntityLogEntries.stream().map(EntityLogEntry::getId).collect(Collectors.toSet()); + Set newEntityIds = newEntityLogEntries.stream() + .map(EntityLogEntry::getId) + .collect(Collectors.toSet()); List previousEntriesFromReAnalyzedSections = previousEntityLog.getEntityLogEntry() .stream() .filter(entry -> (newEntityIds.contains(entry.getId()) || entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() - .get(0)))) + .get(0)))) .toList(); previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections); boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, - newEntityLogEntries, - analyzeRequest.getManualRedactions(), - analyzeRequest.getAnalysisNumber()); + newEntityLogEntries, + analyzeRequest.getManualRedactions(), + analyzeRequest.getAnalysisNumber()); previousEntityLog.getEntityLogEntry().addAll(newEntityLogEntries); return updateVersionsAndReturnChanges(previousEntityLog, dictionaryVersion, analyzeRequest, hasChanges); @@ -139,22 +138,6 @@ public class EntityLogCreatorService { private List createEntityLogEntries(Document document, AnalyzeRequest analyzeRequest, List notFoundPrecursorEntries) { - Set dictionaryEntries; - Set dictionaryEntriesValues; - - if (analyzeRequest.getManualRedactions() != null && !analyzeRequest.getManualRedactions().getEntriesToAdd().isEmpty()) { - dictionaryEntries = analyzeRequest.getManualRedactions().getEntriesToAdd() - .stream() - .filter(e -> e.isAddToDictionary() || e.isAddToDossierDictionary()) - .collect(Collectors.toSet()); - dictionaryEntriesValues = dictionaryEntries.stream() - .map(ManualRedactionEntry::getValue) - .collect(Collectors.toSet()); - } else { - 
dictionaryEntriesValues = new HashSet<>(); - dictionaryEntries = new HashSet<>(); - } - String dossierTemplateId = analyzeRequest.getDossierTemplateId(); List entries = new ArrayList<>(); @@ -164,22 +147,21 @@ public class EntityLogCreatorService { .filter(entity -> !entity.getValue().isEmpty()) .filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendation) .filter(entity -> !entity.removed()) - .forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, dictionaryEntries, dictionaryEntriesValues))); - document.streamAllImages().filter(entity -> !entity.removed()).forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId))); - notFoundPrecursorEntries.stream().filter(entity -> !entity.removed()).forEach(precursorEntity -> entries.add(createEntityLogEntry(precursorEntity, dossierTemplateId))); + .forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode))); + document.streamAllImages() + .filter(entity -> !entity.removed()) + .forEach(imageNode -> entries.add(createEntityLogEntry(imageNode, dossierTemplateId))); + notFoundPrecursorEntries.stream() + .filter(entity -> !entity.removed()) + .forEach(precursorEntity -> entries.add(createEntityLogEntry(precursorEntity, dossierTemplateId))); return entries; } - private List toEntityLogEntries(TextEntity textEntity, Set dictionaryEntries, Set dictionaryEntriesValues) { + private List toEntityLogEntries(TextEntity textEntity) { List entityLogEntries = new ArrayList<>(); - // Adding ADD_TO_DICTIONARY manual change to the entity's manual overwrite - if (dictionaryEntriesValues.contains(textEntity.getValue())) { - textEntity.getManualOverwrite().addChange(dictionaryEntries.stream().filter(entry -> entry.getValue().equals(textEntity.getValue())).findFirst().get()); - } - // split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) { @@ -202,12 +184,11 
@@ public class EntityLogCreatorService { private EntityLogEntry createEntityLogEntry(Image image, String dossierTemplateId) { - String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH); - boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId); + boolean isHint = dictionaryService.isHint(image.type(), dossierTemplateId); return EntityLogEntry.builder() .id(image.getId()) .value(image.value()) - .type(imageType) + .type(image.type()) .reason(image.buildReasonWithManualChangeDescriptions()) .legalBasis(image.legalBasis()) .matchedRule(image.getMatchedRule().getRuleIdentifier().toString()) @@ -229,7 +210,8 @@ public class EntityLogCreatorService { private EntityLogEntry createEntityLogEntry(PrecursorEntity precursorEntity, String dossierTemplateId) { - String type = precursorEntity.getManualOverwrite().getType().orElse(precursorEntity.getType()); + String type = precursorEntity.getManualOverwrite().getType() + .orElse(precursorEntity.getType()); boolean isHint = isHint(precursorEntity.getEntityType()); return EntityLogEntry.builder() .id(precursorEntity.getId()) @@ -239,7 +221,8 @@ public class EntityLogCreatorService { .type(type) .state(buildEntryState(precursorEntity)) .entryType(buildEntryType(precursorEntity)) - .section(precursorEntity.getManualOverwrite().getSection().orElse(precursorEntity.getSection())) + .section(precursorEntity.getManualOverwrite().getSection() + .orElse(precursorEntity.getSection())) .containingNodeId(Collections.emptyList()) .closestHeadline("") .matchedRule(precursorEntity.getMatchedRule().getRuleIdentifier().toString()) @@ -267,12 +250,17 @@ public class EntityLogCreatorService { private EntityLogEntry createEntityLogEntry(TextEntity entity) { Set referenceIds = new HashSet<>(); - entity.references().stream().filter(TextEntity::active).forEach(ref -> ref.getPositionsOnPagePerPage().forEach(pos -> referenceIds.add(pos.getId()))); + 
entity.references() + .stream() + .filter(TextEntity::active) + .forEach(ref -> ref.getPositionsOnPagePerPage() + .forEach(pos -> referenceIds.add(pos.getId()))); boolean isHint = isHint(entity.getEntityType()); return EntityLogEntry.builder() .reason(entity.buildReasonWithManualChangeDescriptions()) .legalBasis(entity.legalBasis()) - .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) + .value(entity.getManualOverwrite().getValue() + .orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.type()) .section(entity.getManualOverwrite().getSection() .orElse(entity.getDeepestFullyContainingNode().toString())) @@ -314,7 +302,7 @@ public class EntityLogCreatorService { } - private EntryState buildEntryState(IEntity entity) { + public static EntryState buildEntryState(IEntity entity) { if (entity.applied() && entity.active()) { return EntryState.APPLIED; @@ -328,12 +316,17 @@ public class EntityLogCreatorService { } - private EntryType buildEntryType(IEntity entity) { + public static EntryType buildEntryType(IEntity entity) { if (entity instanceof TextEntity textEntity) { return getEntryType(textEntity.getEntityType()); } else if (entity instanceof PrecursorEntity precursorEntity) { - return precursorEntity.getEntryType(); + if (precursorEntity.isRectangle()) { + return EntryType.AREA; + } + return getEntryType(precursorEntity.getEntityType()); + } else if (entity instanceof Image) { + return EntryType.IMAGE; } throw new UnsupportedOperationException(String.format("Entity subclass %s is not implemented!", entity.getClass())); } @@ -353,7 +346,9 @@ public class EntityLogCreatorService { private List toEntityLogLegalBasis(List legalBasis) { - return legalBasis.stream().map(l -> new EntityLogLegalBasis(l.getName(), l.getDescription(), l.getReason())).collect(Collectors.toList()); + return 
legalBasis.stream() + .map(l -> new EntityLogLegalBasis(l.getName(), l.getDescription(), l.getReason())) + .collect(Collectors.toList()); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java index b7dfd989..2a469cc1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java @@ -76,11 +76,19 @@ public class UnprocessedChangesService { EntityLog previousEntityLog = redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); - Set allAnnotationIds = analyzeRequest.getManualRedactions().getEntriesToAdd().stream().map(ManualRedactionEntry::getAnnotationId).collect(Collectors.toSet()); - Set resizeIds = analyzeRequest.getManualRedactions().getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId).collect(Collectors.toSet()); + Set allAnnotationIds = analyzeRequest.getManualRedactions().getEntriesToAdd() + .stream() + .map(ManualRedactionEntry::getAnnotationId) + .collect(Collectors.toSet()); + Set resizeIds = analyzeRequest.getManualRedactions().getResizeRedactions() + .stream() + .map(ManualResizeRedaction::getAnnotationId) + .collect(Collectors.toSet()); allAnnotationIds.addAll(resizeIds); - List manualResizeRedactions = analyzeRequest.getManualRedactions().getResizeRedactions().stream().toList(); + List manualResizeRedactions = 
analyzeRequest.getManualRedactions().getResizeRedactions() + .stream() + .toList(); List manualEntitiesToBeResized = previousEntityLog.getEntityLogEntry() .stream() .filter(entityLogEntry -> resizeIds.contains(entityLogEntry.getId())) @@ -99,31 +107,36 @@ public class UnprocessedChangesService { notFoundManualEntities = entityFromPrecursorCreationService.toTextEntity(manualEntities, document); } - document.getEntities().forEach(textEntity -> { - Set processedIds = new HashSet<>(); - for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) { - if (processedIds.contains(positionsOnPerPage.getId())) { - continue; - } - processedIds.add(positionsOnPerPage.getId()); - List positions = positionsOnPerPage.getRectanglePerLine() - .stream() - .map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber())) - .collect(Collectors.toList()); - unprocessedManualEntities.add(UnprocessedManualEntity.builder() - .annotationId(allAnnotationIds.stream().filter(textEntity::matchesAnnotationId).findFirst().orElse("")) - .textBefore(textEntity.getTextBefore()) - .textAfter(textEntity.getTextAfter()) - .section(textEntity.getManualOverwrite().getSection().orElse(textEntity.getDeepestFullyContainingNode().toString())) - .positions(positions) - .build()); - } - }); + document.getEntities() + .forEach(textEntity -> { + Set processedIds = new HashSet<>(); + for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) { + if (processedIds.contains(positionsOnPerPage.getId())) { + continue; + } + processedIds.add(positionsOnPerPage.getId()); + List positions = positionsOnPerPage.getRectanglePerLine() + .stream() + .map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber())) + .collect(Collectors.toList()); + unprocessedManualEntities.add(UnprocessedManualEntity.builder() + .annotationId(allAnnotationIds.stream() + .filter(textEntity::matchesAnnotationId) + .findFirst() + .orElse("")) + 
.textBefore(textEntity.getTextBefore()) + .textAfter(textEntity.getTextAfter()) + .section(textEntity.getManualOverwrite().getSection() + .orElse(textEntity.getDeepestFullyContainingNode().toString())) + .positions(positions) + .build()); + } + }); notFoundManualEntities.forEach(manualEntity -> unprocessedManualEntities.add(builDefaultUnprocessedManualEntity(manualEntity))); rabbitTemplate.convertAndSend(QueueNames.REDACTION_ANALYSIS_RESPONSE_QUEUE, - AnalyzeResponse.builder().fileId(analyzeRequest.getFileId()).unprocessedManualEntities(unprocessedManualEntities).build()); + AnalyzeResponse.builder().fileId(analyzeRequest.getFileId()).unprocessedManualEntities(unprocessedManualEntities).build()); } @@ -143,13 +156,13 @@ public class UnprocessedChangesService { continue; } - TextEntity correctEntity = createCorrectEntity(precursorEntity, optionalTextEntity.get()); + TextEntity correctEntity = EntityFromPrecursorCreationService.createCorrectEntity(precursorEntity, optionalTextEntity.get()); Optional optionalManualResizeRedaction = manualResizeRedactions.stream() .filter(manualResizeRedaction -> manualResizeRedaction.getAnnotationId().equals(precursorEntity.getId())) .findFirst(); if (optionalManualResizeRedaction.isPresent()) { ManualResizeRedaction manualResizeRedaction = optionalManualResizeRedaction.get(); - manualChangesApplicationService.resizeEntityAndReinsert(correctEntity, manualResizeRedaction); + manualChangesApplicationService.resize(correctEntity, manualResizeRedaction); // If the entity's value is not the same as the manual resize request's value it means we didn't find it anywhere and we want to remove it // from the graph, so it does not get processed and sent back to persistence-service to update its value. 
@@ -160,60 +173,37 @@ public class UnprocessedChangesService { } // remove all temp entities from the graph - tempEntities.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph); + tempEntities.values() + .stream() + .flatMap(Collection::stream) + .forEach(TextEntity::removeFromGraph); } - private TextEntity createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity) { + private UnprocessedManualEntity builDefaultUnprocessedManualEntity(PrecursorEntity precursorEntity) { - TextEntity correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId()); - - correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode()); - correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes())); - correctEntity.setDuplicateTextRanges(new ArrayList<>(closestEntity.getDuplicateTextRanges())); - correctEntity.setPages(new HashSet<>(closestEntity.getPages())); - - correctEntity.setValue(closestEntity.getValue()); - correctEntity.setTextAfter(closestEntity.getTextAfter()); - correctEntity.setTextBefore(closestEntity.getTextBefore()); - - correctEntity.getIntersectingNodes().forEach(n -> n.getEntities().add(correctEntity)); - correctEntity.getPages().forEach(page -> page.getEntities().add(correctEntity)); - - correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); - correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); - correctEntity.setDossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()); - correctEntity.getManualOverwrite().addChanges(precursorEntity.getManualOverwrite().getManualChangeLog()); - - return correctEntity; -} + return UnprocessedManualEntity.builder() + .annotationId(precursorEntity.getId()) + .textAfter("") + .textBefore("") + .section("") + .positions(precursorEntity.getManualOverwrite().getPositions() + 
.orElse(precursorEntity.getEntityPosition()) + .stream() + .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) + .toList()) + .build(); + } -private UnprocessedManualEntity builDefaultUnprocessedManualEntity(PrecursorEntity precursorEntity) { + private List manualEntitiesConverter(ManualRedactions manualRedactions, String dossierTemplateId) { - return UnprocessedManualEntity.builder() - .annotationId(precursorEntity.getId()) - .textAfter("") - .textBefore("") - .section("") - .positions(precursorEntity.getManualOverwrite() - .getPositions() - .orElse(precursorEntity.getEntityPosition()) - .stream() - .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber())) - .toList()) - .build(); -} - - -private List manualEntitiesConverter(ManualRedactions manualRedactions, String dossierTemplateId) { - - return manualRedactions.getEntriesToAdd() - .stream() - .filter(manualRedactionEntry -> manualRedactionEntry.getPositions() != null && !manualRedactionEntry.getPositions().isEmpty()) - .map(manualRedactionEntry -> PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, - dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) - .toList(); -} + return manualRedactions.getEntriesToAdd() + .stream() + .filter(manualRedactionEntry -> manualRedactionEntry.getPositions() != null && !manualRedactionEntry.getPositions().isEmpty()) + .map(manualRedactionEntry -> PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, + dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) + .toList(); + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java index faddf9bd..13033a6f 
100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java @@ -14,7 +14,6 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; @@ -51,18 +50,14 @@ public class EntityFromPrecursorCreationService { Set idRemovals = manualRedactions.getIdsToRemove(); List manualEntities = manualRedactions.getEntriesToAdd() .stream() - .filter(manualRedactionEntry -> !(idRemovals.stream() - .map(BaseAnnotation::getAnnotationId) - .toList() - .contains(manualRedactionEntry.getAnnotationId()) && manualRedactionEntry.getRequestDate() - .isBefore(idRemovals.stream() - .filter(idRemoval -> idRemoval.getAnnotationId().equals(manualRedactionEntry.getAnnotationId())) - .findFirst() - .get() - .getRequestDate()))) + .filter(manualRedactionEntry -> idRemovals.stream() + .filter(idRemoval -> idRemoval.getAnnotationId().equals(manualRedactionEntry.getAnnotationId())) + .filter(idRemoval -> idRemoval.getRequestDate().isBefore(manualRedactionEntry.getRequestDate())) + .findAny()// + .isEmpty()) .filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary())) - 
.map(manualRedactionEntry -> PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, - dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) + .map(manualRedactionEntry -> // + PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) .peek(manualEntity -> { if (manualEntity.getEntityType().equals(EntityType.HINT)) { manualEntity.skip("MAN.5.1", "manual hint is skipped by default"); @@ -90,8 +85,14 @@ public class EntityFromPrecursorCreationService { public List toTextEntity(List precursorEntities, SemanticNode node) { - var notFoundEntities = precursorEntities.stream().filter(PrecursorEntity::isRectangle).collect(Collectors.toList()); - var findableEntities = precursorEntities.stream().filter(precursorEntity -> !precursorEntity.isRectangle()).toList(); + var notFoundEntities = precursorEntities.stream() + .filter(PrecursorEntity::isRectangle) + .collect(Collectors.toList()); + + var findableEntities = precursorEntities.stream() + .filter(precursorEntity -> !precursorEntity.isRectangle()) + .toList(); + Map> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(node, findableEntities); for (PrecursorEntity precursorEntity : findableEntities) { @@ -102,7 +103,12 @@ public class EntityFromPrecursorCreationService { } createCorrectEntity(precursorEntity, optionalClosestEntity.get()); } - tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph); + + tempEntitiesByValue.values() + .stream() + .flatMap(Collection::stream) + .forEach(TextEntity::removeFromGraph); + return notFoundEntities; } @@ -113,9 +119,23 @@ public class EntityFromPrecursorCreationService { * @param precursorEntity The entity identifier for the RedactionEntity. * @param closestEntity The closest Boundary to the RedactionEntity. 
*/ - private void createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity) { + public static TextEntity createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity) { - TextEntity correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId()); + return createCorrectEntity(precursorEntity, closestEntity, false); + } + + + public static TextEntity createCorrectEntity(PrecursorEntity precursorEntity, TextEntity closestEntity, boolean generateId) { + + TextEntity correctEntity; + if (generateId) { + correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), + precursorEntity.type(), + precursorEntity.getEntityType(), + closestEntity.getDeepestFullyContainingNode()); + } else { + correctEntity = TextEntity.initialEntityNode(closestEntity.getTextRange(), precursorEntity.type(), precursorEntity.getEntityType(), precursorEntity.getId()); + } correctEntity.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode()); correctEntity.setIntersectingNodes(new ArrayList<>(closestEntity.getIntersectingNodes())); correctEntity.setDuplicateTextRanges(new ArrayList<>(closestEntity.getDuplicateTextRanges())); @@ -125,14 +145,17 @@ public class EntityFromPrecursorCreationService { correctEntity.setTextAfter(closestEntity.getTextAfter()); correctEntity.setTextBefore(closestEntity.getTextBefore()); - correctEntity.getIntersectingNodes().forEach(n -> n.getEntities().add(correctEntity)); - correctEntity.getPages().forEach(page -> page.getEntities().add(correctEntity)); + correctEntity.getIntersectingNodes() + .forEach(n -> n.getEntities().add(correctEntity)); + correctEntity.getPages() + .forEach(page -> page.getEntities().add(correctEntity)); correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList()); correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry()); 
correctEntity.setDossierDictionaryEntry(precursorEntity.isDossierDictionaryEntry()); correctEntity.getManualOverwrite().addChanges(precursorEntity.getManualOverwrite().getManualChangeLog()); correctEntity.addEngines(precursorEntity.getEngines()); + return correctEntity; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java index 295b7e18..38656b66 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/MigratedIdsCollector.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.utils; +import java.util.Collections; import java.util.LinkedList; import java.util.Set; import java.util.function.BiConsumer; @@ -17,7 +18,7 @@ public class MigratedIdsCollector implements Collector supplier() { - return () -> new MigratedIds(new LinkedList<>()); + return () -> new MigratedIds(new LinkedList<>(), Collections.emptyList()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java index db60cd34..18892630 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java @@ -31,6 +31,7 @@ import org.springframework.test.context.junit.jupiter.SpringExtension; import 
com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; @@ -49,7 +50,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; -import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations; import com.knecon.fforesight.tenantcommons.TenantContext; import lombok.SneakyThrows; @@ -107,7 +107,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { @SneakyThrows public void testSave() { - MigratedIds ids = new MigratedIds(new LinkedList<>()); + MigratedIds ids = new MigratedIds(new LinkedList<>(), null); ids.addMapping("123", "321"); ids.addMapping("123", "321"); ids.addMapping("123", "321"); @@ -173,7 +173,11 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { mergedRedactionLog = redactionLog; } - MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(mergedRedactionLog, document, TEST_DOSSIER_TEMPLATE_ID, manualRedactions); + MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(mergedRedactionLog, + document, + TEST_DOSSIER_TEMPLATE_ID, + manualRedactions, + 
TEST_FILE_ID); redactionStorageService.storeObject(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ENTITY_LOG, migratedEntityLog.getEntityLog()); assertEquals(mergedRedactionLog.getRedactionLogEntry().size(), migratedEntityLog.getEntityLog().getEntityLogEntry().size()); @@ -187,10 +191,11 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { assertEquals(mergedRedactionLog.getLegalBasis().size(), entityLog.getLegalBasis().size()); Map migratedIds = migratedEntityLog.getMigratedIds().buildOldToNewMapping(); +// assertEquals(legacyRedactionLogMergeService.getNumberOfAffectedAnnotations(manualRedactions), migratedIds.size()); + migratedIds.forEach((oldId, newId) -> assertEntryIsEqual(oldId, newId, mergedRedactionLog, entityLog, migratedIds)); - AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID) - .build()); + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); File outputFile = Path.of(OsUtils.getTemporaryDirectory()).resolve(Path.of(fileName.replaceAll(".pdf", "_MIGRATED.pdf")).getFileName()).toFile(); try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) { @@ -268,13 +273,24 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { if (!redactionLogEntry.isImage()) { assertEquals(redactionLogEntry.getValue().toLowerCase(Locale.ENGLISH), entityLogEntry.getValue().toLowerCase(Locale.ENGLISH)); } + if (entityLogEntry.getManualChanges() + .stream() + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.RECATEGORIZE))) { + assertEquals(redactionLogEntry.getType(), entityLogEntry.getType()); + } assertEquals(redactionLogEntry.getChanges().size(), entityLogEntry.getChanges().size()); assertTrue(redactionLogEntry.getManualChanges().size() <= entityLogEntry.getManualChanges().size()); 
assertEquals(redactionLogEntry.getPositions().size(), entityLogEntry.getPositions().size()); - assertTrue(positionsAlmostEqual(redactionLogEntry.getPositions(), entityLogEntry.getPositions())); -// assertEquals(redactionLogEntry.getColor(), entityLogEntry.getColor()); - assertEqualsNullSafe(redactionLogEntry.getLegalBasis(), entityLogEntry.getLegalBasis()); -// assertEqualsNullSafe(redactionLogEntry.getReason(), entityLogEntry.getReason()); + if (entityLogEntry.getManualChanges() + .stream() + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.RESIZE) || mc.getManualRedactionType().equals(ManualRedactionType.RESIZE_IN_DICTIONARY))) { + assertTrue(positionsAlmostEqual(redactionLogEntry.getPositions(), entityLogEntry.getPositions())); + } + if (entityLogEntry.getManualChanges() + .stream() + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.FORCE_REDACT) || mc.getManualRedactionType().equals(ManualRedactionType.FORCE_HINT))) { + assertEqualsNullSafe(redactionLogEntry.getLegalBasis(), entityLogEntry.getLegalBasis()); + } assertReferencesEqual(redactionLogEntry.getReference(), entityLogEntry.getReference(), oldToNewMapping); assertEquals(redactionLogEntry.isDictionaryEntry(), entityLogEntry.isDictionaryEntry()); assertEquals(redactionLogEntry.isDossierDictionaryEntry(), entityLogEntry.isDossierDictionaryEntry()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index e10a911b..a67de70a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -119,15 +119,15 @@ 
public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); @@ -169,9 +169,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - entityLog.getEntityLogEntry().forEach(entry -> { - duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); - }); + entityLog.getEntityLogEntry() + .forEach(entry -> { + duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); + }); duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1)); @@ -216,12 +217,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = ManualRedactions.builder() .resizeRedactions(Set.of(ManualResizeRedaction.builder() - .annotationId("c6be5277f5ee60dc3d83527798b7fe02") - 
.value("Dr. Alan") - .positions(List.of(new Rectangle(236.8f, 182.90005f, 40.584f, 12.642f, 7))) - .requestDate(OffsetDateTime.now()) - .updateDictionary(false) - .build())) + .annotationId("c6be5277f5ee60dc3d83527798b7fe02") + .value("Dr. Alan") + .positions(List.of(new Rectangle(236.8f, 182.90005f, 40.584f, 12.642f, 7))) + .requestDate(OffsetDateTime.now()) + .updateDictionary(false) + .build())) .build(); request.setManualRedactions(manualRedactions); @@ -256,7 +257,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var values = redactionLog.getEntityLogEntry().stream().map(EntityLogEntry::getValue).collect(Collectors.toList()); + var values = redactionLog.getEntityLogEntry() + .stream() + .map(EntityLogEntry::getValue) + .collect(Collectors.toList()); assertThat(values).containsExactlyInAnyOrder("Lastname M.", "Doe", "Doe J.", "M. Mustermann", "Mustermann M.", "F. 
Lastname"); } @@ -268,8 +272,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource importedRedactionClasspathResource = new ClassPathResource( "files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), - importedRedactionClasspathResource.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + importedRedactionClasspathResource.getInputStream()); AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf"); System.out.println("Start Full integration test"); @@ -353,10 +357,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var mergedEntityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var cbiAddressBeforeHintRemoval = entityLog.getEntityLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get(); + var cbiAddressBeforeHintRemoval = entityLog.getEntityLogEntry() + .stream() + .filter(re -> re.getType().equalsIgnoreCase("CBI_Address")) + .findAny() + .get(); assertThat(cbiAddressBeforeHintRemoval.getState().equals(EntryState.APPLIED)).isFalse(); - var cbiAddressAfterHintRemoval = mergedEntityLog.getEntityLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get(); + var cbiAddressAfterHintRemoval = mergedEntityLog.getEntityLogEntry() + .stream() + .filter(re -> re.getType().equalsIgnoreCase("CBI_Address")) + .findAny() + .get(); assertThat(cbiAddressAfterHintRemoval.getState().equals(EntryState.APPLIED)).isTrue(); } @@ -386,9 +398,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var 
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - entityLog.getEntityLogEntry().forEach(entry -> { - duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); - }); + entityLog.getEntityLogEntry() + .forEach(entry -> { + duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); + }); duplicates.forEach((id, redactionLogEntries) -> assertThat(redactionLogEntries.size()).isEqualTo(1)); @@ -421,11 +434,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { AnalyzeRequest request = uploadFileToStorage(fileName); request.setFileAttributes(List.of(FileAttribute.builder() - .id("fileAttributeId") - .label("Vertebrate Study") - .placeholder("{fileattributes.vertebrateStudy}") - .value("true") - .build())); + .id("fileAttributeId") + .label("Vertebrate Study") + .placeholder("{fileattributes.vertebrateStudy}") + .value("true") + .build())); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult result = analyzeService.analyze(request); @@ -449,7 +462,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { correctFound++; continue loop; } - if (Objects.equals(entityLogEntry.getContainingNodeId().get(0), section.getTreeId().get(0))) { + if (Objects.equals(entityLogEntry.getContainingNodeId() + .get(0), + section.getTreeId() + .get(0))) { String value = section.getTextBlock().subSequence(new TextRange(entityLogEntry.getStartOffset(), entityLogEntry.getEndOffset())).toString(); if (entityLogEntry.getValue().equalsIgnoreCase(value)) { correctFound++; @@ -481,12 +497,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.setEntriesToAdd(Set.of(ManualRedactionEntry.builder() - .value("Redact") - .addToDictionary(true) - .addToDossierDictionary(true) - .positions(List.of(new Rectangle(new Point(95.96979999999999f, 
515.7984f), 19.866899999999987f, 46.953f, 2))) - .type("dossier_redaction") - .build())); + .value("Redact") + .addToDictionary(true) + .addToDossierDictionary(true) + .positions(List.of(new Rectangle(new Point(95.96979999999999f, 515.7984f), 19.866899999999987f, 46.953f, 2))) + .type("dossier_redaction") + .build())); request.setManualRedactions(manualRedactions); @@ -548,7 +564,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var changes = entityLog.getEntityLogEntry().stream().filter(entry -> entry.getValue() != null && entry.getValue().equals("report")).findFirst().get().getChanges(); + var changes = entityLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue() != null && entry.getValue().equals("report")) + .findFirst() + .get().getChanges(); assertThat(changes.size()).isEqualTo(2); @@ -568,18 +588,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource responseJson = new ClassPathResource("files/crafted_document.NER_ENTITIES.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), - responseJson.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), + responseJson.getInputStream()); long start = System.currentTimeMillis(); AnalyzeRequest request = uploadFileToStorage(fileName); request.setFileAttributes(List.of(FileAttribute.builder() - .id("fileAttributeId") - .label("Vertebrate Study") - .placeholder("{fileattributes.vertebrateStudy}") - .value("true") - .build())); + .id("fileAttributeId") + .label("Vertebrate Study") + .placeholder("{fileattributes.vertebrateStudy}") + .value("true") + .build())); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult 
result = analyzeService.analyze(request); @@ -601,7 +621,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .map(redactionLogEntry -> new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())) .map(boundary -> documentGraph.getTextBlock().subSequence(boundary).toString()) .toList(); - List valuesInRedactionLog = entityLog.getEntityLogEntry().stream().filter(e -> !e.getEntryType().equals(EntryType.IMAGE)).map(EntityLogEntry::getValue).toList(); + List valuesInRedactionLog = entityLog.getEntityLogEntry() + .stream() + .filter(e -> !e.getEntryType().equals(EntryType.IMAGE)) + .map(EntityLogEntry::getValue) + .toList(); assertEquals(valuesInRedactionLog, valuesInDocument); @@ -628,11 +652,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.setRecategorizations(Set.of(ManualRecategorization.builder() - .annotationId("37eee3e9d589a5cc529bfec38c3ba479") - .fileId("fileId") - .type("signature") - .requestDate(OffsetDateTime.now()) - .build())); + .annotationId("37eee3e9d589a5cc529bfec38c3ba479") + .fileId("fileId") + .type("signature") + .requestDate(OffsetDateTime.now()) + .build())); request.setManualRedactions(manualRedactions); @@ -683,40 +707,40 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.getIdsToRemove() .add(IdRemoval.builder() - .annotationId("308dab9015bfafd911568cffe0a7f7de") - .fileId(TEST_FILE_ID) - .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 475479, ZoneOffset.UTC)) - .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 483651, ZoneOffset.UTC)) - .build()); + .annotationId("308dab9015bfafd911568cffe0a7f7de") + .fileId(TEST_FILE_ID) + .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 475479, ZoneOffset.UTC)) + .processedDate(OffsetDateTime.of(2022, 05, 
23, 8, 30, 07, 483651, ZoneOffset.UTC)) + .build()); manualRedactions.getForceRedactions() .add(ManualForceRedaction.builder() - .annotationId("0b56ea1a87c83f351df177315af94f0d") - .fileId(TEST_FILE_ID) - .legalBasis("Something") - .requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC)) - .processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC)) - .build()); + .annotationId("0b56ea1a87c83f351df177315af94f0d") + .fileId(TEST_FILE_ID) + .legalBasis("Something") + .requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC)) + .processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC)) + .build()); manualRedactions.getIdsToRemove() .add(IdRemoval.builder() - .annotationId("0b56ea1a87c83f351df177315af94f0d") - .fileId(TEST_FILE_ID) - .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 961721, ZoneOffset.UTC)) - .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 96528, ZoneOffset.UTC)) - .build()); + .annotationId("0b56ea1a87c83f351df177315af94f0d") + .fileId(TEST_FILE_ID) + .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 961721, ZoneOffset.UTC)) + .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 96528, ZoneOffset.UTC)) + .build()); request.setManualRedactions(manualRedactions); AnalyzeResult result = analyzeService.analyze(request); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() - .manualRedactions(manualRedactions) - .colors(colors) - .types(types) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .build()); + .manualRedactions(manualRedactions) + .colors(colors) + .types(types) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .build()); try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); @@ -932,12 +956,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest 
{ var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() - .manualRedactions(manualRedactions) - .colors(colors) - .types(types) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .build()); + .manualRedactions(manualRedactions) + .colors(colors) + .types(types) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .build()); try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); @@ -960,15 +984,16 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - redactionLog.getEntityLogEntry().forEach(entry -> { - if (!entry.getEntryType().equals(EntryType.HINT)) { - if (entry.getType().equals("CBI_author")) { - assertThat(entry.getReason()).isEqualTo("Not redacted because it's row does not belong to a vertebrate study"); - } else if (entry.getType().equals("CBI_address")) { - assertThat(entry.getReason()).isEqualTo("No vertebrate found"); - } - } - }); + redactionLog.getEntityLogEntry() + .forEach(entry -> { + if (!entry.getEntryType().equals(EntryType.HINT)) { + if (entry.getType().equals("CBI_author")) { + assertThat(entry.getReason()).isEqualTo("Not redacted because it's row does not belong to a vertebrate study"); + } else if (entry.getType().equals("CBI_address")) { + assertThat(entry.getReason()).isEqualTo("No vertebrate found"); + } + } + }); } @@ -1005,18 +1030,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { String manualAddId = UUID.randomUUID().toString(); manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder() - .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") - .fileId("fileId") - .processedDate(OffsetDateTime.now()) - .requestDate(OffsetDateTime.now()) - 
.build())); + .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") + .fileId("fileId") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .build())); manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Something") - .requestDate(OffsetDateTime.now()) - .processedDate(OffsetDateTime.now()) - .build())); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Something") + .requestDate(OffsetDateTime.now()) + .processedDate(OffsetDateTime.now()) + .build())); ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); manualRedactionEntry.setAnnotationId(manualAddId); @@ -1027,7 +1052,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { manualRedactionEntry.setProcessedDate(OffsetDateTime.now()); manualRedactionEntry.setRequestDate(OffsetDateTime.now()); manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(), - Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); + Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); // manualRedactions.getEntriesToAdd().add(manualRedactionEntry); @@ -1038,39 +1063,63 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { manualRedactions.getEntriesToAdd().add(manualRedactionEntry); manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder() - .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") - .fileId("fileId") - .requestDate(OffsetDateTime.now()) - .processedDate(OffsetDateTime.now()) - .build())); + .annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") + .fileId("fileId") + .requestDate(OffsetDateTime.now()) + .processedDate(OffsetDateTime.now()) + .build())); 
manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Manual Legal Basis Change") - .processedDate(OffsetDateTime.now()) - .requestDate(OffsetDateTime.now()) - .build()))); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Manual Legal Basis Change") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .build()))); manualRedactions.setResizeRedactions(Set.of(ManualResizeRedaction.builder() - .annotationId("fc287b74be2421156ab2895c7474ccdd") - .fileId("fileId") - .processedDate(OffsetDateTime.now()) - .requestDate(OffsetDateTime.now()) - .value("Syngenta Crop Protection AG, Basel, Switzerland RCC Ltd., Itingen, Switzerland") - .positions(List.of(Rectangle.builder().topLeftX(289.44595f).topLeftY(327.567f).width(7.648041f).height(82.51475f).page(1).build(), - Rectangle.builder().topLeftX(298.67056f).topLeftY(327.567f).width(7.648041f).height(75.32377f).page(1).build(), - Rectangle.builder().topLeftX(307.89517f).topLeftY(327.567f).width(7.648041f).height(61.670967f).page(1).build(), - Rectangle.builder().topLeftX(316.99985f).topLeftY(327.567f).width(7.648041f).height(38.104286f).page(1).build())) - .updateDictionary(false) - .build())); + .annotationId("fc287b74be2421156ab2895c7474ccdd") + .fileId("fileId") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .value("Syngenta Crop Protection AG, Basel, Switzerland RCC Ltd., Itingen, Switzerland") + .positions(List.of(Rectangle.builder() + .topLeftX(289.44595f) + .topLeftY(327.567f) + .width(7.648041f) + .height(82.51475f) + .page(1) + .build(), + Rectangle.builder() + .topLeftX(298.67056f) + .topLeftY(327.567f) + .width(7.648041f) + .height(75.32377f) + .page(1) + .build(), + Rectangle.builder() + .topLeftX(307.89517f) + .topLeftY(327.567f) + .width(7.648041f) + .height(61.670967f) + .page(1) + .build(), + 
Rectangle.builder() + .topLeftX(316.99985f) + .topLeftY(327.567f) + .width(7.648041f) + .height(38.104286f) + .page(1) + .build())) + .updateDictionary(false) + .build())); analyzeService.reanalyze(request); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .manualRedactions(manualRedactions) - .build()); + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .manualRedactions(manualRedactions) + .build()); try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); @@ -1110,8 +1159,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), - importedRedactions.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + importedRedactions.getInputStream()); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); AnalyzeResult result = analyzeService.analyze(request); @@ -1124,17 +1173,18 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { fileOutputStream.write(annotateResponse.getDocument()); } - entityLog.getEntityLogEntry().forEach(entry -> { - if (entry.getValue() == null) { - return; - } - if (entry.getValue().equals("David")) { - assertThat(entry.getImportedRedactionIntersections()).hasSize(1); - } - if (entry.getValue().equals("annotation")) { - assertThat(entry.getImportedRedactionIntersections()).isEmpty(); - } - }); + 
entityLog.getEntityLogEntry() + .forEach(entry -> { + if (entry.getValue() == null) { + return; + } + if (entry.getValue().equals("David")) { + assertThat(entry.getImportedRedactionIntersections()).hasSize(1); + } + if (entry.getValue().equals("annotation")) { + assertThat(entry.getImportedRedactionIntersections()).isEmpty(); + } + }); } @@ -1163,7 +1213,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { } var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var values = entityLog.getEntityLogEntry().stream().map(EntityLogEntry::getValue).collect(Collectors.toList()); + var values = entityLog.getEntityLogEntry() + .stream() + .map(EntityLogEntry::getValue) + .collect(Collectors.toList()); assertThat(values).contains("Mrs. Robinson"); assertThat(values).contains("Mr. Bojangles"); @@ -1178,8 +1231,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource imageServiceResponseFileResource = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 (1).IMAGE_INFO.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), - imageServiceResponseFileResource.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), + imageServiceResponseFileResource.getInputStream()); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); @@ -1188,23 +1241,26 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { System.out.println("Finished analysis"); request.setManualRedactions(ManualRedactions.builder() - .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() - .annotationId("3029651d0842a625f2d23f8375c23600") - .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") - .value("0049 
331 441 551 14") - .requestDate(OffsetDateTime.now()) - .fileId(TEST_FILE_ID) - .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") - .build())) - .build()); + .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() + .annotationId("3029651d0842a625f2d23f8375c23600") + .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") + .value("0049 331 441 551 14") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") + .build())) + .build()); analyzeService.reanalyze(request); System.out.println("Finished reanalysis"); var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getType().equals("signature")).forEach(entityLogEntry -> { - assertThat(entityLogEntry.getState() == EntryState.APPLIED).isTrue(); - }); + entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getType().equals("signature")) + .forEach(entityLogEntry -> { + assertThat(entityLogEntry.getState() == EntryState.APPLIED).isTrue(); + }); } @@ -1215,8 +1271,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ClassPathResource imageServiceResponseFileResource = new ClassPathResource("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1 (1).IMAGE_INFO.json"); storageService.storeObject(TenantContext.getTenantId(), - RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), - imageServiceResponseFileResource.getInputStream()); + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMAGE_INFO), + imageServiceResponseFileResource.getInputStream()); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); @@ -1225,21 +1281,21 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { 
System.out.println("Finished analysis"); request.setManualRedactions(ManualRedactions.builder() - .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() - .annotationId("3029651d0842a625f2d23f8375c23600") - .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") - .value("0049 331 441 551 14") - .requestDate(OffsetDateTime.now()) - .fileId(TEST_FILE_ID) - .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") - .build())) - .recategorizations(Set.of(ManualRecategorization.builder() - .annotationId("3029651d0842a625f2d23f8375c23600") - .type("CBI_author") - .requestDate(OffsetDateTime.now()) - .fileId(TEST_FILE_ID) - .build())) - .build()); + .legalBasisChanges(Set.of(ManualLegalBasisChange.builder() + .annotationId("3029651d0842a625f2d23f8375c23600") + .section("[19, 2]: Paragraph: Contact point: LexCo Contact:") + .value("0049 331 441 551 14") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") + .build())) + .recategorizations(Set.of(ManualRecategorization.builder() + .annotationId("3029651d0842a625f2d23f8375c23600") + .type("CBI_author") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .build())) + .build()); analyzeService.reanalyze(request); System.out.println("Finished reanalysis"); @@ -1266,11 +1322,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { String manualAddId2 = UUID.randomUUID().toString(); List positions = List.of(Rectangle.builder().topLeftX(305.35f).topLeftY(332.5033f).width(71.40744f).height(13.645125f).page(1).build()); ManualRedactionEntry manualRedactionEntry = getManualRedactionEntry(manualAddId, - positions, - "the manufacturing or production process, including the method and innovative aspects thereof, as well as other technical and industrial specifications inherent to that process or method, except for information which is relevant to the assessment of safety"); + positions, + "the manufacturing 
or production process, including the method and innovative aspects thereof, as well as other technical and industrial specifications inherent to that process or method, except for information which is relevant to the assessment of safety"); ManualRedactionEntry manualRedactionEntry2 = getManualRedactionEntry(manualAddId2, - positions, - "commercial information revealing sourcing, market shares or business strategy of the applicant"); + positions, + "commercial information revealing sourcing, market shares or business strategy of the applicant"); IdRemoval idRemoval = getIdRemoval(manualAddId); IdRemoval idRemoval2 = getIdRemoval(manualAddId2); @@ -1282,55 +1338,101 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.APPLIED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.APPLIED); request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).idsToRemove(Set.of(idRemoval)).build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), 
EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.REMOVED); request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)).idsToRemove(Set.of(idRemoval)).build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.REMOVED); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)).findFirst().get().getState(), EntryState.APPLIED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)) + .findFirst() + .get().getState(), EntryState.APPLIED); request.setManualRedactions(ManualRedactions.builder() - .entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)) - 
.idsToRemove(Set.of(idRemoval, idRemoval2)) - .build()); + .entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)) + .idsToRemove(Set.of(idRemoval, idRemoval2)) + .build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.REMOVED); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)).findFirst().get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)) + .findFirst() + .get().getState(), EntryState.REMOVED); manualRedactionEntry.setRequestDate(OffsetDateTime.now()); - request.setManualRedactions(ManualRedactions.builder() - .entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)) - .idsToRemove(Set.of(idRemoval, idRemoval2)) - .build()); + request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry, manualRedactionEntry2)).idsToRemove(Set.of(idRemoval2)).build()); analyzeService.reanalyze(request); entityLog = 
redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findFirst().get().getState(), EntryState.APPLIED); - assertTrue(entityLog.getEntityLogEntry().stream().anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); - assertEquals(entityLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)).findFirst().get().getState(), EntryState.REMOVED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findFirst() + .get().getState(), EntryState.APPLIED); + assertTrue(entityLog.getEntityLogEntry() + .stream() + .anyMatch(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2))); + assertEquals(entityLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId2)) + .findFirst() + .get().getState(), EntryState.REMOVED); } + @Test @SneakyThrows public void testResizeWithUpdateDictionaryTrue() { @@ -1342,21 +1444,35 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { analyzeService.analyze(request); var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var david = entityLog.getEntityLogEntry().stream().filter(e -> e.getValue().equals("David")).findFirst().get(); + var david = entityLog.getEntityLogEntry() + .stream() + .filter(e -> e.getValue().equals("David")) + .findFirst() + .get(); request.setManualRedactions(ManualRedactions.builder() - .resizeRedactions(Set.of(ManualResizeRedaction.builder() - .updateDictionary(true) - 
.annotationId(david.getId()) - .requestDate(OffsetDateTime.now()) - .value("David Ksenia") - .positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(293.564f).width(65.592f).height(15.408f).page(1).build())) - .addToAllDossiers(false) - .build())) - .build()); + .resizeRedactions(Set.of(ManualResizeRedaction.builder() + .updateDictionary(true) + .annotationId(david.getId()) + .requestDate(OffsetDateTime.now()) + .value("David Ksenia") + .positions(List.of(Rectangle.builder() + .topLeftX(56.8f) + .topLeftY(293.564f) + .width(65.592f) + .height(15.408f) + .page(1) + .build())) + .addToAllDossiers(false) + .build())) + .build()); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var resizedEntity = entityLog.getEntityLogEntry().stream().filter(e -> e.getId().equals(david.getId())).findFirst().get(); + var resizedEntity = entityLog.getEntityLogEntry() + .stream() + .filter(e -> e.getId().equals(david.getId())) + .findFirst() + .get(); assertEquals(resizedEntity.getState(), EntryState.APPLIED); assertEquals(resizedEntity.getValue(), "David Ksenia"); } @@ -1364,12 +1480,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { private IdRemoval getIdRemoval(String id) { - return IdRemoval.builder() - .annotationId(id) - .removeFromAllDossiers(false) - .removeFromDictionary(false) - .requestDate(OffsetDateTime.now()) - .build(); + return IdRemoval.builder().annotationId(id).removeFromAllDossiers(false).removeFromDictionary(false).requestDate(OffsetDateTime.now()).build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java index f9eaa926..43c9f983 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java @@ -33,7 +33,9 @@ public class LayoutParsingRequestProvider { .textBlockFileStorageId(textBlockFileStorageId) .positionBlockFileStorageId(positionBlockFileStorageId) .pageFileStorageId(pageFileStorageId) - .simplifiedTextStorageId(simplifiedTextStorageId).viewerDocumentStorageId(viewerDocumentStorageId) + .simplifiedTextStorageId(simplifiedTextStorageId) + .viewerDocumentStorageId(viewerDocumentStorageId) + .visualLayoutParsingFileId(Optional.empty()) .build(); } -- 2.47.2 From d488bee1bb2d1cdafdb4cd15e08435db3c4d2e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Wed, 28 Feb 2024 13:12:54 +0100 Subject: [PATCH 05/21] RED-8615: backport --- .../redaction/v1/model/MigrationRequest.java | 1 - .../build.gradle.kts | 2 +- .../v1/server/migration/MigrationMapper.java | 9 ++-- .../v1/server/model/MigrationEntity.java | 3 +- .../server/service/ManualChangeFactory.java | 8 ++-- .../service/UnprocessedChangesService.java | 15 +------ .../EntityFromPrecursorCreationService.java | 4 +- .../drools/EntityDroolsExecutionService.java | 42 +++++++++++++------ .../v1/server/MigrationIntegrationTest.java | 2 +- .../v1/server/RedactionIntegrationTest.java | 3 +- 10 files changed, 47 insertions(+), 42 deletions(-) diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java index b895661f..316fa2b1 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java +++ 
b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MigrationRequest.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.model; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index a527a019..71326592 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -16,7 +16,7 @@ val layoutParserVersion = "0.91.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.359.0" +val persistenceServiceVersion = "2.349.3" val springBootStarterVersion = "3.1.5" configurations { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java index 5b049564..c9909086 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMapper.java @@ -11,7 +11,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.server.model.MigrationEntity; public class MigrationMapper { @@ -46,12 +45,12 @@ public class MigrationMapper { public static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType toManualRedactionType(ManualRedactionType manualRedactionType) { return switch (manualRedactionType) { - case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_LOCALLY; + case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD; case ADD_TO_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_TO_DICTIONARY; - case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY; + case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE; case REMOVE_FROM_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_FROM_DICTIONARY; - case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_REDACT; - case FORCE_HINT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_HINT; + case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE; + case FORCE_HINT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE; case RECATEGORIZE -> 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RECATEGORIZE; case LEGAL_BASIS_CHANGE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.LEGAL_BASIS_CHANGE; case RESIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RESIZE; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index 975bcc8b..91271443 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -207,8 +207,7 @@ public final class MigrationEntity { return entityLogEntry.getManualChanges() .stream() .reduce((a, b) -> b) - .filter(mc -> mc.getManualRedactionType() - .equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY)) + .filter(mc -> mc.getManualRedactionType().equals(com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE)) .isPresent(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java index 0add07c9..3d99a3f9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangeFactory.java @@ -33,13 +33,13 @@ public class ManualChangeFactory { if (baseAnnotation instanceof ManualRecategorization imageRecategorization) { manualChange.withManualRedactionType(ManualRedactionType.RECATEGORIZE).withChange("type", imageRecategorization.getType()); } else if (baseAnnotation instanceof IdRemoval manualRemoval) { - manualChange.withManualRedactionType(manualRemoval.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE_LOCALLY); - } else if (baseAnnotation instanceof ManualForceRedaction) { - manualChange.withManualRedactionType(isHint ? ManualRedactionType.FORCE_HINT : ManualRedactionType.FORCE_REDACT); + manualChange.withManualRedactionType(manualRemoval.isRemoveFromDictionary() ? ManualRedactionType.REMOVE_FROM_DICTIONARY : ManualRedactionType.REMOVE); + } else if (baseAnnotation instanceof ManualForceRedaction manualForceRedaction) { + manualChange.withManualRedactionType(ManualRedactionType.FORCE).withChange("legalBasis", manualForceRedaction.getLegalBasis()); } else if (baseAnnotation instanceof ManualResizeRedaction manualResizeRedact) { manualChange.withManualRedactionType(manualResizeRedact.getUpdateDictionary() ? ManualRedactionType.RESIZE_IN_DICTIONARY : ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue()); } else if (baseAnnotation instanceof ManualRedactionEntry manualRedactionEntry) { - manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY) + manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? 
ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD) .withChange("value", manualRedactionEntry.getValue()); } else if (baseAnnotation instanceof ManualLegalBasisChange manualLegalBasisChange) { manualChange.withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java index 2a469cc1..0accaa89 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/UnprocessedChangesService.java @@ -16,6 +16,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.redaction.v1.model.AnalyzeResponse; @@ -25,15 +26,12 @@ import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import 
com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; -import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; import com.iqser.red.service.redaction.v1.server.service.document.EntityFromPrecursorCreationService; import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import io.micrometer.observation.annotation.Observed; -import jakarta.annotation.PostConstruct; import lombok.AccessLevel; import lombok.RequiredArgsConstructor; import lombok.experimental.FieldDefaults; @@ -51,20 +49,10 @@ public class UnprocessedChangesService { final ObservedStorageService observedStorageService; final EntityFindingUtility entityFindingUtility; final RedactionStorageService redactionStorageService; - final EntityEnrichmentService entityEnrichmentService; final EntityFromPrecursorCreationService entityFromPrecursorCreationService; final DictionaryService dictionaryService; final ManualChangesApplicationService manualChangesApplicationService; - EntityCreationService entityCreationService; - - - @PostConstruct - public void initEntityCreationService() { - - entityCreationService = new EntityCreationService(entityEnrichmentService); - } - @Observed(name = "UnprocessedChangesService", contextualName = "analyse-surrounding-text") public void analyseSurroundingText(AnalyzeRequest analyzeRequest) { @@ -201,6 +189,7 @@ public class UnprocessedChangesService { return manualRedactions.getEntriesToAdd() .stream() .filter(manualRedactionEntry -> manualRedactionEntry.getPositions() != null && !manualRedactionEntry.getPositions().isEmpty()) + .filter(BaseAnnotation::isLocal) .map(manualRedactionEntry -> 
PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) .toList(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java index 13033a6f..22402160 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java @@ -14,6 +14,7 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; @@ -50,12 +51,12 @@ public class EntityFromPrecursorCreationService { Set idRemovals = manualRedactions.getIdsToRemove(); List manualEntities = manualRedactions.getEntriesToAdd() .stream() + .filter(BaseAnnotation::isLocal) .filter(manualRedactionEntry -> idRemovals.stream() .filter(idRemoval -> idRemoval.getAnnotationId().equals(manualRedactionEntry.getAnnotationId())) .filter(idRemoval -> idRemoval.getRequestDate().isBefore(manualRedactionEntry.getRequestDate())) .findAny()// 
.isEmpty()) - .filter(manualRedactionEntry -> !(manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary())) .map(manualRedactionEntry -> // PrecursorEntity.fromManualRedactionEntry(manualRedactionEntry, dictionaryService.isHint(manualRedactionEntry.getType(), dossierTemplateId))) .peek(manualEntity -> { @@ -66,7 +67,6 @@ public class EntityFromPrecursorCreationService { } }) .toList(); - return toTextEntity(manualEntities, node); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java index 88268f3a..9fb33529 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java @@ -16,6 +16,7 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; @@ -55,7 +56,14 @@ public class EntityDroolsExecutionService { ManualRedactions manualRedactions, NerEntities nerEntities) { - return executeRules(kieContainer, document, 
document.streamChildren().toList(), dictionary, fileAttributes, manualRedactions, nerEntities); + return executeRules(kieContainer, + document, + document.streamChildren() + .toList(), + dictionary, + fileAttributes, + manualRedactions, + nerEntities); } @@ -80,19 +88,28 @@ public class EntityDroolsExecutionService { kieSession.setGlobal("dictionary", dictionary); kieSession.insert(document); - document.getEntities().forEach(kieSession::insert); + + document.getEntities() + .forEach(kieSession::insert); + sectionsToAnalyze.forEach(kieSession::insert); - sectionsToAnalyze.stream().flatMap(SemanticNode::streamAllSubNodes).forEach(kieSession::insert); - document.getPages().forEach(kieSession::insert); - fileAttributes.stream().filter(f -> f.getValue() != null).forEach(kieSession::insert); + + sectionsToAnalyze.stream() + .flatMap(SemanticNode::streamAllSubNodes) + .forEach(kieSession::insert); + + document.getPages() + .forEach(kieSession::insert); + + fileAttributes.stream() + .filter(f -> f.getValue() != null) + .forEach(kieSession::insert); if (manualRedactions != null) { - manualRedactions.getResizeRedactions().forEach(kieSession::insert); - manualRedactions.getRecategorizations().forEach(kieSession::insert); - manualRedactions.getEntriesToAdd().forEach(kieSession::insert); - manualRedactions.getForceRedactions().forEach(kieSession::insert); - manualRedactions.getIdsToRemove().forEach(kieSession::insert); - manualRedactions.getLegalBasisChanges().forEach(kieSession::insert); + manualRedactions.buildAll() + .stream() + .filter(BaseAnnotation::isLocal) + .forEach(kieSession::insert); } kieSession.insert(nerEntities); @@ -105,7 +122,8 @@ public class EntityDroolsExecutionService { }); try { - completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS).get(); + completableFuture.orTimeout(settings.getDroolsExecutionTimeoutSecs(), TimeUnit.SECONDS) + .get(); } catch (ExecutionException e) { kieSession.dispose(); if (e.getCause() instanceof 
TimeoutException) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java index 18892630..e3b704cc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/MigrationIntegrationTest.java @@ -288,7 +288,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest { } if (entityLogEntry.getManualChanges() .stream() - .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.FORCE_REDACT) || mc.getManualRedactionType().equals(ManualRedactionType.FORCE_HINT))) { + .noneMatch(mc -> mc.getManualRedactionType().equals(ManualRedactionType.FORCE))) { assertEqualsNullSafe(redactionLogEntry.getLegalBasis(), entityLogEntry.getLegalBasis()); } assertReferencesEqual(redactionLogEntry.getReference(), entityLogEntry.getReference(), oldToNewMapping); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index a67de70a..5d5869b2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -1474,7 +1474,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .findFirst() .get(); assertEquals(resizedEntity.getState(), EntryState.APPLIED); - 
assertEquals(resizedEntity.getValue(), "David Ksenia"); + assertEquals(resizedEntity.getValue(), "David"); + assertEquals(0, resizedEntity.getManualChanges().size()); } -- 2.47.2 From 8820eb696c3c2b67eeae311eea7671a5c216cde9 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Wed, 28 Feb 2024 17:22:45 +0200 Subject: [PATCH 06/21] RED-8586 - Fix confidentiality rules --- .../service/EntityChangeLogService.java | 28 +++---------------- .../service/EntityLogCreatorService.java | 2 +- .../resources/drools/acceptance_rules.drl | 15 ++++++++-- .../drools/all_redact_manager_rules.drl | 15 ++++++++-- .../src/test/resources/drools/rules.drl | 15 ++++++++-- .../EFSA_sanitisation_GFL_v1/rules.drl | 16 ++++++++--- .../resources/all_redact_manager_rules.drl | 15 ++++++++-- 7 files changed, 65 insertions(+), 41 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java index 2d50d3f6..e61f2ccd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java @@ -56,35 +56,12 @@ public class EntityChangeLogService { ChangeType changeType = calculateChangeType(entityLogEntry.getState(), previousEntity.getState()); entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now)); } - - addManualChanges(entityLogEntry, previousEntity); } addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, manualRedactions, analysisNumber, now); return hasChanges; } - // If a manual change is present in the previous entity but not in the new entity, add it to the new one and - // sort them, 
so they are displayed in the correct order. - private void addManualChanges(EntityLogEntry entityLogEntry, EntityLogEntry previousEntity) { - - Comparator manualChangeComparator = - Comparator.comparing(ManualChange::getManualRedactionType) - .thenComparing(ManualChange::getRequestedDate); - - previousEntity.getManualChanges().forEach(manualChange -> { - boolean contains = entityLogEntry.getManualChanges() - .stream() - .anyMatch(existingChange -> manualChangeComparator.compare(existingChange, manualChange) == 0); - - if (!contains) { - entityLogEntry.getManualChanges().add(manualChange); - entityLogEntry.getManualChanges().sort(Comparator.comparing(ManualChange::getRequestedDate)); - } - }); - } - - private void addRemovedEntriesAsRemoved(List previousEntityLogEntries, List newEntityLogEntries, ManualRedactions manualRedactions, @@ -94,7 +71,10 @@ public class EntityChangeLogService { Set existingIds = newEntityLogEntries.stream().map(EntityLogEntry::getId).collect(Collectors.toSet()); List removedEntries = previousEntityLogEntries.stream() .filter(entry -> !existingIds.contains(entry.getId())) - .toList(); + .collect(Collectors.toList()); + var removedDossierRedaction = removedEntries.stream().filter(e -> e.getState() == EntryState.REMOVED && e.getType().equals("dossier_redaction")).toList(); + previousEntityLogEntries.removeAll(removedDossierRedaction); + removedEntries.removeAll(removedDossierRedaction); removedEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now))); removedEntries.forEach(entry -> entry.setState(EntryState.REMOVED)); removedEntries.forEach(entry -> addManualChangeForDictionaryRemovals(entry, manualRedactions)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index c77f7a32..844ad6c6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -123,7 +123,7 @@ public class EntityLogCreatorService { .stream() .filter(entry -> (newEntityIds.contains(entry.getId()) || entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() .get(0)))) - .toList(); + .collect(Collectors.toList()); previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections); boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index ff854a43..79bfd519 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -690,13 +690,22 @@ rule "ETC.3.1: Redact logos (vertebrate study)" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove 
dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index b1fbf077..11c4f729 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -1148,13 +1148,22 @@ rule "ETC.4.2: Redact dossier dictionary entries" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove 
dossier_redaction when not confidential"); + retract($dossierRedaction); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index aed445b2..0418db92 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -800,13 +800,22 @@ rule "ETC.4.0: Redact dossier dictionary entries" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 8b28390c..9cc2d93f 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -449,14 +449,22 @@ rule "ETC.3.1: Redact logos (non vertebrate study)" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - update($dossierRedaction); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 57124c27..9b8b529e 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -1157,13 +1157,22 @@ rule "ETC.4.2: Redact dossier dictionary entries" // Rule unit: ETC.5 -rule "ETC.5.0: Ignore dossier_redaction entries if confidentiality is not 'confidential'" +rule "ETC.5.0: 
Skip dossier_redaction entries if confidentiality is 'confidential'" + when + FileAttribute(label == "Confidentiality", value == "confidential") + $dossierRedaction: TextEntity(type() == "dossier_redaction") + then + $dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential"); + $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") then - $dossierRedaction.ignore("ETC.5.0", "Ignore dossier redactions, when not confidential"); - $dossierRedaction.getIntersectingNodes().forEach(node -> update(node)); + $dossierRedaction.remove("ETC.5.1", "Remove dossier_redaction when not confidential"); + retract($dossierRedaction); end -- 2.47.2 From 62ccf460693bd6a27823e5f77ba907e56f5cf7df Mon Sep 17 00:00:00 2001 From: Maverick Studer Date: Thu, 29 Feb 2024 15:01:53 +0100 Subject: [PATCH 07/21] RED-8550: Faulty table recognition and text duplication leads to huge sections --- .../redaction-service-server-v1/build.gradle.kts | 4 ++-- .../v1/server/utils/LayoutParsingRequestProvider.java | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 71326592..2e413162 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,11 +12,11 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.91.0" +val layoutParserVersion = "0.89.3" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.349.3" +val persistenceServiceVersion = "2.349.4" val springBootStarterVersion = 
"3.1.5" configurations { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java index 43c9f983..7d266acf 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java @@ -16,7 +16,6 @@ public class LayoutParsingRequestProvider { var originFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN); var tablesFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.TABLES); var imagesFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.IMAGE_INFO); - var sectionGridStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.SECTION_GRID); var structureFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.DOCUMENT_STRUCTURE); var textBlockFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.DOCUMENT_TEXT); var positionBlockFileStorageId = RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.DOCUMENT_POSITION); @@ -35,7 +34,6 @@ public class LayoutParsingRequestProvider { .pageFileStorageId(pageFileStorageId) .simplifiedTextStorageId(simplifiedTextStorageId) .viewerDocumentStorageId(viewerDocumentStorageId) - 
.visualLayoutParsingFileId(Optional.empty()) .build(); } -- 2.47.2 From cde17075e5d2ca6f78d4be9c09173af4decbd424 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Thu, 29 Feb 2024 16:26:51 +0200 Subject: [PATCH 08/21] RED-8586 - Add higher salience to rule ETC.5.1 --- .../service/EntityChangeLogService.java | 29 +++++++++++-------- .../resources/drools/acceptance_rules.drl | 1 + .../drools/all_redact_manager_rules.drl | 1 + .../src/test/resources/drools/rules.drl | 1 + .../EFSA_sanitisation_GFL_v1/rules.drl | 1 + .../resources/all_redact_manager_rules.drl | 1 + 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java index e61f2ccd..1533b8b7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.service; import java.time.OffsetDateTime; -import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.Set; @@ -13,7 +12,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; @@ -26,10 +24,9 @@ import lombok.extern.slf4j.Slf4j; @Slf4j @Service @RequiredArgsConstructor -@FieldDefaults(makeFinal=true, level= AccessLevel.PRIVATE) +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public class EntityChangeLogService { - @Timed("redactmanager_computeChanges") public boolean computeChanges(List previousEntityLogEntries, List newEntityLogEntries, ManualRedactions manualRedactions, int analysisNumber) { @@ -42,7 +39,9 @@ public class EntityChangeLogService { boolean hasChanges = false; for (EntityLogEntry entityLogEntry : newEntityLogEntries) { - Optional optionalPreviousEntity = previousEntityLogEntries.stream().filter(entry -> entry.getId().equals(entityLogEntry.getId())).findAny(); + Optional optionalPreviousEntity = previousEntityLogEntries.stream() + .filter(entry -> entry.getId().equals(entityLogEntry.getId())) + .findAny(); if (optionalPreviousEntity.isEmpty()) { hasChanges = true; entityLogEntry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now)); @@ -63,16 +62,20 @@ public class EntityChangeLogService { private void addRemovedEntriesAsRemoved(List previousEntityLogEntries, - List newEntityLogEntries, - ManualRedactions manualRedactions, - int analysisNumber, - OffsetDateTime now) { + List newEntityLogEntries, + ManualRedactions manualRedactions, + int analysisNumber, + OffsetDateTime now) { - Set existingIds = newEntityLogEntries.stream().map(EntityLogEntry::getId).collect(Collectors.toSet()); + Set existingIds = newEntityLogEntries.stream() + .map(EntityLogEntry::getId) + .collect(Collectors.toSet()); List removedEntries = previousEntityLogEntries.stream() .filter(entry -> !existingIds.contains(entry.getId())) .collect(Collectors.toList()); - var removedDossierRedaction = removedEntries.stream().filter(e -> 
e.getState() == EntryState.REMOVED && e.getType().equals("dossier_redaction")).toList(); + List removedDossierRedaction = removedEntries.stream() + .filter(e -> e.getState() == EntryState.REMOVED && e.getType().equals("dossier_redaction")) + .toList(); previousEntityLogEntries.removeAll(removedDossierRedaction); removedEntries.removeAll(removedDossierRedaction); removedEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now))); @@ -88,13 +91,15 @@ public class EntityChangeLogService { return; } - manualRedactions.getIdsToRemove().stream() + manualRedactions.getIdsToRemove() + .stream() .filter(IdRemoval::isRemoveFromDictionary)// .filter(removed -> removed.getAnnotationId().equals(entry.getId()))// .findFirst()// .ifPresent(idRemove -> entry.getManualChanges().add(ManualChangeFactory.toManualChange(idRemove, false))); } + private ChangeType calculateChangeType(EntryState state, EntryState previousState) { if (state.equals(previousState)) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 79bfd519..7c3c9cb3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -700,6 +700,7 @@ rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidentia end rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 11c4f729..e971d5b2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -1158,6 +1158,7 @@ rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidentia end rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 0418db92..ec07fff7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -810,6 +810,7 @@ rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidentia end rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl index 9cc2d93f..ecdd6e65 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/performance/dictionaries/EFSA_sanitisation_GFL_v1/rules.drl @@ -459,6 +459,7 @@ rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidentia end rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 9b8b529e..21c0f0b0 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -1167,6 +1167,7 @@ rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidentia end rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'" + salience 256 when not FileAttribute(label == "Confidentiality", value == "confidential") $dossierRedaction: TextEntity(type() == "dossier_redaction") -- 2.47.2 From 8362296edd65ca16927d2f5e63d51c802805f63e Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Fri, 1 Mar 2024 12:38:43 +0100 Subject: [PATCH 09/21] RED-8586 - Don't treat dossier redactions differently --- .../service/EntityChangeLogService.java | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java index 1533b8b7..d7b7dc80 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java @@ -72,34 +72,13 @@ public class EntityChangeLogService { .collect(Collectors.toSet()); List removedEntries = previousEntityLogEntries.stream() .filter(entry -> !existingIds.contains(entry.getId())) - .collect(Collectors.toList()); - List removedDossierRedaction = removedEntries.stream() - .filter(e -> e.getState() == EntryState.REMOVED && e.getType().equals("dossier_redaction")) .toList(); - previousEntityLogEntries.removeAll(removedDossierRedaction); - removedEntries.removeAll(removedDossierRedaction); removedEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now))); removedEntries.forEach(entry -> entry.setState(EntryState.REMOVED)); - removedEntries.forEach(entry -> addManualChangeForDictionaryRemovals(entry, manualRedactions)); newEntityLogEntries.addAll(removedEntries); } - private void addManualChangeForDictionaryRemovals(EntityLogEntry entry, ManualRedactions manualRedactions) { - - if (manualRedactions == null || manualRedactions.getIdsToRemove().isEmpty()) { - return; - } - - manualRedactions.getIdsToRemove() - .stream() - .filter(IdRemoval::isRemoveFromDictionary)// - .filter(removed -> removed.getAnnotationId().equals(entry.getId()))// - .findFirst()// - .ifPresent(idRemove -> entry.getManualChanges().add(ManualChangeFactory.toManualChange(idRemove, false))); - } - - private ChangeType calculateChangeType(EntryState state, EntryState previousState) { if (state.equals(previousState)) { -- 2.47.2 From 0b833f2f22d47d24e5e1bd7755f82a9d6b6515a9 Mon Sep 17 00:00:00 2001 From: Corina Olariu Date: Fri, 1 Mar 2024 13:29:48 +0100 Subject: [PATCH 10/21] RED-8590 - Missing reason for added CBI Address, recategorized LOGO and signature 
--- .../src/test/resources/drools/acceptance_rules.drl | 2 +- .../src/test/resources/drools/all_redact_manager_rules.drl | 2 +- .../src/test/resources/drools/documine_flora.drl | 2 +- .../src/test/resources/drools/manual_redaction_rules.drl | 2 +- .../src/test/resources/drools/rules.drl | 2 +- .../src/test/resources/drools/rules_v2.drl | 2 +- .../src/test/resources/drools/table_demo.drl | 2 +- .../src/test/resources/drools/test_rules.drl | 2 +- .../src/main/resources/all_redact_manager_rules.drl | 2 +- .../rules-management/src/main/resources/all_rules_documine.drl | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 7c3c9cb3..f11dffcc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -856,7 +856,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index e971d5b2..3d88ae62 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -1419,7 +1419,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 228989b6..d2df3761 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1276,7 +1276,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl index 3cb50691..340a01ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/manual_redaction_rules.drl @@ -188,7 +188,7 @@ rule "MAN.3.2: Apply image recategorization" rule 
"MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index ec07fff7..d64439c8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -1009,7 +1009,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index 2984605c..be158e09 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -212,7 +212,7 @@ rule "MAN.3.1: Apply entity recategorization of same type" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), 
!dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl index 61cf03cd..a40618d5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -338,7 +338,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl index 17a85e9b..2c299166 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/test_rules.drl @@ -238,7 +238,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl 
b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 21c0f0b0..d89b9fee 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -1442,7 +1442,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 2bf9e43e..8c1edfc3 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -1423,7 +1423,7 @@ rule "MAN.3.2: Apply image recategorization" rule "MAN.3.3: Apply recategorization entities by default" salience 128 when - $entity: IEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) then $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); end -- 2.47.2 From ec04a902d528135fc71f2c40401ac896e23f7ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Tue, 5 Mar 2024 16:58:12 +0100 Subject: [PATCH 11/21] Image name backport --- .../v1/server/model/document/nodes/Image.java | 7 +++++-- .../v1/server/model/document/nodes/ImageType.java | 14 +------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java index e591af11..0a3243fb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java @@ -70,7 +70,9 @@ public class Image implements GenericSemanticNode, IEntity { @Override public TextBlock getTextBlock() { - return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector()); + return streamAllSubNodes().filter(SemanticNode::isLeaf) + .map(SemanticNode::getLeafTextBlock) + .collect(new TextBlockCollector()); } @@ -91,7 +93,8 @@ public class Image implements GenericSemanticNode, IEntity { @Override public String type() { - return getManualOverwrite().getType().orElse(imageType.toString()); + return getManualOverwrite().getType() + .orElse(imageType.toString().toLowerCase(Locale.ENGLISH)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java index e5e025f0..397d7b11 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/ImageType.java @@ -6,22 +6,10 @@ public enum ImageType { LOGO, FORMULA, SIGNATURE, - OTHER { - @Override - public String 
toString() { - - return "image"; - } - }, + OTHER, OCR; - public String toString() { - - return name().toLowerCase(Locale.ENGLISH); - } - - public static ImageType fromString(String imageType) { return switch (imageType.toLowerCase(Locale.ROOT)) { -- 2.47.2 From 364f994ffdfa12a6ef27e4a74c9009da0fe3e4a3 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Fri, 8 Mar 2024 14:34:03 +0100 Subject: [PATCH 12/21] RED-8645 - Update RM rules --- .../resources/drools/acceptance_rules.drl | 284 +++++++++--- .../drools/all_redact_manager_rules.drl | 412 +++++++++++++----- .../src/test/resources/drools/rules.drl | 335 ++++++++++---- .../src/test/resources/drools/rules_v2.drl | 4 +- .../resources/all_redact_manager_rules.drl | 411 ++++++++++++----- 5 files changed, 1088 insertions(+), 358 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index f11dffcc..b058ab5b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -222,7 +222,19 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -230,12 +242,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -243,7 +255,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -268,7 +280,19 @@ rule "CBI.17.1: Add recommendation for Addresses in Test Organism sections, with // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -276,12 +300,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -289,7 +313,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -298,45 +322,78 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC //------------------------------------ PII rules ------------------------------------ // Rule unit: 
PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end + // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.0: Redact Phone and Fax by RegEx" + when + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, 
$section) + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -351,10 +408,10 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" +rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -369,34 +426,41 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non 
vertebrate study)" +rule "PII.3.0: Redact telephone numbers by RegEx" + when + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) 
of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -422,9 +486,9 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -450,9 +514,49 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + // Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" +rule "PII.5.0: Redact line after contact information keywords reduced" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -462,10 +566,10 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -475,12 +579,23 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: 
Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -489,10 +604,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -501,12 +616,28 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, 
$section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end - // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -521,10 +652,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -539,14 +670,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -562,7 +692,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -577,27 +725,35 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -654,38 +810,52 @@ rule "ETC.0.0: Purity Hint" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: Image(imageType 
== ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 3d88ae62..610b02a2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -157,17 +157,18 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() - $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( + 
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) ); + }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -206,21 +207,23 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Redacted because table row contains a vertebrate" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.skipWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), 
- $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -247,20 +250,22 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $entity.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -350,17 +355,18 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() - $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) - ); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); end @@ -432,7 +438,22 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) + TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() + TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + then + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); + end + +rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -444,13 +465,13 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.1", "Redacted 
because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" +rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -463,13 +484,13 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" +rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() @@ -478,7 +499,7 @@ rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'V $authorCell: TableCell(row == $rowWithNo) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> authorEntity.skip("CBI.12.2", 
"Not redacted because it's row does not belong to a vertebrate study")); + .ifPresent(authorEntity -> authorEntity.skip("CBI.12.3", "Not redacted because it's row does not belong to a vertebrate study")); end @@ -496,9 +517,12 @@ rule "CBI.13.0: Ignore CBI Address recommendations" // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when + $section: Section(containsStringIgnoreCase("batches produced at")) $sponsorEntity: TextEntity(type() == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + entityCreationService.byString("batches produced at", "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.14.0", "must_redact")); end @@ -519,10 +543,10 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -540,8 +564,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $residueKeyword: String() from List.of("determination of residues", "determination of total residues") $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) @@ -550,7 +574,19 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -558,12 +594,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -571,7 +607,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -629,7 +665,19 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\")" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -637,12 +685,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -650,7 +698,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -697,53 +745,94 @@ rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in n //------------------------------------ PII rules ------------------------------------ // Rule unit: 
PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end + // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.1.2: Redact typoed Emails with indicator" +rule "PII.1.3: Redact typoed Emails with indicator" when $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. 
\\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.4: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end // Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.0: Redact Phone and Fax by RegEx" + when + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -758,10 +847,10 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" +rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -776,42 +865,49 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.2.2: Redact phone numbers without indicators" +rule "PII.2.3: Redact phone numbers without indicators" when $section: Section(containsString("+")) then entityCreationService.byRegex("(\\+[\\dO]{1,2} 
)(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) - .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.2.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" +rule "PII.3.0: Redact telephone numbers by RegEx" + when + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} 
(\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -837,9 +933,9 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -865,9 +961,49 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + // Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" +rule "PII.5.0: Redact line after contact information keywords reduced" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -877,10 +1013,10 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -890,12 +1026,23 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: 
Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -904,10 +1051,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -916,12 +1063,28 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, 
$section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end - // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -936,10 +1099,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -954,14 +1117,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -977,7 +1139,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -992,27 +1172,35 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -1089,38 +1277,52 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: 
Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index d64439c8..3ee479cd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -100,17 +100,18 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() - $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> 
entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) ); + }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -149,21 +150,23 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Redacted because table row contains a vertebrate" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.skipWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", 
$authorOrAddress).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -190,20 +193,22 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $entity.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -225,17 +230,18 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() - $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) - ); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); end @@ -279,7 +285,22 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) + TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() + TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + then + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); + end + +rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -291,13 +312,13 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.1", "Redacted 
because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" +rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -310,13 +331,13 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" +rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() @@ -325,16 +346,19 @@ rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'V $authorCell: TableCell(row == $rowWithNo) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> 
authorEntity.skip("CBI.12.2", "Not redacted because it's row does not belong to a vertebrate study")); + .ifPresent(authorEntity -> authorEntity.skip("CBI.12.3", "Not redacted because it's row does not belong to a vertebrate study")); end // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when + $section: Section(containsStringIgnoreCase("batches produced at")) $sponsorEntity: TextEntity(type() == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + entityCreationService.byString("batches produced at", "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.14.0", "must_redact")); end @@ -355,10 +379,10 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -376,8 +400,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $residueKeyword: String() from List.of("determination of residues", "determination of total residues") $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) @@ -386,7 +410,19 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -394,12 +430,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -407,7 +443,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -465,7 +501,19 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\")" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -473,12 +521,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -486,7 +534,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -495,47 +543,62 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC //------------------------------------ PII rules ------------------------------------ // Rule unit: 
PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end + // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -561,9 +624,9 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -589,9 +652,48 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -600,10 +702,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -612,12 +714,28 @@ rule "PII.6.1: Redact line between contact keywords 
(vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end - // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -632,10 +750,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -650,14 +768,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -673,7 +790,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -688,27 +823,35 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -755,38 +898,52 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: 
Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl index be158e09..aaebfcdb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -80,12 +80,12 @@ rule "CBI.0.0: Redact CBI Authors (non vertebrate Study)" //------------------------------------ PII rules ------------------------------------ // Rule unit: PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl 
b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index d89b9fee..896aa811 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -157,17 +157,18 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() - $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) + $table.getEntitiesOfTypeInSameRow("vertebrate", entity) ); + }); end @@ -207,21 +208,23 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Redacted because table row contains a vertebrate" +rule "CBI.4.1: Don't redact authors or addresses which appear in the same row as a vertebrate and a no_redaction_indicator" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("vertebrate"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.skipWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -248,20 +251,22 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but 
also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() - TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() - $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() - $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("no_redaction_indicator"), + hasEntitiesOfType("redaction_indicator"), + (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $entity.applyWithReferences( + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() ); + }); end @@ -351,17 +356,18 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) - $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() - $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() - $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() + $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) then - $authorOrAddress.applyWithReferences( - "CBI.8.1", - "Must_redact found", - "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) - ); + $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) + .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) + .forEach(entity -> { + entity.applyWithReferences( + "CBI.8.1", + "Must_redact found", + "Reg (EC) No 1107/2009 Art. 
63 (2g)", + $table.getEntitiesOfTypeInSameRow("must_redact", entity) + ); + }); end @@ -433,7 +439,22 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study // Rule unit: CBI.12 -rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" +rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes'" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) + TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() + TableCell(header, containsStringIgnoreCase("Vertebrate study Y/N"), $vertebrateCol: col) from $table.streamHeaders().toList() + $rowCell: TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() + TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() + then + entityCreationService.bySemanticNode($rowCell, "must_redact", EntityType.HINT) + .ifPresent(yesEntity -> { + yesEntity.skip("CBI.12.0", "must_redact"); + }); + end + +rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -445,13 +466,13 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.1", "Redacted 
because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" +rule "CBI.12.2: Redact and recommend TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'Yes' (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -464,13 +485,13 @@ rule "CBI.12.1: Redact and recommend TableCell with header 'Author' or 'Author(s entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { - authorEntity.redact("CBI.12.1", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + authorEntity.redact("CBI.12.2", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.addMultipleAuthorsAsRecommendation(authorEntity); }); end -rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" +rule "CBI.12.3: Skip TableCell with header 'Author' or 'Author(s)' and header 'Vertebrate study Y/N' with value 'No'" when $table: Table(hasHeader("Author(s)") || hasHeader("Author"), hasHeaderIgnoreCase("Vertebrate Study Y/N")) TableCell(header, containsAnyStringIgnoreCase("Author", "Author(s)"), $authorCol: col) from $table.streamHeaders().toList() @@ -479,7 +500,7 @@ rule "CBI.12.2: Skip TableCell with header 'Author' or 'Author(s)' and header 'V $authorCell: TableCell(row == $rowWithNo) from $table.streamCol($authorCol).toList() then entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) - .ifPresent(authorEntity -> authorEntity.skip("CBI.12.2", 
"Not redacted because it's row does not belong to a vertebrate study")); + .ifPresent(authorEntity -> authorEntity.skip("CBI.12.3", "Not redacted because it's row does not belong to a vertebrate study")); end @@ -497,9 +518,12 @@ rule "CBI.13.0: Ignore CBI Address recommendations" // Rule unit: CBI.14 rule "CBI.14.0: Redact CBI_sponsor entities if preceded by \"batches produced at\"" when + $section: Section(containsStringIgnoreCase("batches produced at")) $sponsorEntity: TextEntity(type() == "CBI_sponsor", textBefore.contains("batches produced at")) then $sponsorEntity.redact("CBI.14.0", "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + entityCreationService.byString("batches produced at", "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.14.0", "must_redact")); end @@ -520,10 +544,10 @@ rule "CBI.15.0: Redact row if row contains \"determination of residues\" and liv containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $section) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $section) + .forEach(entity -> entity.skip("CBI.15.0", "must_redact")); - $section.getEntitiesOfType(List.of($keyword, $residueKeyword)) + $section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) .forEach(redactionEntity -> redactionEntity.redact("CBI.15.0", "Determination of residues and keyword \"" + $keyword + "\" was found.", "Reg (EC) No 1107/2009 Art. 
63 (2g)")); end @@ -541,8 +565,8 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio $residueKeyword: String() from List.of("determination of residues", "determination of total residues") $table: Table(containsStringIgnoreCase($residueKeyword), containsStringIgnoreCase($keyword)) then - entityCreationService.byString($keyword, "must_redact", EntityType.ENTITY, $table) - .toList(); + entityCreationService.byString($keyword, "must_redact", EntityType.HINT, $table) + .forEach(entity -> entity.skip("CBI.15.1", "must_redact")); $table.streamEntitiesWhereRowContainsStringsIgnoreCase(List.of($keyword, $residueKeyword)) .filter(redactionEntity -> redactionEntity.isAnyType(List.of("CBI_author", "CBI_address"))) @@ -551,7 +575,19 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio // Rule unit: CBI.16 -rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" +rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + dictionary.recommendEverywhere(entity); + }); + end + +rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -559,12 +595,12 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end -rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" +rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -572,7 +608,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (vertebrate study)" then entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) .forEach(entity -> { - entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(entity); }); end @@ -630,7 +666,19 @@ rule "CBI.19.0: Expand CBI_author entities with salutation prefix" // Rule unit: CBI.20 -rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" +rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.0", "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + +rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (non vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -638,12 +686,12 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study"); + laboratoryEntity.skip("CBI.20.1", "PERFORMING LABORATORY was found for non vertebrate study"); dictionary.recommendEverywhere(laboratoryEntity); }); end -rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" +rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\" (vertebrate study)" agenda-group "LOCAL_DICTIONARY_ADDS" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") @@ -651,7 +699,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC then entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) .forEach(laboratoryEntity -> { - laboratoryEntity.redact("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + laboratoryEntity.redact("CBI.20.2", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); dictionary.recommendEverywhere(laboratoryEntity); }); end @@ -697,53 +745,93 @@ rule "CBI.22.0: Redact Addresses in Reference Tables for vertebrate studies in n //------------------------------------ PII rules ------------------------------------ // Rule unit: 
PII.0 -rule "PII.0.0: Redact all PII (non vertebrate study)" +rule "PII.0.0: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.0", "Personal Information found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "PII.0.1: Redact all PII (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.0", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "PII.0.1: Redact all PII (vertebrate study)" +rule "PII.0.2: Redact all PII (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $pii: TextEntity(type() == "PII", dictionaryEntry) then - $pii.redact("PII.0.1", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: PII.1 -rule "PII.1.0: Redact Emails by RegEx (Non vertebrate study)" +rule "PII.1.0: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.1.1: Redact Emails by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.0", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.1.1: Redact Emails by RegEx (vertebrate study)" +rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("@")) then entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.1", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.1.2: Redact typoed Emails with indicator" +rule "PII.1.3: Redact typoed Emails with indicator" when $section: Section(containsString("@") || containsStringIgnoreCase("mail")) then entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. 
\\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) - .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(emailEntity -> emailEntity.redact("PII.1.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + end + +rule "PII.1.4: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end // Rule unit: PII.2 -rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" +rule "PII.2.0: Redact Phone and Fax by RegEx" + when + $section: Section(containsString("Contact") || + containsString("Telephone") || + containsString("Phone") || + containsString("Ph.") || + containsString("Fax") || + containsString("Tel") || + containsString("Ter") || + containsString("Mobile") || + containsString("Fel") || + containsString("Fer")) + then + entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) + .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.2.1: Redact Phone and Fax by RegEx (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -758,10 +846,10 @@ rule "PII.2.0: Redact Phone and Fax by RegEx (non vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.0", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" +rule "PII.2.2: Redact Phone and Fax by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsString("Contact") || @@ -776,41 +864,48 @@ rule "PII.2.1: Redact Phone and Fax by RegEx (vertebrate study)" containsString("Fer")) then entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section) - .forEach(contactEntity -> contactEntity.redact("PII.2.1", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.2.2", "Found by Phone and Fax Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end -rule "PII.2.2: Redact phone numbers without indicators" +rule "PII.2.3: Redact phone numbers without indicators" when $section: Section(containsString("+")) then entityCreationService.byRegex("(\\+[\\dO]{1,2} 
)(\\([\\dO]{1,3}\\))?[\\d\\-O ]{8,15}", "PII", EntityType.ENTITY, $section) - .forEach(entity -> entity.redact("PII.2.2", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.2.3", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end // Rule unit: PII.3 -rule "PII.3.0: Redact telephone numbers by RegEx (Non vertebrate study)" +rule "PII.3.0: Redact telephone numbers by RegEx" + when + $section: Section(matchesRegex("[+]\\d{1,}")) + then + entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) + .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.3.1: Redact telephone numbers by RegEx (Non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} (\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.0", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.3.1: Redact telephone numbers by RegEx (vertebrate study)" +rule "PII.3.2: Redact telephone numbers by RegEx (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(matchesRegex("[+]\\d{1,}")) then entityCreationService.byRegex("((([+]\\d{1,3} 
(\\d{7,12})\\b)|([+]\\d{1,3}(\\d{3,12})\\b|[+]\\d{1,3}([ -]\\(?\\d{1,6}\\)?){2,4})|[+]\\d{1,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", "PII", EntityType.ENTITY, 1, $section) - .forEach(entity -> entity.redact("PII.3.1", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.3.2", "Telephone number found by regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.4 -rule "PII.4.0: Redact line after contact information keywords (non vertebrate study)" +rule "PII.4.0: Redact line after contact information keywords" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -836,9 +931,9 @@ rule "PII.4.0: Redact line after contact information keywords (non vertebrate st .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (vertebrate study)" +rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", "Contact:", "Alternative contact:", @@ -864,9 +959,49 @@ rule "PII.4.1: Redact line after contact information keywords (vertebrate study) .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + // Rule unit: PII.5 -rule "PII.5.0: Redact line after contact information keywords reduced (non vertebrate study)" +rule "PII.5.0: Redact line after contact information keywords reduced" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.5.1: Redact line after contact information keywords reduced (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -876,10 +1011,10 @@ rule "PII.5.0: Redact line after contact information keywords reduced (non verte $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrate study)" +rule "PII.5.2: Redact line after contact information keywords reduced (Vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $contactKeyword: String() from List.of("Contact point:", @@ -889,12 +1024,23 @@ rule "PII.5.1: Redact line after contact information keywords reduced (Vertebrat $section: Section(containsString($contactKeyword)) then entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.5.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.5.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.6 -rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" +rule "PII.6.0: Redact line between contact keywords" + when + $section: 
Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + end + +rule "PII.6.1: Redact line between contact keywords (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -903,10 +1049,10 @@ rule "PII.6.0: Redact line between contact keywords (non vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.0", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.6.1: Redact line between contact keywords (vertebrate study)" +rule "PII.6.2: Redact line between contact keywords (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) @@ -915,12 +1061,28 @@ rule "PII.6.1: Redact line between contact keywords (vertebrate study)" entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, 
$section) ) - .forEach(contactEntity -> contactEntity.redact("PII.6.1", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end - // Rule unit: PII.7 -rule "PII.7.0: Redact contact information if applicant is found (non vertebrate study)" +rule "PII.7.0: Redact contact information if applicant is found" + when + $section: Section(getHeadline().containsString("applicant") || + getHeadline().containsString("Primary contact") || + getHeadline().containsString("Alternative contact") || + containsString("Applicant") || + containsString("Telephone number:")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.7.1: Redact contact information if applicant is found (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -935,10 +1097,10 @@ rule "PII.7.0: Redact contact information if applicant is found (non vertebrate entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.0", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.7.1: Redact contact information if applicant is found (vertebrate study)" +rule "PII.7.2: Redact contact information if applicant is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(getHeadline().containsString("applicant") || @@ -953,14 +1115,13 @@ rule "PII.7.1: Redact contact information if applicant is found (vertebrate stud entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.7.1", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.7.2", "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.8 -rule "PII.8.0: Redact contact information if producer is found (non vertebrate study)" +rule "PII.8.0: Redact contact information if producer is found" when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || 
value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || containsStringIgnoreCase("producer of the active substance") || containsStringIgnoreCase("manufacturer of the active substance") || @@ -976,7 +1137,25 @@ rule "PII.8.0: Redact contact information if producer is found (non vertebrate s .forEach(entity -> entity.redact("PII.8.0", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.8.1: Redact contact information if producer is found (vertebrate study)" +rule "PII.8.1: Redact contact information if producer is found (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section(containsStringIgnoreCase("producer of the plant protection") || + containsStringIgnoreCase("producer of the active substance") || + containsStringIgnoreCase("manufacturer of the active substance") || + containsStringIgnoreCase("manufacturer:") || + containsStringIgnoreCase("Producer or producers of the active substance")) + then + Stream.concat(entityCreationService.lineAfterStrings(List.of("Contact point:", "Contact:", "Alternative contact:", "European contact:", "No:", "Contact:", "Tel.:", "Tel:", "Telephone number:", + "Telephone No:", "Telephone:", "Phone No.", "Phone:", "Fax number:", "Fax:", "E-mail:", "Email:", "e-mail:", "E-mail address:"), "PII", EntityType.ENTITY, $section), + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + )) + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(containsStringIgnoreCase("producer of the plant protection") || @@ -991,27 +1170,35 @@ rule "PII.8.1: Redact contact information if producer is found (vertebrate study entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(entity -> entity.redact("PII.8.2", "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end // Rule unit: PII.9 -rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" +rule "PII.9.0: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); + end + +rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.0", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end -rule "PII.9.1: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" +rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) - .forEach(authorEntity -> authorEntity.redact("PII.9.1", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); + .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end @@ -1098,38 +1285,52 @@ rule "ETC.1.0: Redact Purity" // Rule unit: ETC.2 -rule "ETC.2.0: Redact signatures (non vertebrate study)" +rule "ETC.2.0: Redact signatures" + when + $signature: 
Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.0", "Signature Found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + end + +rule "ETC.2.1: Redact signatures (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.0", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.2.1: Redact signatures (vertebrate study)" +rule "ETC.2.2: Redact signatures (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $signature: Image(imageType == ImageType.SIGNATURE) then - $signature.redact("ETC.2.1", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end // Rule unit: ETC.3 -rule "ETC.3.0: Skip logos (non vertebrate study)" +rule "ETC.3.0: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.0", "Logo Found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + end + +rule "ETC.3.1: Skip logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.skip("ETC.3.0", "Logo Found"); + $logo.skip("ETC.3.1", "Logo Found"); end -rule "ETC.3.1: Redact logos (vertebrate study)" +rule "ETC.3.2: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $logo: Image(imageType == ImageType.LOGO) then - $logo.redact("ETC.3.1", "Logo Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -- 2.47.2 From dc7910cd07a32c95459fa322cf22a86cea3c3a33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Tue, 12 Mar 2024 09:34:39 +0100 Subject: [PATCH 13/21] RED-7384: Fixed migration problem for a specific file --- .../red/service/redaction/v1/server/model/MigrationEntity.java | 1 + .../v1/server/service/document/EntityFindingUtility.java | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index 91271443..740a3651 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -357,6 +357,7 @@ public final class MigrationEntity { .annotationId(getNewId()) .updateDictionary(manualResizeRedaction.getUpdateDictionary()) .addToAllDossiers(manualResizeRedaction.isAddToAllDossiers()) + 
.requestDate(manualResizeRedaction.getRequestDate()) .textAfter(manualResizeRedaction.getTextAfter()) .textBefore(manualResizeRedaction.getTextBefore()) .build(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index 76b9cfb2..e6bb27ed 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -161,7 +161,8 @@ public class EntityFindingUtility { pageNumbers.stream().filter(pageNumber -> !node.onPage(pageNumber)).toList(), node.getPages())); } - SearchImplementation searchImplementation = new SearchImplementation(entryValues, true); + + SearchImplementation searchImplementation = new SearchImplementation(entryValues.stream().map(String::trim).collect(Collectors.toSet()), true); return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) .stream() -- 2.47.2 From 76f587aae4dfb18c0bb16dc8395adc43a8e8e87d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Tue, 12 Mar 2024 12:00:31 +0100 Subject: [PATCH 14/21] RED-7384: Fixed missing requestDate in new created manualRedactions for resizes --- .../service/redaction/v1/server/model/MigrationEntity.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index 740a3651..4761da03 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -379,6 +379,12 @@ public final class MigrationEntity { .findFirst() .orElse(manualChanges.get(0)).getUser(); + var requestDate = manualChanges.stream() + .filter(mc -> mc instanceof ManualResizeRedaction) + .findFirst() + .orElse(manualChanges.get(0)).getRequestDate(); + + return ManualRedactionEntry.builder() .annotationId(newId) .fileId(fileId) @@ -387,6 +393,7 @@ public final class MigrationEntity { .reason(redactionLogEntry.getReason()) .legalBasis(redactionLogEntry.getLegalBasis()) .section(redactionLogEntry.getSection()) + .requestDate(requestDate) .addToDictionary(false) .addToDossierDictionary(false) .rectangle(false) -- 2.47.2 From 1c3c632fd2dd5c3ec63101de808ada51f5876d43 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Wed, 13 Mar 2024 08:47:52 +0100 Subject: [PATCH 15/21] RED-8645 - Fix some more rules --- .../resources/drools/acceptance_rules.drl | 58 +------------------ .../drools/all_redact_manager_rules.drl | 58 +------------------ .../src/test/resources/drools/rules.drl | 58 +------------------ .../resources/all_redact_manager_rules.drl | 58 +------------------ 4 files changed, 4 insertions(+), 228 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index b058ab5b..be46f5d1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -486,62 +486,6 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> 
contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - -rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); - end - // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -707,7 +651,7 @@ rule "PII.8.1: Redact contact information if producer is found (non vertebrate s entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 610b02a2..3261622e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -933,62 +933,6 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - -rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); - end - // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1154,7 +1098,7 @@ rule "PII.8.1: Redact contact information if producer is found (non vertebrate s entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 3ee479cd..cff2c57d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -624,62 +624,6 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - -rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); - end - // Rule unit: PII.6 rule "PII.6.0: Redact line between contact keywords" @@ -805,7 +749,7 @@ rule "PII.8.1: Redact contact information if producer is found (non vertebrate s entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 896aa811..55cd6d88 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -931,62 +931,6 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end -rule "PII.4.1: Redact line after contact information keywords (non vertebrate study)" - when - not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); - end - -rule "PII.4.2: Redact line after contact information keywords (vertebrate study)" - when - FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") - $contactKeyword: String() from List.of("Contact point:", - "Contact:", - "Alternative contact:", - "European contact:", - "No:", - "Contact:", - "Tel.:", - "Tel:", - "Telephone number:", - "Telephone No:", - "Telephone:", - "Phone No.", - "Phone:", - "Fax number:", - "Fax:", - "E-mail:", - "Email:", - "e-mail:", - "E-mail address:") - $section: Section(containsString($contactKeyword)) - then - entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) - .forEach(contactEntity -> contactEntity.redact("PII.4.2", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); - end - // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1152,7 +1096,7 @@ rule "PII.8.1: Redact contact information if producer is found (non vertebrate s entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) )) - .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)")); + .forEach(entity -> entity.redact("PII.8.1", "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end rule "PII.8.2: Redact contact information if producer is found (vertebrate study)" -- 2.47.2 From 3579c060337f3708b985c7fcf76944c2b6d67bd5 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Wed, 13 Mar 2024 16:59:14 +0200 Subject: [PATCH 16/21] RED-8680 - Add rules for syngenta sanitisation seeds --- .../resources/drools/acceptance_rules.drl | 139 ++++++++++++++++ .../drools/all_redact_manager_rules.drl | 147 +++++++++++++++++ .../src/test/resources/drools/rules.drl | 114 ++++++++++++++ .../resources/all_redact_manager_rules.drl | 149 ++++++++++++++++++ 4 files changed, 549 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index be46f5d1..c5d87449 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -181,6 +181,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" 
+ agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -209,6 +222,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + 
.filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -260,6 +299,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -318,6 +369,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + //------------------------------------ PII rules ------------------------------------ @@ -345,6 +408,12 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information 
found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: PII.1 @@ -374,6 +443,14 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -486,6 +563,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -563,6 +667,19 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -710,6 +827,14 @@ rule "PII.10.0: Redact study director abbreviation" .forEach(entity -> 
entity.redact("PII.10.0", "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -777,6 +902,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -802,6 +934,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.5 rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidential'" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 3261622e..c4a828b9 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -397,6 +397,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -425,6 +438,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> 
redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -612,6 +651,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -703,6 +754,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" @@ -768,6 +831,12 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: PII.1 @@ -813,6 +882,22 @@ rule "PII.1.4: Redact typoed Emails with indicator" .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -933,6 +1018,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1010,6 +1122,19 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -1147,6 +1272,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver 
.forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -1244,6 +1377,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -1269,6 +1409,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index cff2c57d..514b2297 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -272,6 +272,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -448,6 +461,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -539,6 +564,18 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + //------------------------------------ PII rules ------------------------------------ @@ -566,6 +603,12 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end // Rule unit: PII.1 @@ -595,6 +638,14 @@ rule "PII.1.2: Redact Emails by RegEx (vertebrate study)" .forEach(emailEntity -> emailEntity.redact("PII.1.2", "Found by Email Regex", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + 
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.4 rule "PII.4.0: Redact line after contact information keywords" @@ -624,6 +675,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.6 rule "PII.6.0: Redact line between contact keywords" @@ -661,6 +739,20 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end + +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && 
containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -798,6 +890,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver .forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.11 rule "PII.11.0: Redact On behalf of Sequani Ltd.:" @@ -865,6 +965,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -890,6 +997,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) 
of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 55cd6d88..892ec7f3 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -398,6 +398,19 @@ rule "CBI.9.1: Redact all cells with Header Author as CBI_author (non vertebrate .forEach(redactionEntity -> redactionEntity.redact("CBI.9.1", "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002")); end +rule "CBI.9.2: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.9.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.10 rule "CBI.10.0: Redact all cells with Header Author(s) as CBI_author (vertebrate study)" @@ -426,6 +439,32 @@ rule "CBI.10.1: Redact all cells with Header Author as CBI_author (vertebrate st .forEach(redactionEntity -> redactionEntity.redact("CBI.10.1", "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "CBI.10.2: Redact all cells with Header 
Author(s) as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author(s)")) + then + $table.streamTableCellsWithHeader("Author(s)") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.2", "Author(s) found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "CBI.10.3: Redact all cells with Header Author as CBI_author (vertebrate study)" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $table: Table(hasHeader("Author")) + then + $table.streamTableCellsWithHeader("Author") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.redact("CBI.10.3", "Author found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: CBI.11 rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header" @@ -613,6 +652,18 @@ rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" }); end +rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(containsString("et al.")) + then + entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) 
et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) + .forEach(entity -> { + entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(entity); + }); + end + // Rule unit: CBI.17 rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" @@ -704,6 +755,19 @@ rule "CBI.20.2: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC }); end +rule "CBI.20.3: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJECT ID:\"" + agenda-group "LOCAL_DICTIONARY_ADDS" + when + $section: Section(!hasTables(), containsString("PERFORMING LABORATORY:"), containsString("LABORATORY PROJECT ID:")) + then + entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section) + .forEach(laboratoryEntity -> { + laboratoryEntity.redact("CBI.20.3", "PERFORMING LABORATORY was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + dictionary.recommendEverywhere(laboratoryEntity); + }); + end + + // Rule unit: CBI.21 rule "CBI.21.0: Redact short Authors section (non vertebrate study)" when @@ -768,6 +832,13 @@ rule "PII.0.2: Redact all PII (vertebrate study)" $pii.redact("PII.0.2", "Personal Information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "PII.0.3: Redact all PII" + when + $pii: TextEntity(type() == "PII", dictionaryEntry) + then + $pii.redact("PII.0.3", "Personal Information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: PII.1 rule "PII.1.0: Redact Emails by RegEx" @@ -812,6 +883,22 @@ rule "PII.1.4: Redact typoed Emails with indicator" .forEach(emailEntity -> emailEntity.redact("PII.1.4", "Personal information found", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.1.5: Redact Emails by RegEx" + when + $section: Section(containsString("@")) + then + entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.5", "Found by Email Regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + +rule "PII.1.6: Redact typoed Emails with indicator" + when + $section: Section(containsString("@") || containsStringIgnoreCase("mail")) + then + entityCreationService.byRegexIgnoreCase("mail[:\\.\\s]{1,2}([\\w\\/\\-\\{\\(\\. ]{3,20}(@|a|f)\\s?[\\w\\/\\-\\{\\(\\. ]{3,20}(\\. \\w{2,4}\\b|\\.\\B|\\.\\w{1,4}\\b))", "PII", EntityType.ENTITY, 1, $section) + .forEach(emailEntity -> emailEntity.redact("PII.1.6", "Personal information found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.2 rule "PII.2.0: Redact Phone and Fax by RegEx" @@ -931,6 +1018,33 @@ rule "PII.4.0: Redact line after contact information keywords" .forEach(contactEntity -> contactEntity.redact("PII.4.0", "Found after \"" + $contactKeyword + "\" contact keyword", "Reg (EC) No 1107/2009 Art. 
63 (2e)")); end +rule "PII.4.1: Redact line after contact information keywords" + when + $contactKeyword: String() from List.of("Contact point:", + "Contact:", + "Alternative contact:", + "European contact:", + "No:", + "Contact:", + "Tel.:", + "Tel:", + "Telephone number:", + "Telephone No:", + "Telephone:", + "Phone No.", + "Phone:", + "Fax number:", + "Fax:", + "E-mail:", + "Email:", + "e-mail:", + "E-mail address:") + $section: Section(containsString($contactKeyword)) + then + entityCreationService.lineAfterString($contactKeyword, "PII", EntityType.ENTITY, $section) + .forEach(contactEntity -> contactEntity.redact("PII.4.1", "Found after \"" + $contactKeyword + "\" contact keyword", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.5 rule "PII.5.0: Redact line after contact information keywords reduced" @@ -1008,6 +1122,19 @@ rule "PII.6.2: Redact line between contact keywords (vertebrate study)" .forEach(contactEntity -> contactEntity.redact("PII.6.2", "Found between contact keywords", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.6.3: Redact line between contact keywords (non vertebrate study)" + when + not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") + $section: Section((containsString("No:") && containsString("Fax")) || (containsString("Contact:") && containsString("Tel"))) + then + Stream.concat( + entityCreationService.betweenStrings("No:", "Fax", "PII", EntityType.ENTITY, $section), + entityCreationService.betweenStrings("Contact:", "Tel", "PII", EntityType.ENTITY, $section) + ) + .forEach(contactEntity -> contactEntity.redact("PII.6.3", "Found between contact keywords", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + + // Rule unit: PII.7 rule "PII.7.0: Redact contact information if applicant is found" when @@ -1145,6 +1272,14 @@ rule "PII.9.2: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\" (ver 
.forEach(authorEntity -> authorEntity.redact("PII.9.2", "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002")); end +rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" + when + $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) + then + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)")); + end + // Rule unit: PII.10 rule "PII.10.0: Redact study director abbreviation" @@ -1252,6 +1387,13 @@ rule "ETC.2.2: Redact signatures (vertebrate study)" $signature.redact("ETC.2.2", "Signature Found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); end +rule "ETC.2.3: Redact signatures" + when + $signature: Image(imageType == ImageType.SIGNATURE) + then + $signature.redact("ETC.2.3", "Signature Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.3 rule "ETC.3.0: Redact logos" @@ -1277,6 +1419,13 @@ rule "ETC.3.2: Redact logos (vertebrate study)" $logo.redact("ETC.3.2", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end +rule "ETC.3.3: Redact logos" + when + $logo: Image(imageType == ImageType.LOGO) + then + $logo.redact("ETC.3.3", "Logo Found", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)"); + end + // Rule unit: ETC.4 rule "ETC.4.0: Redact dossier dictionary entries" -- 2.47.2 From 69d0ab07544cdaf54f9860b58cf67199021c1c68 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Thu, 14 Mar 2024 17:14:31 +0200 Subject: [PATCH 17/21] RED-8705 - Fix image type --- .../v1/server/service/EntityLogCreatorService.java | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index 844ad6c6..91fc9feb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; @@ -30,6 +31,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionO import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import lombok.AccessLevel; @@ -184,11 +186,12 @@ public class EntityLogCreatorService { private EntityLogEntry createEntityLogEntry(Image image, String dossierTemplateId) { - boolean isHint = dictionaryService.isHint(image.type(), dossierTemplateId); + String imageType = image.getImageType().equals(ImageType.OTHER) ? 
"image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH); + boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId); return EntityLogEntry.builder() .id(image.getId()) .value(image.value()) - .type(image.type()) + .type(imageType) .reason(image.buildReasonWithManualChangeDescriptions()) .legalBasis(image.legalBasis()) .matchedRule(image.getMatchedRule().getRuleIdentifier().toString()) -- 2.47.2 From 7777e74a4406fa2dd357c4b6fd2129b475c73215 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Fri, 15 Mar 2024 16:01:31 +0200 Subject: [PATCH 18/21] RED-8680 - Add specific CBI rules for seeds --- .../resources/drools/acceptance_rules.drl | 26 +++++++++++++++++++ .../drools/all_redact_manager_rules.drl | 26 +++++++++++++++++++ .../resources/all_redact_manager_rules.drl | 26 +++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index c5d87449..b3e45289 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -153,6 +153,32 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end +rule "CBI.7.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.2", + "Published Information found in section", + 
$section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.3: Do not redact PII if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + then + $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + end + // Rule unit: CBI.9 rule "CBI.9.0: Redact all cells with Header Author(s) as CBI_author (non vertebrate study)" diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index c4a828b9..6c1e2519 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -336,6 +336,32 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end +rule "CBI.7.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") 
+ ); + }); + end + +rule "CBI.7.3: Do not redact PII if published information found in same table row" + when + $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + then + $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + end + // Rule unit: CBI.8 rule "CBI.8.0: Redacted because Section contains must_redact entity" diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index 892ec7f3..01244c8f 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -337,6 +337,32 @@ rule "CBI.7.1: Do not redact Names and Addresses if published information found $authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); end +rule "CBI.7.2: Do not redact PII if published information found in Section without tables" + when + $section: Section(!hasTables(), + hasEntitiesOfType("published_information"), + hasEntitiesOfType("PII")) + then + $section.getEntitiesOfType("PII") + .forEach(redactionEntity -> { + redactionEntity.skipWithReferences( + "CBI.7.2", + "Published Information found in section", + $section.getEntitiesOfType("published_information") + ); + }); + end + +rule "CBI.7.3: Do not redact PII if published information found in same table row" + when + $table: 
Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) + $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() + $tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList() + $pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities() + then + $pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); + end + // Rule unit: CBI.8 rule "CBI.8.0: Redacted because Section contains must_redact entity" -- 2.47.2 From 07d6fea992fa9fb30d1baf3b9bb8d815f5468c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Mon, 18 Mar 2024 11:03:19 +0100 Subject: [PATCH 19/21] RED-7384: Ignore redactionLog entries on non existing pages for migration --- ...dactionLogToEntityLogMigrationService.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java index a83069a1..d1dd7619 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java @@ -89,9 +89,10 @@ public class RedactionLogToEntityLogMigrationService { .map(migrationEntity -> migrationEntity.toEntityLogEntry(oldToNewIDMapping)) .toList()); - if (getNumberOfApprovedEntries(redactionLog) != entityLog.getEntityLogEntry().size()) { + if 
(getNumberOfApprovedEntries(redactionLog, document.getNumberOfPages()) != entityLog.getEntityLogEntry().size()) { String message = String.format("Not all entities have been found during the migration redactionLog has %d entries and new entityLog %d", - redactionLog.getRedactionLogEntry().size(), + redactionLog.getRedactionLogEntry() + .size(), entityLog.getEntityLogEntry().size()); log.error(message); throw new AssertionError(message); @@ -135,9 +136,9 @@ public class RedactionLogToEntityLogMigrationService { } - private static long getNumberOfApprovedEntries(RedactionLog redactionLog) { + private long getNumberOfApprovedEntries(RedactionLog redactionLog, int numberOfPages) { - return redactionLog.getRedactionLogEntry().size(); + return redactionLog.getRedactionLogEntry().stream().filter(redactionLogEntry -> isOnExistingPage(redactionLogEntry, numberOfPages)).collect(Collectors.toList()).size(); } @@ -250,6 +251,7 @@ public class RedactionLogToEntityLogMigrationService { List entitiesToMigrate = redactionLog.getRedactionLogEntry() .stream() .filter(redactionLogEntry -> !redactionLogEntry.isImage()) + .filter(redactionLogEntry -> isOnExistingPage(redactionLogEntry, document.getNumberOfPages())) .map(entry -> MigrationEntity.fromRedactionLogEntry(entry, dictionaryService.isHint(entry.getType(), dossierTemplateId), fileId)) .toList(); @@ -287,4 +289,18 @@ public class RedactionLogToEntityLogMigrationService { return entitiesToMigrate; } + + + private boolean isOnExistingPage(RedactionLogEntry redactionLogEntry, int numberOfPages){ + var pages = redactionLogEntry.getPositions().stream().map(Rectangle::getPage).collect(Collectors.toSet()); + + for (int page: pages){ + if(page > numberOfPages){ + return false; + } + } + return true; + } + + } -- 2.47.2 From f641824270354f355dff708bf9803cf099f75727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Mon, 18 Mar 2024 13:17:11 +0100 Subject: [PATCH 20/21] RED-7384-bp: add useful fields to 
ManualRedactionEntry --- .../build.gradle.kts | 2 +- ...dactionLogToEntityLogMigrationService.java | 4 +- .../v1/server/model/MigrationEntity.java | 31 +-- .../document/EntityFindingUtility.java | 41 ++- .../EntityFromPrecursorCreationService.java | 13 +- .../v1/server/RedactionAcceptanceTest.java | 45 ++-- .../v1/server/RedactionIntegrationTest.java | 23 +- .../ManualChangesEnd2EndTest.java | 131 +++++---- .../ManualChangesIntegrationTest.java | 123 +++++++-- .../manualchanges/ManualChangesUnitTest.java | 66 +++-- .../manualchanges/PrecursorEntityTest.java | 51 ++-- .../UnprocessedChangesServiceTest.java | 252 +++++++++++++----- 12 files changed, 527 insertions(+), 255 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 2e413162..08413ee2 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -16,7 +16,7 @@ val layoutParserVersion = "0.89.3" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.349.4" +val persistenceServiceVersion = "2.349.7" val springBootStarterVersion = "3.1.5" configurations { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java index a83069a1..c9c4f996 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java @@ -34,7 +34,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; import com.iqser.red.service.redaction.v1.server.service.document.EntityFromPrecursorCreationService; import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; @@ -54,7 +53,6 @@ public class RedactionLogToEntityLogMigrationService { private static final double MATCH_THRESHOLD = 10; EntityFindingUtility entityFindingUtility; - EntityEnrichmentService entityEnrichmentService; DictionaryService dictionaryService; ManualChangesApplicationService manualChangesApplicationService; @@ -67,8 +65,8 @@ public class RedactionLogToEntityLogMigrationService { MigratedIds migratedIds = entitiesToMigrate.stream() .collect(new MigratedIdsCollector()); - applyManualChanges(entitiesToMigrate, manualRedactions); log.info("applying manual changes to migrated entities for file {}", fileId); + applyManualChanges(entitiesToMigrate, manualRedactions); EntityLog entityLog = new EntityLog(); entityLog.setAnalysisNumber(redactionLog.getAnalysisNumber()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index 4761da03..5d14ad0f 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -4,6 +4,7 @@ import static com.iqser.red.service.redaction.v1.server.service.EntityLogCreator import static com.iqser.red.service.redaction.v1.server.service.EntityLogCreatorService.buildEntryType; import java.awt.geom.Rectangle2D; +import java.time.OffsetDateTime; import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -20,6 +21,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntryType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.server.migration.MigrationMapper; @@ -350,18 +352,8 @@ public final class MigrationEntity { manualChanges.addAll(manualChangesToApply); manualChangesToApply.forEach(manualChange -> { if (manualChange instanceof ManualResizeRedaction manualResizeRedaction && migratedEntity instanceof TextEntity textEntity) { - // Due to the value in the old redaction log already being resized, there is no way to find the original entity ID and therefore to migrate the resize annotation correctly. - // Instead, we add an add_locally change to the db. 
- ManualResizeRedaction migratedManualResizeRedaction = ManualResizeRedaction.builder() - .positions(manualResizeRedaction.getPositions()) - .annotationId(getNewId()) - .updateDictionary(manualResizeRedaction.getUpdateDictionary()) - .addToAllDossiers(manualResizeRedaction.isAddToAllDossiers()) - .requestDate(manualResizeRedaction.getRequestDate()) - .textAfter(manualResizeRedaction.getTextAfter()) - .textBefore(manualResizeRedaction.getTextBefore()) - .build(); - manualChangesApplicationService.resize(textEntity, migratedManualResizeRedaction); + manualResizeRedaction.setAnnotationId(newId); + manualChangesApplicationService.resize(textEntity, manualResizeRedaction); } else { migratedEntity.getManualOverwrite().addChange(manualChange); } @@ -379,26 +371,25 @@ public final class MigrationEntity { .findFirst() .orElse(manualChanges.get(0)).getUser(); - var requestDate = manualChanges.stream() - .filter(mc -> mc instanceof ManualResizeRedaction) - .findFirst() - .orElse(manualChanges.get(0)).getRequestDate(); - + OffsetDateTime requestDate = manualChanges.get(0).getRequestDate(); return ManualRedactionEntry.builder() .annotationId(newId) .fileId(fileId) + .user(user) + .requestDate(requestDate) .type(redactionLogEntry.getType()) .value(redactionLogEntry.getValue()) .reason(redactionLogEntry.getReason()) .legalBasis(redactionLogEntry.getLegalBasis()) .section(redactionLogEntry.getSection()) - .requestDate(requestDate) + .rectangle(false) .addToDictionary(false) .addToDossierDictionary(false) - .rectangle(false) .positions(buildPositions(migratedEntity)) - .user(user) + .textAfter(redactionLogEntry.getTextAfter()) + .textBefore(redactionLogEntry.getTextBefore()) + .dictionaryEntryType(DictionaryEntryType.ENTRY) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index e6bb27ed..77f80a0a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -47,7 +47,9 @@ public class EntityFindingUtility { } - public Optional findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity, Map> entitiesWithSameValue, double matchThreshold) { + public Optional findClosestEntityAndReturnEmptyIfNotFound(PrecursorEntity precursorEntity, + Map> entitiesWithSameValue, + double matchThreshold) { if (precursorEntity.getValue() == null) { return Optional.empty(); @@ -56,7 +58,7 @@ public class EntityFindingUtility { List possibleEntities = entitiesWithSameValue.get(precursorEntity.getValue().toLowerCase(Locale.ENGLISH)); if (entityIdentifierValueNotFound(possibleEntities)) { - log.warn("Entity could not be created with precursorEntity: {}, due to the value {} not being found anywhere.", precursorEntity, precursorEntity.getValue()); + log.info("Entity could not be created with precursorEntity: {}, due to the value {} not being found anywhere.", precursorEntity, precursorEntity.getValue()); return Optional.empty(); } @@ -66,18 +68,22 @@ public class EntityFindingUtility { .min(Comparator.comparingDouble(ClosestEntity::getDistance)); if (optionalClosestEntity.isEmpty()) { - log.warn("No Entity with value {} found on page {}", precursorEntity.getValue(), precursorEntity.getEntityPosition()); + log.info("No Entity with value {} found on page {}", precursorEntity.getValue(), precursorEntity.getEntityPosition()); return Optional.empty(); } ClosestEntity closestEntity = optionalClosestEntity.get(); if (closestEntity.getDistance() > 
matchThreshold) { - log.warn("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}", - precursorEntity.getValue(), - precursorEntity.getEntityPosition().get(0).pageNumber(), - precursorEntity.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(), - closestEntity.getDistance(), - matchThreshold); + log.info("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}", + precursorEntity.getValue(), + precursorEntity.getEntityPosition() + .get(0).pageNumber(), + precursorEntity.getEntityPosition() + .stream() + .map(RectangleWithPage::rectangle2D) + .toList(), + closestEntity.getDistance(), + matchThreshold); return Optional.empty(); } @@ -93,8 +99,14 @@ public class EntityFindingUtility { private static boolean pagesMatch(TextEntity entity, List originalPositions) { - Set entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet()); - Set originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet()); + Set entityPageNumbers = entity.getPositionsOnPagePerPage() + .stream() + .map(PositionOnPage::getPage) + .map(Page::getNumber) + .collect(Collectors.toSet()); + Set originalPageNumbers = originalPositions.stream() + .map(RectangleWithPage::pageNumber) + .collect(Collectors.toSet()); return entityPageNumbers.containsAll(originalPageNumbers); } @@ -105,15 +117,16 @@ public class EntityFindingUtility { return Double.MAX_VALUE; } return originalPositions.stream() - .mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())) - .average() + .mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average() 
.orElse(Double.MAX_VALUE); } private static long countRectangles(TextEntity entity) { - return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum(); + return entity.getPositionsOnPagePerPage() + .stream() + .mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java index 22402160..7607a990 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFromPrecursorCreationService.java @@ -9,7 +9,6 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; @@ -23,29 +22,21 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import lombok.AccessLevel; +import lombok.RequiredArgsConstructor; import lombok.experimental.FieldDefaults; import lombok.extern.slf4j.Slf4j; @Slf4j @Service +@RequiredArgsConstructor @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public class EntityFromPrecursorCreationService { static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each 
corner of the bounding box of the entities EntityFindingUtility entityFindingUtility; - EntityCreationService entityCreationService; DictionaryService dictionaryService; - @Autowired - public EntityFromPrecursorCreationService(EntityEnrichmentService entityEnrichmentService, DictionaryService dictionaryService, EntityFindingUtility entityFindingUtility) { - - this.entityFindingUtility = entityFindingUtility; - entityCreationService = new EntityCreationService(entityEnrichmentService); - this.dictionaryService = dictionaryService; - } - - public List createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions manualRedactions, SemanticNode node, String dossierTemplateId) { Set idRemovals = manualRedactions.getIdsToRemove(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index 3f7726bc..90c9bfa1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -87,15 +87,15 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - 
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); @@ -122,6 +122,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { assertThat(recommendations).containsExactlyInAnyOrder("Michael N.", "Funnarie B.", "Feuer A."); } + @Test public void acceptanceTests() throws IOException { @@ -133,8 +134,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { System.out.println("Finished analysis"); EntityLog entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst().orElseThrow(); - var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst().orElseThrow(); + var publishedInformationEntry1 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst() + .orElseThrow(); + var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getContainingNodeId()).findFirst() + .orElseThrow(); assertEquals(EntryState.SKIPPED, asyaLyon1.getState()); @@ -146,8 +149,10 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, 
"published_information", "Oxford University Press").findFirst().orElseThrow(); - var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst().orElseThrow(); + var publishedInformationEntry2 = findEntityByTypeAndValue(entityLog, "published_information", "Oxford University Press").findFirst() + .orElseThrow(); + var asyaLyon2 = findEntityByTypeAndValueAndSectionNumber(entityLog, "CBI_author", "Asya Lyon", publishedInformationEntry2.getContainingNodeId()).findFirst() + .orElseThrow(); assertEquals(EntryState.APPLIED, asyaLyon2.getState()); @@ -168,13 +173,17 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { .stream() .filter(entry -> entry.getType().equals(type)) .filter(entry -> entry.getValue().equals(value)) - .filter(entry -> entry.getContainingNodeId().get(0).equals(sectionNumber.get(0))); + .filter(entry -> entry.getContainingNodeId() + .get(0).equals(sectionNumber.get(0))); } private static Stream findEntityByTypeAndValue(EntityLog redactionLog, String type, String value) { - return redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getType().equals(type)).filter(entry -> entry.getValue().equals(value)); + return redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getType().equals(type)) + .filter(entry -> entry.getValue().equals(value)); } @@ -201,13 +210,15 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { var redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); assertEquals(EntryState.IGNORED, - findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY)).findFirst().get().getState()); + findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> entry.getEntryType().equals(EntryType.ENTITY)) + .findFirst() + .get().getState()); } private static 
IdRemoval buildIdRemoval(String id) { - return IdRemoval.builder().annotationId(id).requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build(); + return IdRemoval.builder().annotationId(id).user("user").requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).build(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 5d5869b2..da2a57ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -218,10 +218,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { ManualRedactions manualRedactions = ManualRedactions.builder() .resizeRedactions(Set.of(ManualResizeRedaction.builder() .annotationId("c6be5277f5ee60dc3d83527798b7fe02") + .fileId(TEST_FILE_ID) .value("Dr. 
Alan") .positions(List.of(new Rectangle(236.8f, 182.90005f, 40.584f, 12.642f, 7))) .requestDate(OffsetDateTime.now()) .updateDictionary(false) + .user("user") .build())) .build(); @@ -656,6 +658,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .fileId("fileId") .type("signature") .requestDate(OffsetDateTime.now()) + .user("user") .build())); request.setManualRedactions(manualRedactions); @@ -710,6 +713,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .annotationId("308dab9015bfafd911568cffe0a7f7de") .fileId(TEST_FILE_ID) .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 475479, ZoneOffset.UTC)) + .user("user") .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 07, 483651, ZoneOffset.UTC)) .build()); @@ -718,6 +722,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .annotationId("0b56ea1a87c83f351df177315af94f0d") .fileId(TEST_FILE_ID) .legalBasis("Something") + .user("user") .requestDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 4653, ZoneOffset.UTC)) .processedDate(OffsetDateTime.of(2022, 05, 23, 9, 30, 15, 794, ZoneOffset.UTC)) .build()); @@ -726,6 +731,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .add(IdRemoval.builder() .annotationId("0b56ea1a87c83f351df177315af94f0d") .fileId(TEST_FILE_ID) + .user("user") .requestDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 961721, ZoneOffset.UTC)) .processedDate(OffsetDateTime.of(2022, 05, 23, 8, 30, 23, 96528, ZoneOffset.UTC)) .build()); @@ -945,6 +951,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .textBefore("") .updateDictionary(false) .textAfter("") + .user("user") .build(); manualRedactions.getResizeRedactions().add(manualResizeRedaction); @@ -1032,12 +1039,14 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder() 
.annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf") .fileId("fileId") + .user("user") .processedDate(OffsetDateTime.now()) .requestDate(OffsetDateTime.now()) .build())); manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder() .annotationId("675eba69b0c2917de55462c817adaa05") .fileId("fileId") + .user("user") .legalBasis("Something") .requestDate(OffsetDateTime.now()) .processedDate(OffsetDateTime.now()) @@ -1248,6 +1257,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .requestDate(OffsetDateTime.now()) .fileId(TEST_FILE_ID) .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") + .user("user") .build())) .build()); @@ -1288,12 +1298,14 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .requestDate(OffsetDateTime.now()) .fileId(TEST_FILE_ID) .legalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002") + .user("user") .build())) .recategorizations(Set.of(ManualRecategorization.builder() .annotationId("3029651d0842a625f2d23f8375c23600") .type("CBI_author") .requestDate(OffsetDateTime.now()) .fileId(TEST_FILE_ID) + .user("user") .build())) .build()); @@ -1454,6 +1466,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { .resizeRedactions(Set.of(ManualResizeRedaction.builder() .updateDictionary(true) .annotationId(david.getId()) + .fileId(TEST_FILE_ID) + .user("user") .requestDate(OffsetDateTime.now()) .value("David Ksenia") .positions(List.of(Rectangle.builder() @@ -1481,7 +1495,14 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { private IdRemoval getIdRemoval(String id) { - return IdRemoval.builder().annotationId(id).removeFromAllDossiers(false).removeFromDictionary(false).requestDate(OffsetDateTime.now()).build(); + return IdRemoval.builder() + .annotationId(id) + .removeFromAllDossiers(false) + .fileId(TEST_FILE_ID) + .user("user") + .removeFromDictionary(false) + .requestDate(OffsetDateTime.now()) + 
.build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java index c483a49a..75e0a12c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java @@ -127,15 +127,15 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); @@ -155,29 +155,40 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { String testEntityValue1 = 
"Desiree"; String testEntityValue2 = "Melanie"; EntityLog redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); - assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count()); - assertEquals(2, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2)).count()); + assertEquals(2, + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue1)) + .count()); + assertEquals(2, + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue2)) + .count()); Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID)); String expandedEntityKeyword = "Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum."; - entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document).findFirst().get(); + entityCreationService.byString(expandedEntityKeyword, "PII", EntityType.ENTITY, document) + .findFirst() + .get(); String idToResize = redactionLog.getEntityLogEntry() .stream() .filter(entry -> entry.getValue().equals(testEntityValue1)) .max(Comparator.comparingInt(EntityLogEntry::getStartOffset)) - .get() - .getId(); + .get().getId(); ManualRedactions manualRedactions = new ManualRedactions(); - manualRedactions.getResizeRedactions().add(ManualResizeRedaction.builder() - .annotationId(idToResize) - .value(expandedEntityKeyword) - .positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(), - Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build())) - .addToAllDossiers(false) - .updateDictionary(false) - .requestDate(OffsetDateTime.now()) - 
.build()); + manualRedactions.getResizeRedactions() + .add(ManualResizeRedaction.builder() + .annotationId(idToResize) + .fileId(TEST_FILE_ID) + .value(expandedEntityKeyword) + .positions(List.of(Rectangle.builder().topLeftX(56.8f).topLeftY(454.664f).height(15.408f).width(493.62f).page(3).build(), + Rectangle.builder().topLeftX(56.8f).topLeftY(440.864f).height(15.408f).width(396f).page(3).build())) + .addToAllDossiers(false) + .updateDictionary(false) + .requestDate(OffsetDateTime.now()) + .build()); request.setManualRedactions(manualRedactions); analyzeService.reanalyze(request); @@ -188,21 +199,32 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { try (FileOutputStream fileOutputStream = new FileOutputStream(tmpFile)) { fileOutputStream.write(annotateResponse.getDocument()); } - EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(expandedEntityKeyword)).findFirst().get(); + EntityLogEntry resizedEntry = redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(expandedEntityKeyword)) + .findFirst() + .get(); assertEquals(idToResize, resizedEntry.getId()); - assertEquals(1, redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue1)).count()); assertEquals(1, - redactionLog.getEntityLogEntry().stream().filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED)).count()); + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue1)) + .count()); + assertEquals(1, + redactionLog.getEntityLogEntry() + .stream() + .filter(entry -> entry.getValue().equals(testEntityValue2) && !entry.getState().equals(EntryState.REMOVED)) + .count()); } private static com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) { return new 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle((float) rectangle2D.getMaxX(), - (float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(), - (float) rectangle2D.getWidth(), - -(float) rectangle2D.getHeight(), - pageNumber); + (float) rectangle2D.getMaxY() - (float) rectangle2D.getHeight(), + (float) rectangle2D.getWidth(), + -(float) rectangle2D.getHeight(), + pageNumber); } @@ -219,10 +241,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build())); manualRedactions.setForceRedactions(Set.of(ManualForceRedaction.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Something") - .build())); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Something") + .build())); ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); manualRedactionEntry.setAnnotationId(manualAddId); @@ -232,7 +254,7 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { manualRedactionEntry.setValue("O'Loughlin C.K."); manualRedactionEntry.setReason("Manual Redaction"); manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(), - Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); + Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); AnalyzeRequest request = uploadFileToStorage(pdfFile); request.setManualRedactions(manualRedactions); @@ -242,11 +264,11 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { manualRedactions.getEntriesToAdd().add(manualRedactionEntry); 
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().annotationId("5b940b2cb401ed9f5be6fc24f6e77bcf").fileId("fileId").build())); manualRedactions.setLegalBasisChanges((Set.of(ManualLegalBasisChange.builder() - .annotationId("675eba69b0c2917de55462c817adaa05") - .fileId("fileId") - .legalBasis("Manual Legal Basis Change") - .requestDate(OffsetDateTime.now()) - .build()))); + .annotationId("675eba69b0c2917de55462c817adaa05") + .fileId("fileId") + .legalBasis("Manual Legal Basis Change") + .requestDate(OffsetDateTime.now()) + .build()))); analyzeService.reanalyze(request); @@ -295,7 +317,10 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { fileOutputStream.write(annotateResponse.getDocument()); } long end = System.currentTimeMillis(); - var optionalEntry = redactionLog.getEntityLogEntry().stream().filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)).findAny(); + var optionalEntry = redactionLog.getEntityLogEntry() + .stream() + .filter(entityLogEntry -> entityLogEntry.getId().equals(manualAddId)) + .findAny(); assertTrue(optionalEntry.isPresent()); assertEquals(2, optionalEntry.get().getContainingNodeId().size()); // 2 is the depth of the table instead of the table cell System.out.println("duration: " + (end - start)); @@ -345,9 +370,9 @@ public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { EntityLog redactionLog2 = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); assertFalse(redactionLog2.getEntityLogEntry() - .stream() - .filter(entry -> entry.getType().equals("published_information")) - .anyMatch(entry -> entry.getValue().equals("Oxford University Press"))); + .stream() + .filter(entry -> entry.getType().equals("published_information")) + .anyMatch(entry -> entry.getValue().equals("Oxford University Press"))); var oxfordUniversityPressRecategorized = redactionLog2.getEntityLogEntry() .stream() @@ -381,15 +406,15 @@ public class ManualChangesEnd2EndTest extends 
AbstractRedactionIntegrationTest { String annotationId = "testAnnotationId"; manualRedactions.setEntriesToAdd(Set.of(ManualRedactionEntry.builder() - .annotationId(annotationId) - .requestDate(OffsetDateTime.now()) - .type("manual") - .value("Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated") - .positions(List.of(// - new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), // - new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), // - new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) // - .build())); + .annotationId(annotationId) + .requestDate(OffsetDateTime.now()) + .type("manual") + .value("Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated") + .positions(List.of(// + new Rectangle(new Point(56.8f, 496.27f), 61.25f, 12.83f, 2), // + new Rectangle(new Point(56.8f, 482.26f), 303.804f, 15.408f, 2), // + new Rectangle(new Point(56.8f, 468.464f), 314.496f, 15.408f, 2))) // + .build())); ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() .annotationId(annotationId) .requestDate(OffsetDateTime.now()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java index 860f4d1b..ce91c661 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java @@ -32,18 +32,33 @@ public class ManualChangesIntegrationTest extends 
RulesIntegrationTest { public void manualResizeRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); Set biggerEntities = entityCreationService.byString("David Ksenia Max Mustermann", "CBI_author", EntityType.ENTITY, document) .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); - TextEntity biggerEntity = biggerEntities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); + TextEntity biggerEntity = biggerEntities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); ManualResizeRedaction manualResizeRedaction = ManualResizeRedaction.builder() .annotationId(initialId) + .fileId(TEST_FILE_ID) + .user("user") .value(biggerEntity.getValue()) - .positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage().get(0))) + .positions(toAnnotationRectangles(biggerEntity.getPositionsOnPagePerPage() + .get(0))) .requestDate(OffsetDateTime.now()) .updateDictionary(false) .build(); @@ -55,8 +70,13 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { assertTrue(Sets.difference(new HashSet<>(biggerEntity.getIntersectingNodes()), new HashSet<>(entity.getIntersectingNodes())).isEmpty()); assertEquals(biggerEntity.getPages(), 
entity.getPages()); assertEquals(biggerEntity.getValue(), entity.getValue()); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); - assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage().get(0).getRectanglePerLine(), entity.getPositionsOnPagePerPage().get(0).getRectanglePerLine()); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); + assertRectanglesAlmostEqual(biggerEntity.getPositionsOnPagePerPage() + .get(0).getRectanglePerLine(), + entity.getPositionsOnPagePerPage() + .get(0).getRectanglePerLine()); assertTrue(entity.resized()); } @@ -65,12 +85,25 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualForceRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); - ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); + ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() + .annotationId(initialId) + .fileId(TEST_FILE_ID) + .user("user") + .legalBasis("Something") + .requestDate(OffsetDateTime.now()) + .build(); doAnalysis(document, List.of(manualForceRedaction)); @@ -78,8 +111,12 @@ public class 
ManualChangesIntegrationTest extends RulesIntegrationTest { assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals("Something", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertEquals("Something", + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); assertFalse(entity.removed()); assertTrue(entity.hasManualChanges()); assertTrue(entity.applied()); @@ -90,17 +127,26 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualIDRemovalTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); - IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); + IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).fileId(TEST_FILE_ID).user("user").build(); doAnalysis(document, List.of(idRemoval)); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, 
entity.getPositionsOnPagePerPage().get(0).getId()); + assertEquals(initialId, + entity.getPositionsOnPagePerPage() + .get(0).getId()); assertTrue(entity.ignored()); } @@ -109,13 +155,25 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { public void manualIDRemovalButAlsoForceRedactionTest() { Document document = buildGraph("files/new/crafted document"); - Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document).collect(Collectors.toUnmodifiableSet()); + Set entities = entityCreationService.byString("David Ksenia", "CBI_author", EntityType.ENTITY, document) + .collect(Collectors.toUnmodifiableSet()); - TextEntity entity = entities.stream().filter(e -> e.getPages().stream().anyMatch(p -> p.getNumber() == 1)).findFirst().get(); + TextEntity entity = entities.stream() + .filter(e -> e.getPages() + .stream() + .anyMatch(p -> p.getNumber() == 1)) + .findFirst() + .get(); - String initialId = entity.getPositionsOnPagePerPage().get(0).getId(); - IdRemoval idRemoval = IdRemoval.builder().annotationId(initialId).requestDate(OffsetDateTime.now()).build(); - ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder().annotationId(initialId).legalBasis("Something").requestDate(OffsetDateTime.now()).build(); + String initialId = entity.getPositionsOnPagePerPage() + .get(0).getId(); + ManualForceRedaction manualForceRedaction = ManualForceRedaction.builder() + .annotationId(initialId) + .legalBasis("Something") + .requestDate(OffsetDateTime.now()) + .fileId(TEST_FILE_ID) + .user("user") + .build(); doAnalysis(document, List.of(manualForceRedaction)); @@ -123,7 +181,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { assertFalse(entity.getIntersectingNodes().isEmpty()); assertEquals(1, entity.getPages().size()); assertEquals("David Ksenia", entity.getValue()); - assertEquals(initialId, entity.getPositionsOnPagePerPage().get(0).getId()); + assertEquals(initialId, + 
entity.getPositionsOnPagePerPage() + .get(0).getId()); assertFalse(entity.removed()); assertFalse(entity.ignored()); } @@ -131,7 +191,9 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { private void assertRectanglesAlmostEqual(Collection rects1, Collection rects2) { - if (rects1.stream().allMatch(rect1 -> rects2.stream().anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) { + if (rects1.stream() + .allMatch(rect1 -> rects2.stream() + .anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) { return; } // use this for nice formatting of error message @@ -143,15 +205,18 @@ public class ManualChangesIntegrationTest extends RulesIntegrationTest { double tolerance = 1e-1; return Math.abs(r1.getX() - r2.getX()) < tolerance &&// - Math.abs(r1.getY() - r2.getY()) < tolerance &&// - Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&// - Math.abs(r1.getHeight() - r2.getHeight()) < tolerance; + Math.abs(r1.getY() - r2.getY()) < tolerance &&// + Math.abs(r1.getWidth() - r2.getWidth()) < tolerance &&// + Math.abs(r1.getHeight() - r2.getHeight()) < tolerance; } private static List toAnnotationRectangles(PositionOnPage positionsOnPage) { - return positionsOnPage.getRectanglePerLine().stream().map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())).toList(); + return positionsOnPage.getRectanglePerLine() + .stream() + .map(rectangle2D -> toAnnotationRectangle(rectangle2D, positionsOnPage.getPage().getNumber())) + .toList(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java index 85d8311c..dbf0a1c4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java 
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesUnitTest.java @@ -43,7 +43,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { OffsetDateTime start = OffsetDateTime.now(); String reason = "whatever"; Document document = buildGraphNoImages("files/new/crafted document.pdf"); - List entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document).peek(e -> e.apply("T.0.0", reason)).toList(); + List entities = entityCreationService.byString("David Ksenia", "test", EntityType.ENTITY, document) + .peek(e -> e.apply("T.0.0", reason)) + .toList(); assertFalse(entities.isEmpty()); TextEntity entity = entities.get(0); assertTrue(entity.active()); @@ -52,10 +54,11 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.resized()); assertFalse(entity.ignored()); assertEquals("n-a", entity.getMatchedRule().getLegalBasis()); - String annotationId = entity.getPositionsOnPagePerPage().get(0).getId(); + String annotationId = entity.getPositionsOnPagePerPage() + .get(0).getId(); // remove first - IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build(); + IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).user("user").annotationId(annotationId).build(); entity.getManualOverwrite().addChange(removal); assertTrue(entity.ignored()); assertFalse(entity.applied()); @@ -65,6 +68,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { ManualForceRedaction forceRedaction = ManualForceRedaction.builder() .requestDate(start.plusSeconds(1)) .fileId(TEST_FILE_ID) + .user("user") .annotationId(annotationId) .legalBasis("coolio") .build(); @@ -73,10 +77,12 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.ignored()); assertFalse(entity.removed()); 
assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions()); - assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals("coolio", + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); // remove again - IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).build(); + IdRemoval removal2 = IdRemoval.builder().requestDate(start.plusSeconds(3)).fileId(TEST_FILE_ID).annotationId(annotationId).user("user").build(); entity.getManualOverwrite().addChange(removal2); assertTrue(entity.ignored()); assertFalse(entity.applied()); @@ -86,6 +92,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { ManualForceRedaction forceRedaction2 = ManualForceRedaction.builder() .requestDate(start.plusSeconds(2)) .fileId(TEST_FILE_ID) + .user("user") .annotationId(annotationId) .legalBasis("coolio") .build(); @@ -93,7 +100,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertTrue(entity.ignored()); assertFalse(entity.applied()); assertEquals(reason + ", removed by manual override, forced by manual override, forced by manual override, removed by manual override", - entity.buildReasonWithManualChangeDescriptions()); + entity.buildReasonWithManualChangeDescriptions()); String legalBasis = "Yeah"; String section = "Some random section!"; @@ -103,6 +110,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { .annotationId(annotationId) .requestDate(start.plusSeconds(4)) .section(section) + .fileId(TEST_FILE_ID) .user("peter") .value(value) .build(); @@ -110,16 +118,32 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertTrue(entity.ignored()); assertFalse(entity.applied()); assertEquals(reason + ", removed by manual override, forced by 
manual override, forced by manual override, removed by manual override, legal basis was manually changed", - entity.buildReasonWithManualChangeDescriptions()); - assertEquals(value, entity.getManualOverwrite().getValue().orElse(entity.getValue())); - assertEquals(legalBasis, entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); - assertEquals(section, entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())); + entity.buildReasonWithManualChangeDescriptions()); + assertEquals(value, + entity.getManualOverwrite().getValue() + .orElse(entity.getValue())); + assertEquals(legalBasis, + entity.getManualOverwrite().getLegalBasis() + .orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals(section, + entity.getManualOverwrite().getSection() + .orElse(entity.getDeepestFullyContainingNode().toString())); - ManualRecategorization imageRecategorizationRequest = ManualRecategorization.builder().type("type").requestDate(start.plusSeconds(5)).annotationId(annotationId).build(); + ManualRecategorization imageRecategorizationRequest = ManualRecategorization.builder() + .type("type") + .requestDate(start.plusSeconds(5)) + .fileId(TEST_FILE_ID) + .user("user") + .annotationId(annotationId) + .build(); entity.getManualOverwrite().addChange(imageRecategorizationRequest); - assertTrue(entity.getManualOverwrite().getRecategorized().isPresent()); - assertTrue(entity.getManualOverwrite().getRecategorized().get()); - assertEquals("type", entity.getManualOverwrite().getType().orElse(entity.type())); + assertTrue(entity.getManualOverwrite().getRecategorized() + .isPresent()); + assertTrue(entity.getManualOverwrite().getRecategorized() + .get()); + assertEquals("type", + entity.getManualOverwrite().getType() + .orElse(entity.type())); } @@ -129,7 +153,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { OffsetDateTime start = OffsetDateTime.now(); String reason = "whatever"; 
Document document = buildGraphNoImages("files/new/crafted document.pdf"); - List entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document).peek(e -> e.apply("T.0.0", reason)).toList(); + List entities = entityCreationService.byString("David Ksenia", "test", EntityType.HINT, document) + .peek(e -> e.apply("T.0.0", reason)) + .toList(); assertFalse(entities.isEmpty()); TextEntity entity = entities.get(0); assertTrue(entity.active()); @@ -138,10 +164,11 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.resized()); assertFalse(entity.ignored()); assertEquals("n-a", entity.getMatchedRule().getLegalBasis()); - String annotationId = entity.getPositionsOnPagePerPage().get(0).getId(); + String annotationId = entity.getPositionsOnPagePerPage() + .get(0).getId(); // remove first - IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).build(); + IdRemoval removal = IdRemoval.builder().requestDate(start).fileId(TEST_FILE_ID).annotationId(annotationId).user("user").build(); entity.getManualOverwrite().addChange(removal); assertTrue(entity.ignored()); assertFalse(entity.applied()); @@ -152,6 +179,7 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { .requestDate(start.plusSeconds(1)) .fileId(TEST_FILE_ID) .annotationId(annotationId) + .user("user") .legalBasis("coolio") .build(); entity.getManualOverwrite().addChange(forceRedaction); @@ -159,7 +187,9 @@ public class ManualChangesUnitTest extends BuildDocumentIntegrationTest { assertFalse(entity.ignored()); assertFalse(entity.removed()); assertEquals(reason + ", removed by manual override, forced by manual override", entity.buildReasonWithManualChangeDescriptions()); - assertEquals("coolio", entity.getManualOverwrite().getLegalBasis().orElse(entity.getMatchedRule().getLegalBasis())); + assertEquals("coolio", + entity.getManualOverwrite().getLegalBasis() + 
.orElse(entity.getMatchedRule().getLegalBasis())); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java index b10c8a6a..b6a1efcb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java @@ -84,7 +84,7 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { public void testFoundManualAddRedactionAndRemovedHasStateRemoved() { DocumentAndEntity context = createFoundManualRedaction(); - IdRemoval removal = IdRemoval.builder().requestDate(OffsetDateTime.now()).build(); + IdRemoval removal = IdRemoval.builder().annotationId("123").user("user").fileId(TEST_FILE_ID).requestDate(OffsetDateTime.now()).build(); context.entity().getManualOverwrite().addChange(removal); assertTrue(context.entity().removed()); } @@ -95,7 +95,7 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { public void testNotFoundManualAddRedactionAndRemovedHasStateRemoved() { DocumentAndEntity context = createNotFoundManualRedaction(); - IdRemoval removal = IdRemoval.builder().requestDate(OffsetDateTime.now()).build(); + IdRemoval removal = IdRemoval.builder().fileId(TEST_FILE_ID).user("user").annotationId("123").requestDate(OffsetDateTime.now()).build(); context.entity().getManualOverwrite().addChange(removal); assertTrue(context.entity().removed()); } @@ -108,8 +108,11 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { String value = "To: Syngenta Ltd. 
Jealott’s Hill"; String type = DICTIONARY_AUTHOR; ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder() + .annotationId("123") .type(type) .value(value) + .user("user") + .fileId(TEST_FILE_ID) .reason("reason") .legalBasis("n-a") .section("n-a") @@ -122,17 +125,20 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { assertTrue(document.getEntities().isEmpty()); - List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(), - document, - TEST_DOSSIER_TEMPLATE_ID); + List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder() + .entriesToAdd(Set.of( + manualRedactionEntry)) + .build(), + document, + TEST_DOSSIER_TEMPLATE_ID); assertEquals(1, notFoundManualEntities.size()); assertTrue(document.getEntities().isEmpty()); List redactionLogEntries = entityLogCreatorService.createInitialEntityLog(new AnalyzeRequest(), - document, - notFoundManualEntities, - new DictionaryVersion(), - 0L).getEntityLogEntry(); + document, + notFoundManualEntities, + new DictionaryVersion(), + 0L).getEntityLogEntry(); assertEquals(1, redactionLogEntries.size()); assertEquals(value, redactionLogEntries.get(0).getValue()); @@ -146,7 +152,8 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { Document document = buildGraph("files/new/VV-919901.pdf"); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - List tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document).toList(); + List tempEntities = entityCreationService.byString("To: Syngenta Ltd.", "temp", EntityType.ENTITY, document) + .toList(); assertFalse(tempEntities.isEmpty()); var tempEntity = tempEntities.get(0); List positions = tempEntity.getPositionsOnPagePerPage() @@ -158,8 
+165,11 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { ManualRedactionEntry manualRedactionEntry = ManualRedactionEntry.builder() .type("manual") + .annotationId("123") .value(tempEntity.getValue()) .reason("reason") + .user("user") + .fileId(TEST_FILE_ID) .legalBasis("n-a") .section(tempEntity.getDeepestFullyContainingNode().toString()) .rectangle(false) @@ -172,21 +182,28 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { tempEntity.removeFromGraph(); assertTrue(document.getEntities().isEmpty()); - List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).build(), - document, - TEST_DOSSIER_TEMPLATE_ID); + List notFoundManualEntities = entityFromPrecursorCreationService.createEntitiesIfFoundAndReturnNotFoundEntries(ManualRedactions.builder() + .entriesToAdd(Set.of( + manualRedactionEntry)) + .build(), + document, + TEST_DOSSIER_TEMPLATE_ID); assertTrue(notFoundManualEntities.isEmpty()); assertEquals(1, document.getEntities().size()); - return new DocumentAndEntity(document, document.getEntities().stream().findFirst().get()); + return new DocumentAndEntity(document, + document.getEntities() + .stream() + .findFirst() + .get()); } public static Rectangle toAnnotationRectangle(Rectangle2D rectangle2D, int pageNumber) { return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())), - (float) rectangle2D.getWidth(), - -(float) rectangle2D.getHeight(), - pageNumber); + (float) rectangle2D.getWidth(), + -(float) rectangle2D.getHeight(), + pageNumber); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java index 8eeba13c..d60b1c9f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/UnprocessedChangesServiceTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.time.OffsetDateTime; import java.util.List; import java.util.Optional; import java.util.Set; @@ -34,7 +35,6 @@ import org.springframework.test.context.junit.jupiter.SpringExtension; import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; -import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; @@ -84,6 +84,7 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT @SpyBean RabbitTemplate rabbitTemplate; + @BeforeEach public void stubClients() { @@ -101,21 +102,22 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, true)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS_INDICATOR + ":" + 
TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS_INDICATOR) - .dossierTemplateId(TEST_DOSSIER_ID) - .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) - .build())); + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); mockDictionaryCalls(null); when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); } + @Test @SneakyThrows public void testManualSurroundingText() { @@ -125,10 +127,20 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(355.53775f).topLeftY(266.1895f).width(29.32224f).height(10.048125f).page(1).build()), "AOEL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(355.53775f) + .topLeftY(266.1895f) + .width(29.32224f) + .height(10.048125f) + .page(1) + .build()), + "AOEL"); var notFoundId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(notFoundId, List.of(Rectangle.builder().topLeftX(1f).topLeftY(1f).width(1f).height(1f).page(1).build()), "Random"); + ManualRedactionEntry manualRedactionEntry2 = 
prepareManualRedactionEntry(notFoundId, + List.of(Rectangle.builder().topLeftX(1f).topLeftY(1f).width(1f).height(1f).page(1).build()), + "Random"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); manualRedactions.getEntriesToAdd().add(manualRedactionEntry2); @@ -147,30 +159,43 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertFalse(unprocessedManualEntities.isEmpty()); assertEquals(unprocessedManualEntities.size(), 2); - Optional optionalUnprocessedManualEntity = unprocessedManualEntities.stream().filter(manualEntity -> manualEntity.getAnnotationId().equals(aoelId)).findFirst(); + Optional optionalUnprocessedManualEntity = unprocessedManualEntities.stream() + .filter(manualEntity -> manualEntity.getAnnotationId().equals(aoelId)) + .findFirst(); assertTrue(optionalUnprocessedManualEntity.isPresent()); UnprocessedManualEntity unprocessedManualEntity = optionalUnprocessedManualEntity.get(); assertEquals(unprocessedManualEntity.getTextBefore(), "was above the "); assertEquals(unprocessedManualEntity.getTextAfter(), " without PPE (34%"); assertEquals(unprocessedManualEntity.getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(unprocessedManualEntity.getPositions().get(0).x(), 355.53775f); - assertEquals(unprocessedManualEntity.getPositions().get(0).y(), 266.49002f); - assertEquals(unprocessedManualEntity.getPositions().get(0).w(), 29.322266f); - assertEquals(unprocessedManualEntity.getPositions().get(0).h(), 11.017679f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).x(), 355.53775f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).y(), 266.49002f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).w(), 29.322266f); + assertEquals(unprocessedManualEntity.getPositions() + .get(0).h(), 11.017679f); - Optional optionalNotFoundUnprocessedManualEntity = unprocessedManualEntities.stream().filter(manualEntity -> 
manualEntity.getAnnotationId().equals(notFoundId)).findFirst(); + Optional optionalNotFoundUnprocessedManualEntity = unprocessedManualEntities.stream() + .filter(manualEntity -> manualEntity.getAnnotationId().equals(notFoundId)) + .findFirst(); assertTrue(optionalNotFoundUnprocessedManualEntity.isPresent()); UnprocessedManualEntity unprocessedNotFoundManualEntity = optionalNotFoundUnprocessedManualEntity.get(); assertEquals(unprocessedNotFoundManualEntity.getTextBefore(), ""); assertEquals(unprocessedNotFoundManualEntity.getTextAfter(), ""); assertEquals(unprocessedNotFoundManualEntity.getSection(), ""); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getPageNumber(), 1); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[0], 1f); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[1], 1f); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[2], 1f); - assertEquals(unprocessedNotFoundManualEntity.getPositions().get(0).getRectangle()[3], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getPageNumber(), 1); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[0], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[1], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[2], 1f); + assertEquals(unprocessedNotFoundManualEntity.getPositions() + .get(0).getRectangle()[3], 1f); analyzeService.reanalyze(request); @@ -190,10 +215,14 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertEquals(unprocessedManualEntities.get(0).getTextAfter(), " without PPE (34%"); assertEquals(unprocessedManualEntities.get(0).getTextBefore(), "to EFSA guidance "); assertEquals(unprocessedManualEntities.get(0).getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - 
assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).h(), positions.get(0).getHeight()); } @@ -205,13 +234,37 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); var cormsId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(cormsId, List.of(Rectangle.builder().topLeftX(129.86f).topLeftY(505.7295f).width(35.9904f).height(10.048125f).page(1).build()), "CoRMS"); + ManualRedactionEntry manualRedactionEntry2 = prepareManualRedactionEntry(cormsId, + List.of(Rectangle.builder() + .topLeftX(129.86f) + .topLeftY(505.7295f) + .width(35.9904f) + .height(10.048125f) + .page(1) + .build()), + 
"CoRMS"); var a9Id = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry3 = prepareManualRedactionEntry(a9Id, List.of(Rectangle.builder().topLeftX(140.1096f).topLeftY(291.5095f).width(37.84512f).height(10.048125f).page(1).build()), "A9396G"); + ManualRedactionEntry manualRedactionEntry3 = prepareManualRedactionEntry(a9Id, + List.of(Rectangle.builder() + .topLeftX(140.1096f) + .topLeftY(291.5095f) + .width(37.84512f) + .height(10.048125f) + .page(1) + .build()), + "A9396G"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry3); manualRedactions.getEntriesToAdd().add(manualRedactionEntry2); @@ -238,35 +291,53 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertFalse(unprocessedManualEntities.isEmpty()); assertEquals(unprocessedManualEntities.size(), 3); - var resizedAoel = unprocessedManualEntities.stream().filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(aoelId)).findFirst(); + var resizedAoel = unprocessedManualEntities.stream() + .filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(aoelId)) + .findFirst(); assertTrue(resizedAoel.isPresent()); assertEquals(resizedAoel.get().getTextAfter(), " (max. 
43% of"); assertEquals(resizedAoel.get().getTextBefore(), "is below the "); assertEquals(resizedAoel.get().getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(resizedAoel.get().getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(resizedAoel.get().getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(resizedAoel.get().getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(resizedAoel.get().getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(resizedAoel.get().getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(resizedAoel.get().getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(resizedAoel.get().getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(resizedAoel.get().getPositions() + .get(0).h(), positions.get(0).getHeight()); - var cormsResized = unprocessedManualEntities.stream().filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(cormsId)).findFirst(); + var cormsResized = unprocessedManualEntities.stream() + .filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(cormsId)) + .findFirst(); assertTrue(cormsResized.isPresent()); assertEquals(cormsResized.get().getTextAfter(), " a NOAEL of"); assertEquals(cormsResized.get().getTextBefore(), "mg/kg bw/d. 
Furthermore "); assertEquals(cormsResized.get().getSection(), "[0, 3]: Paragraph: The Co-RMS indicated the"); - assertEquals(cormsResized.get().getPositions().get(0).x(), positions2.get(0).getTopLeftX()); - assertEquals(cormsResized.get().getPositions().get(0).y(), positions2.get(0).getTopLeftY()); - assertEquals(cormsResized.get().getPositions().get(0).w(), positions2.get(0).getWidth()); - assertEquals(cormsResized.get().getPositions().get(0).h(), positions2.get(0).getHeight()); + assertEquals(cormsResized.get().getPositions() + .get(0).x(), positions2.get(0).getTopLeftX()); + assertEquals(cormsResized.get().getPositions() + .get(0).y(), positions2.get(0).getTopLeftY()); + assertEquals(cormsResized.get().getPositions() + .get(0).w(), positions2.get(0).getWidth()); + assertEquals(cormsResized.get().getPositions() + .get(0).h(), positions2.get(0).getHeight()); - var a9Resized = unprocessedManualEntities.stream().filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(a9Id)).findFirst(); + var a9Resized = unprocessedManualEntities.stream() + .filter(unprocessedManualEntity -> unprocessedManualEntity.getAnnotationId().equals(a9Id)) + .findFirst(); assertTrue(a9Resized.isPresent()); assertEquals(a9Resized.get().getTextAfter(), " were obtained from"); assertEquals(a9Resized.get().getTextBefore(), "data for S"); assertEquals(a9Resized.get().getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(a9Resized.get().getPositions().get(0).x(), positions3.get(0).getTopLeftX()); - assertEquals(a9Resized.get().getPositions().get(0).y(), positions3.get(0).getTopLeftY()); - assertEquals(a9Resized.get().getPositions().get(0).w(), positions3.get(0).getWidth()); - assertEquals(a9Resized.get().getPositions().get(0).h(), positions3.get(0).getHeight()); + assertEquals(a9Resized.get().getPositions() + .get(0).x(), positions3.get(0).getTopLeftX()); + assertEquals(a9Resized.get().getPositions() + .get(0).y(), positions3.get(0).getTopLeftY()); 
+ assertEquals(a9Resized.get().getPositions() + .get(0).w(), positions3.get(0).getWidth()); + assertEquals(a9Resized.get().getPositions() + .get(0).h(), positions3.get(0).getHeight()); } @@ -277,7 +348,15 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); AnalyzeRequest request = uploadFileToStorage(pdfFile); @@ -301,10 +380,14 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT assertEquals(unprocessedManualEntities.get(0).getTextAfter(), " (max. 
43% of"); assertEquals(unprocessedManualEntities.get(0).getTextBefore(), "is below the "); assertEquals(unprocessedManualEntities.get(0).getSection(), "[1, 1]: Paragraph: A9396G containing 960 g/L"); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).h(), positions.get(0).getHeight()); } @@ -315,7 +398,15 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); AnalyzeRequest request = uploadFileToStorage(pdfFile); @@ -339,10 +430,14 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT 
assertEquals(unprocessedManualEntities.get(0).getTextAfter(), ", the same"); assertEquals(unprocessedManualEntities.get(0).getTextBefore(), "to set an "); assertEquals(unprocessedManualEntities.get(0).getSection(), "[0, 4]: Paragraph: With respect to the"); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).x(), positions.get(0).getTopLeftX()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).y(), positions.get(0).getTopLeftY()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).w(), positions.get(0).getWidth()); - assertEquals(unprocessedManualEntities.get(0).getPositions().get(0).h(), positions.get(0).getHeight()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).x(), positions.get(0).getTopLeftX()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).y(), positions.get(0).getTopLeftY()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).w(), positions.get(0).getWidth()); + assertEquals(unprocessedManualEntities.get(0).getPositions() + .get(0).h(), positions.get(0).getHeight()); } @@ -353,7 +448,15 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT ManualRedactions manualRedactions = new ManualRedactions(); var aoelId = UUID.randomUUID().toString(); - ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, List.of(Rectangle.builder().topLeftX(384.85536f).topLeftY(240.8695f).width(13.49088f).height(10.048125f).page(1).build()), "EL"); + ManualRedactionEntry manualRedactionEntry = prepareManualRedactionEntry(aoelId, + List.of(Rectangle.builder() + .topLeftX(384.85536f) + .topLeftY(240.8695f) + .width(13.49088f) + .height(10.048125f) + .page(1) + .build()), + "EL"); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); AnalyzeRequest request = uploadFileToStorage(pdfFile); @@ -377,25 +480,32 @@ public class UnprocessedChangesServiceTest extends AbstractRedactionIntegrationT 
private static ManualResizeRedaction prepareManualSizeRedaction(String id, List positions, String value) { - ManualResizeRedaction manualResizeRedaction = new ManualResizeRedaction(); - manualResizeRedaction.setAnnotationId(id); - manualResizeRedaction.setPositions(positions); - manualResizeRedaction.setUpdateDictionary(false); - manualResizeRedaction.setAddToAllDossiers(false); - manualResizeRedaction.setValue(value); - return manualResizeRedaction; + return ManualResizeRedaction.builder() + .annotationId(id) + .fileId("fileId") + .user("user") + .positions(positions) + .updateDictionary(false) + .addToAllDossiers(false) + .value(value) + .requestDate(OffsetDateTime.now()) + .build(); } private static ManualRedactionEntry prepareManualRedactionEntry(String id, List positions, String value) { - ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); - manualRedactionEntry.setAnnotationId(id); - manualRedactionEntry.setFileId("fileId"); - manualRedactionEntry.setType("CBI_author"); - manualRedactionEntry.setValue(value); - manualRedactionEntry.setReason("Manual Redaction"); - manualRedactionEntry.setPositions(positions); - return manualRedactionEntry; + return ManualRedactionEntry.builder() + .annotationId(id) + .fileId("fileId") + .user("user") + .type("CBI_author") + .value(value) + .reason("Manual Redaction") + .processedDate(OffsetDateTime.now()) + .requestDate(OffsetDateTime.now()) + .positions(positions) + .build(); } + } -- 2.47.2 From afc83db24966dd349c094eb8dfa96df633d50b28 Mon Sep 17 00:00:00 2001 From: Corina Olariu Date: Mon, 18 Mar 2024 18:19:38 +0200 Subject: [PATCH 21/21] RED-8773 - Wrong value for recategorized and forced logo - backport - use image.getValue instead of image.value() so that following recategorizations for images will get the updated value --- .../redaction/v1/server/service/EntityLogCreatorService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index 91fc9feb..2f03ef2c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -190,7 +190,7 @@ public class EntityLogCreatorService { boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId); return EntityLogEntry.builder() .id(image.getId()) - .value(image.value()) + .value(image.getValue()) .type(imageType) .reason(image.buildReasonWithManualChangeDescriptions()) .legalBasis(image.legalBasis()) -- 2.47.2