diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/SeparatorUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/SeparatorUtils.java index 391a98aa..7e525a8c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/SeparatorUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/SeparatorUtils.java @@ -55,6 +55,7 @@ public final class SeparatorUtils { return textRange.end() == textBlock.getTextRange().end() ||// SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||// + SeparatorUtils.isSeparator(textBlock.charAt(textRange.end() - 1)) ||// SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java index 283fd487..a98865ae 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/SearchImplementationTest.java @@ -20,6 +20,18 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest { private EntityEnrichmentService entityEnrichmentService; + @Test + public void testSearchImplementationWithPunctuation() { + + Document document = buildGraph("files/Minimal Examples/TestPunctuation"); + + SearchImplementation searchImplementation = new SearchImplementation(List.of("Kuhn, J. O."), true); + EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); + List entities = entityCreationService.bySearchImplementation(searchImplementation, "CBI_author", EntityType.ENTITY, document) + .toList(); + assertEquals(2, entities.size()); + } + @Test public void testSearchImplementationWithSingleEntry() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java index 9a3f8dba..212f5ae9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java @@ -100,6 +100,17 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { assertTrue(context.entity().removed()); } + @Test + public void createFoundManualRedaction2() { + + Document document = buildGraph("files/Minimal Examples/TestPunctuation"); + EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); + + List tempEntities = entityCreationService.byString("Kuhn, J. O.", "CBI_author", EntityType.ENTITY, document) + .toList(); + assertFalse(tempEntities.isEmpty()); + assertEquals(2, tempEntities.size()); + } private DocumentAndEntity createNotFoundManualRedaction() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl index 8719d52e..328a01fb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/acceptance_rules.drl @@ -856,7 +856,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation")); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index fe77a3d6..06aeeb78 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -1416,7 +1416,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation")); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 4e9159e5..991cbd8a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -837,7 +837,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation")); end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/TestPunctuation.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/TestPunctuation.pdf new file mode 100644 index 00000000..84564100 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/TestPunctuation.pdf differ diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index f1bf0dc5..1fc73ad7 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -1430,7 +1430,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\"" when $document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE")) then - entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document) + entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200) .forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation")); end