RED-10046 The punctuation mark “.” should be treated as a word boundary when...
This commit is contained in:
parent
213d3bf645
commit
dfd262e9e1
@ -55,6 +55,7 @@ public final class SeparatorUtils {
|
||||
|
||||
return textRange.end() == textBlock.getTextRange().end() ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end())) ||//
|
||||
SeparatorUtils.isSeparator(textBlock.charAt(textRange.end() - 1)) ||//
|
||||
SeparatorUtils.isJapaneseSeparator(textBlock.charAt(textRange.end() - 1));
|
||||
}
|
||||
|
||||
|
||||
@ -20,6 +20,18 @@ public class SearchImplementationTest extends BuildDocumentIntegrationTest {
|
||||
private EntityEnrichmentService entityEnrichmentService;
|
||||
|
||||
|
||||
@Test
|
||||
public void testSearchImplementationWithPunctuation() {
|
||||
|
||||
Document document = buildGraph("files/Minimal Examples/TestPunctuation");
|
||||
|
||||
SearchImplementation searchImplementation = new SearchImplementation(List.of("Kuhn, J. O."), true);
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
List<TextEntity> entities = entityCreationService.bySearchImplementation(searchImplementation, "CBI_author", EntityType.ENTITY, document)
|
||||
.toList();
|
||||
assertEquals(2, entities.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSearchImplementationWithSingleEntry() {
|
||||
|
||||
|
||||
@ -100,6 +100,17 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
|
||||
assertTrue(context.entity().removed());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createFoundManualRedaction2() {
|
||||
|
||||
Document document = buildGraph("files/Minimal Examples/TestPunctuation");
|
||||
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
|
||||
|
||||
List<TextEntity> tempEntities = entityCreationService.byString("Kuhn, J. O.", "CBI_author", EntityType.ENTITY, document)
|
||||
.toList();
|
||||
assertFalse(tempEntities.isEmpty());
|
||||
assertEquals(2, tempEntities.size());
|
||||
}
|
||||
|
||||
private DocumentAndEntity createNotFoundManualRedaction() {
|
||||
|
||||
|
||||
@ -856,7 +856,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\""
|
||||
when
|
||||
$document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE"))
|
||||
then
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document)
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200)
|
||||
.forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation"));
|
||||
end
|
||||
|
||||
|
||||
@ -1416,7 +1416,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\""
|
||||
when
|
||||
$document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE"))
|
||||
then
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document)
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200)
|
||||
.forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation"));
|
||||
end
|
||||
|
||||
|
||||
@ -837,7 +837,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\""
|
||||
when
|
||||
$document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE"))
|
||||
then
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document)
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200)
|
||||
.forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation"));
|
||||
end
|
||||
|
||||
|
||||
Binary file not shown.
@ -1430,7 +1430,7 @@ rule "PII.9.3: Redact between \"AUTHOR(S)\" and \"(STUDY) COMPLETION DATE\""
|
||||
when
|
||||
$document: Document(containsStringIgnoreCase("AUTHOR(S)"), containsAnyStringIgnoreCase("COMPLETION DATE", "STUDY COMPLETION DATE"))
|
||||
then
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document)
|
||||
entityCreationService.shortestBetweenAnyStringIgnoreCase(List.of("AUTHOR(S)", "AUTHOR(S):"), List.of("COMPLETION DATE", "COMPLETION DATE:", "STUDY COMPLETION DATE", "STUDY COMPLETION DATE:"), "PII", EntityType.ENTITY, $document, 200)
|
||||
.forEach(authorEntity -> authorEntity.redact("PII.9.3", "AUTHOR(S) was found", "personal_data_geolocation"));
|
||||
end
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user