diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index c3f110a9..76c7b335 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -159,6 +159,8 @@ public class EntitySearchUtils { // HashSet keeps old value but we want the new. entities.removeAll(found); entities.addAll(found); + + removeEntitiesContainedInLarger(entities); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 46db8067..59781278 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -673,7 +673,7 @@ public class RedactionIntegrationTest { public void redactionTest() throws IOException { long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/new/just-a-document-20211215.pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); request.setExcludedPages(Set.of(1)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt index b6acc607..e896369d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt @@ -1655,3 +1655,11 @@ Zyma SA, Nyon, Switzerland Mambo-Tox Ltd. Biomedical Sciences Building Bassett Crescent East Southampton SO16 7PX UK Syngenta Environmental Sciences Jealott’s Hill International Research Centre Bracknell, Berkshire RG42 6EY UK Test Ignored Hint CBI_ADDRESS +CTL +CTL - Central Toxicology Laboratory +CTL/P/3557 +CTL/P/3607 +CTL/P/S 161 +CTL/Pl4333 +CTL/with dictionary entry + diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/just-a-document-20211215.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/just-a-document-20211215.pdf new file mode 100644 index 00000000..938b7a05 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/just-a-document-20211215.pdf differ