diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java
index 5597e8a4..c67b5644 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java
@@ -193,15 +193,44 @@ public class EntitySearchUtils {
 
         Set result = new HashSet<>();
         if (existingEntities != null && foundEntities != null) {
-            for (Entity existingEntity : existingEntities) {
-                for (Entity foundEntity : foundEntities) {
-                    if (existingEntity.getEnd() < foundEntity.getStart() || foundEntity.getEnd() < existingEntity.getStart()) {
-                        result.add(foundEntity);
-                    }
+            for (Entity foundEntity : foundEntities) {
+
+                if (!overlaps(existingEntities, foundEntity)) {
+                    result.add(foundEntity);
                 }
+
             }
         }
         return result;
     }
 
+
+    /**
+     * Checks whether {@code found} shares at least one position with any of the existing entities.
+     * Start and end offsets are both treated as inclusive. Existing entities that start at the same
+     * offset as {@code found} are skipped and never count as an overlap (presumably so that an
+     * expansion of an already known entity is not rejected - TODO confirm with the rule authors).
+     *
+     * @param existingEntities entities the candidate is compared against
+     * @param found            candidate entity
+     * @return {@code true} if a non-skipped existing entity intersects the range of {@code found}
+     */
+    private boolean overlaps(Set<Entity> existingEntities, Entity found) {
+
+        for (Entity existing : existingEntities) {
+
+            if (existing.getStart().equals(found.getStart())) {
+                continue;
+            }
+
+            // Two inclusive ranges intersect exactly when the later start is not behind the earlier end.
+            int overlapStart = Math.max(existing.getStart(), found.getStart());
+            int overlapEnd = Math.min(existing.getEnd(), found.getEnd());
+            if (overlapStart <= overlapEnd) {
+                return true;
+            }
+        }
+        return false;
+    }
+
 }
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index b4b388bd..1b74e85f 100644
--- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -611,6 +611,23 @@ public class RedactionIntegrationTest { } + /** Analyzes the ExpansionTest.pdf sample and asserts that the redaction log holds exactly the six expected values, in any order. */ @Test + public void redactionExpansionOverlap() throws IOException { + + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/ExpansionTest.pdf"); + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + AnalyzeResult result = analyzeService.analyze(request); /* NOTE(review): result is not read again in this method; the assertion below uses the stored redaction log instead. */ + + var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + var values = redactionLog.getRedactionLogEntry().stream().map(RedactionLogEntry::getValue).collect(Collectors.toList()); + + assertThat(values).containsExactlyInAnyOrder("Lastname M.", "Doe", "Doe J.", "M. Mustermann", "Mustermann M.", "F. 
Lastname"); + } + + + @Test @Ignore public void noExceptionShouldBeThrownForAnyFiles() throws IOException { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java index eec7d419..f1d419d4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java @@ -151,8 +151,7 @@ public class EntitySearchUtilsTest { Set result = EntitySearchUtils.findNonOverlappingMatchEntities(existingEntities, foundEntities); // Assert - assertThat(result.size()).isEqualTo(1); - assertThat(result).contains(foundEntities2); + assertThat(result.size()).isEqualTo(0); } @@ -182,8 +181,7 @@ public class EntitySearchUtilsTest { Set result = EntitySearchUtils.findNonOverlappingMatchEntities(existingEntities, foundEntities); // Assert - assertThat(result.size()).isEqualTo(1); - assertThat(result).contains(foundEntities2); + assertThat(result.size()).isEqualTo(0); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt index 9cf34080..634f9d81 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt @@ -8615,4 +8615,9 @@ Zoriki Hosomi R. Zoriki Hosomi Rosana Zuberer D Zubrod J -Zwicker R.E. \ No newline at end of file +Zwicker R.E. +Doe +M. Mustermann +F. 
Lastname +Mustermann +Lastname \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/ExpansionTest.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/ExpansionTest.pdf new file mode 100644 index 00000000..5d55d76a Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/ExpansionTest.pdf differ