From efe49ac2c1ae8a27feb6e07c1b4614619a0b2ece Mon Sep 17 00:00:00 2001 From: deiflaender Date: Thu, 5 Nov 2020 12:27:24 +0100 Subject: [PATCH] RED-419: Avoid duplicate entries --- .../v1/server/redaction/utils/IdBuilder.java | 2 +- .../service/AnnotationHighlightService.java | 19 ++++++++++++++++++- .../v1/server/RedactionIntegrationTest.java | 13 +++++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java index 6f6ba35c..70885daa 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java @@ -18,7 +18,7 @@ public class IdBuilder { StringBuilder sb = new StringBuilder(); crossSequenceParts.forEach(sequencePart -> sequencePart.getTextPositions().forEach(textPosition -> { - sb.append(textPosition.getTextMatrix()); + sb.append(textPosition.getTextMatrix()).append(sequencePart.getPage()); })); return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java index ebd355d4..072bc8bb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java @@ -95,6 +95,10 @@ public class AnnotationHighlightService { List annotations = pdPage.getAnnotations(); + // Duplicates can exist due to table extraction columns over multiple rows. + Set processedIds = new HashSet<>(); + + entityLoop: for (Entity entity : classifiedDoc.getEntities().get(page)) { if (flatRedaction && !isRedactionType(entity)) { @@ -104,7 +108,15 @@ public class AnnotationHighlightService { RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity); boolean requestedToRemove = false; List comments = null; + for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) { + if (processedIds.contains(entityPositionSequence.getId())) { + + // TODO refactor this outer loop jump as soon as we have the time. + continue entityLoop; + } else { + processedIds.add(entityPositionSequence.getId()); + } if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) { for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) { @@ -142,12 +154,17 @@ public class AnnotationHighlightService { } redactionLogEntry.getPositions().addAll(rectanglesPerLine); + annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity))); } redactionLogEntry.setId(entityPositionSequence.getId()); } - classifiedDoc.getRedactionLogEntities().add(redactionLogEntry); + + // FIXME ids should never be null. Figure out why this happens. 
+ if (redactionLogEntry.getId() != null) { + classifiedDoc.getRedactionLogEntities().add(redactionLogEntry); + } } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index e8349e3f..ddcb4239 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -51,6 +51,7 @@ import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactions; import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; +import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; import com.iqser.red.service.redaction.v1.model.RedactionRequest; import com.iqser.red.service.redaction.v1.model.RedactionResult; import com.iqser.red.service.redaction.v1.model.Status; @@ -262,7 +263,16 @@ public class RedactionIntegrationTest { .document(IOUtils.toByteArray(new FileInputStream(path))) .build(); System.out.println("Redacting file : " + path.getName()); - redactionController.redact(request); + RedactionResult result = redactionController.redact(request); + + Map> duplicates = new HashMap<>(); + result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { + duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); + }); + + duplicates.entrySet().forEach(entry -> { + assertThat(entry.getValue().size()).isEqualTo(1); + }); } } @@ -358,7 +368,6 @@ public class RedactionIntegrationTest { manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment)); 
manualRedactions.getComments().put(manualAddId, List.of(comment)); - ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); manualRedactionEntry.setId(manualAddId); manualRedactionEntry.setStatus(Status.REQUESTED);