RED-419: Avoid duplicate entries

This commit is contained in:
deiflaender 2020-11-05 12:27:24 +01:00
parent 61352b565d
commit efe49ac2c1
3 changed files with 30 additions and 4 deletions

View File

@ -18,7 +18,7 @@ public class IdBuilder {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
crossSequenceParts.forEach(sequencePart -> sequencePart.getTextPositions().forEach(textPosition -> { crossSequenceParts.forEach(sequencePart -> sequencePart.getTextPositions().forEach(textPosition -> {
sb.append(textPosition.getTextMatrix()); sb.append(textPosition.getTextMatrix()).append(sequencePart.getPage());
})); }));
return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString(); return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString();

View File

@ -95,6 +95,10 @@ public class AnnotationHighlightService {
List<PDAnnotation> annotations = pdPage.getAnnotations(); List<PDAnnotation> annotations = pdPage.getAnnotations();
// Duplicates can exist due to table extraction columns spanning multiple rows.
Set<String> processedIds = new HashSet<>();
entityLoop:
for (Entity entity : classifiedDoc.getEntities().get(page)) { for (Entity entity : classifiedDoc.getEntities().get(page)) {
if (flatRedaction && !isRedactionType(entity)) { if (flatRedaction && !isRedactionType(entity)) {
@ -104,7 +108,15 @@ public class AnnotationHighlightService {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity); RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity);
boolean requestedToRemove = false; boolean requestedToRemove = false;
List<Comment> comments = null; List<Comment> comments = null;
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) { for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
if (processedIds.contains(entityPositionSequence.getId())) {
// TODO refactor this outer loop jump as soon as we have the time.
continue entityLoop;
} else {
processedIds.add(entityPositionSequence.getId());
}
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) { if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) { for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
@ -142,12 +154,17 @@ public class AnnotationHighlightService {
} }
redactionLogEntry.getPositions().addAll(rectanglesPerLine); redactionLogEntry.getPositions().addAll(rectanglesPerLine);
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity))); annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity)));
} }
redactionLogEntry.setId(entityPositionSequence.getId()); redactionLogEntry.setId(entityPositionSequence.getId());
} }
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
// FIXME ids should never be null. Figure out why this happens.
if (redactionLogEntry.getId() != null) {
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
}
} }
} }

View File

@ -51,6 +51,7 @@ import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions; import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest; import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult; import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.model.Status;
@ -262,7 +263,16 @@ public class RedactionIntegrationTest {
.document(IOUtils.toByteArray(new FileInputStream(path))) .document(IOUtils.toByteArray(new FileInputStream(path)))
.build(); .build();
System.out.println("Redacting file : " + path.getName()); System.out.println("Redacting file : " + path.getName());
redactionController.redact(request); RedactionResult result = redactionController.redact(request);
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
duplicates.entrySet().forEach(entry -> {
assertThat(entry.getValue().size()).isEqualTo(1);
});
} }
} }
@ -358,7 +368,6 @@ public class RedactionIntegrationTest {
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment)); manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
manualRedactions.getComments().put(manualAddId, List.of(comment)); manualRedactions.getComments().put(manualAddId, List.of(comment));
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
manualRedactionEntry.setId(manualAddId); manualRedactionEntry.setId(manualAddId);
manualRedactionEntry.setStatus(Status.REQUESTED); manualRedactionEntry.setStatus(Status.REQUESTED);