RED-419: Avoid duplicate entries

This commit is contained in:
deiflaender 2020-11-05 12:27:24 +01:00
parent 61352b565d
commit efe49ac2c1
3 changed files with 30 additions and 4 deletions

View File

@ -18,7 +18,7 @@ public class IdBuilder {
StringBuilder sb = new StringBuilder();
crossSequenceParts.forEach(sequencePart -> sequencePart.getTextPositions().forEach(textPosition -> {
sb.append(textPosition.getTextMatrix());
sb.append(textPosition.getTextMatrix()).append(sequencePart.getPage());
}));
return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString();

View File

@ -95,6 +95,10 @@ public class AnnotationHighlightService {
List<PDAnnotation> annotations = pdPage.getAnnotations();
// Duplicates can exist due to table extraction columns spanning multiple rows.
Set<String> processedIds = new HashSet<>();
entityLoop:
for (Entity entity : classifiedDoc.getEntities().get(page)) {
if (flatRedaction && !isRedactionType(entity)) {
@ -104,7 +108,15 @@ public class AnnotationHighlightService {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity);
boolean requestedToRemove = false;
List<Comment> comments = null;
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
if (processedIds.contains(entityPositionSequence.getId())) {
// TODO refactor this outer loop jump as soon as we have the time.
continue entityLoop;
} else {
processedIds.add(entityPositionSequence.getId());
}
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
@ -142,12 +154,17 @@ public class AnnotationHighlightService {
}
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity)));
}
redactionLogEntry.setId(entityPositionSequence.getId());
}
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
// FIXME ids should never be null. Figure out why this happens.
if (redactionLogEntry.getId() != null) {
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
}
}
}

View File

@ -51,6 +51,7 @@ import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.Status;
@ -262,7 +263,16 @@ public class RedactionIntegrationTest {
.document(IOUtils.toByteArray(new FileInputStream(path)))
.build();
System.out.println("Redacting file : " + path.getName());
redactionController.redact(request);
RedactionResult result = redactionController.redact(request);
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
duplicates.entrySet().forEach(entry -> {
assertThat(entry.getValue().size()).isEqualTo(1);
});
}
}
@ -358,7 +368,6 @@ public class RedactionIntegrationTest {
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
manualRedactions.getComments().put(manualAddId, List.of(comment));
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
manualRedactionEntry.setId(manualAddId);
manualRedactionEntry.setStatus(Status.REQUESTED);