RED-419: Avoid duplicate entries
This commit is contained in:
parent
61352b565d
commit
efe49ac2c1
@ -18,7 +18,7 @@ public class IdBuilder {
|
|||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
crossSequenceParts.forEach(sequencePart -> sequencePart.getTextPositions().forEach(textPosition -> {
|
crossSequenceParts.forEach(sequencePart -> sequencePart.getTextPositions().forEach(textPosition -> {
|
||||||
sb.append(textPosition.getTextMatrix());
|
sb.append(textPosition.getTextMatrix()).append(sequencePart.getPage());
|
||||||
}));
|
}));
|
||||||
|
|
||||||
return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString();
|
return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString();
|
||||||
|
|||||||
@ -95,6 +95,10 @@ public class AnnotationHighlightService {
|
|||||||
|
|
||||||
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
||||||
|
|
||||||
|
// Duplicates can exist due to table extraction, where columns span multiple rows.
|
||||||
|
Set<String> processedIds = new HashSet<>();
|
||||||
|
|
||||||
|
entityLoop:
|
||||||
for (Entity entity : classifiedDoc.getEntities().get(page)) {
|
for (Entity entity : classifiedDoc.getEntities().get(page)) {
|
||||||
|
|
||||||
if (flatRedaction && !isRedactionType(entity)) {
|
if (flatRedaction && !isRedactionType(entity)) {
|
||||||
@ -104,7 +108,15 @@ public class AnnotationHighlightService {
|
|||||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity);
|
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity);
|
||||||
boolean requestedToRemove = false;
|
boolean requestedToRemove = false;
|
||||||
List<Comment> comments = null;
|
List<Comment> comments = null;
|
||||||
|
|
||||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||||
|
if (processedIds.contains(entityPositionSequence.getId())) {
|
||||||
|
|
||||||
|
// TODO refactor this outer loop jump as soon as we have the time.
|
||||||
|
continue entityLoop;
|
||||||
|
} else {
|
||||||
|
processedIds.add(entityPositionSequence.getId());
|
||||||
|
}
|
||||||
|
|
||||||
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
|
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
|
||||||
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
|
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
|
||||||
@ -142,13 +154,18 @@ public class AnnotationHighlightService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
|
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
|
||||||
|
|
||||||
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity)));
|
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity)));
|
||||||
}
|
}
|
||||||
|
|
||||||
redactionLogEntry.setId(entityPositionSequence.getId());
|
redactionLogEntry.setId(entityPositionSequence.getId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME ids should never be null. Figure out why this happens.
|
||||||
|
if (redactionLogEntry.getId() != null) {
|
||||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -51,6 +51,7 @@ import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
|||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||||
import com.iqser.red.service.redaction.v1.model.Point;
|
import com.iqser.red.service.redaction.v1.model.Point;
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||||
import com.iqser.red.service.redaction.v1.model.Status;
|
import com.iqser.red.service.redaction.v1.model.Status;
|
||||||
@ -262,7 +263,16 @@ public class RedactionIntegrationTest {
|
|||||||
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
||||||
.build();
|
.build();
|
||||||
System.out.println("Redacting file : " + path.getName());
|
System.out.println("Redacting file : " + path.getName());
|
||||||
redactionController.redact(request);
|
RedactionResult result = redactionController.redact(request);
|
||||||
|
|
||||||
|
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||||
|
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||||
|
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
|
||||||
|
});
|
||||||
|
|
||||||
|
duplicates.entrySet().forEach(entry -> {
|
||||||
|
assertThat(entry.getValue().size()).isEqualTo(1);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -358,7 +368,6 @@ public class RedactionIntegrationTest {
|
|||||||
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
|
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
|
||||||
manualRedactions.getComments().put(manualAddId, List.of(comment));
|
manualRedactions.getComments().put(manualAddId, List.of(comment));
|
||||||
|
|
||||||
|
|
||||||
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
|
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
|
||||||
manualRedactionEntry.setId(manualAddId);
|
manualRedactionEntry.setId(manualAddId);
|
||||||
manualRedactionEntry.setStatus(Status.REQUESTED);
|
manualRedactionEntry.setStatus(Status.REQUESTED);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user