Pull request #52: RED-391: Support comments and approval of manual redactions, RED-385: Improve annotation IDs in PDFs

Merge in RED/redaction-service from RED-391 to master

* commit '365158a0084b2034aca91e6f77eb9df42bbcb246':
  RED-391: Support comments and approval of manual redactions, RED-385: Improve annotation IDs in PDFs
This commit is contained in:
Dominique Eiflaender 2020-10-05 15:36:06 +02:00
commit 943d40366e
9 changed files with 191 additions and 35 deletions

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.model;
import java.time.OffsetDateTime;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A single user comment that can be attached to a manual redaction entry or
 * to a redaction removal request.
 *
 * <p>Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class Comment {
/**
 * Timestamp of the comment. The annotation rendering code converts this
 * value without a null check, so it is expected to be set.
 */
private OffsetDateTime date;
/** The comment body. */
private String text;
/** The user who wrote the comment. */
private String user;
}

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.model;
import java.util.ArrayList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A request to remove an automatically detected redaction, identified by the
 * id of the entity position sequence it refers to.
 *
 * <p>Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class IdRemoval {
/** Id that is compared against the id of an entity position sequence. */
private String id;
/**
 * {@code true} once the removal has been approved; an unapproved removal is
 * treated as a pending request only.
 */
private boolean approved;
/**
 * Whether the value should also be removed from the dictionary. Only
 * consulted while the removal is not yet approved.
 */
private boolean removeFromDictionary;
/**
 * Comments attached to this removal; starts as an empty list.
 * {@code @Builder.Default} keeps the initializer effective for instances
 * created through the builder.
 */
// NOTE(review): instances are held in a HashSet and @Data derives
// equals/hashCode from the fields - confirm they are not mutated after insertion.
@Builder.Default
private List<Comment> comments = new ArrayList<>();
}

View File

@ -4,10 +4,12 @@ import java.util.ArrayList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ManualRedactionEntry {
@ -16,6 +18,11 @@ public class ManualRedactionEntry {
private String value;
private String reason;
private List<Rectangle> positions = new ArrayList<>();
private boolean approved;
private boolean addToDictionary;
@Builder.Default
private List<Comment> comments = new ArrayList<>();
private String section;
private int sectionNumber;

View File

@ -4,14 +4,19 @@ import java.util.HashSet;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ManualRedactions {
private Set<String> idsToRemove = new HashSet<>();
@Builder.Default
private Set<IdRemoval> idsToRemove = new HashSet<>();
@Builder.Default
private Set<ManualRedactionEntry> entriesToAdd = new HashSet<>();
}

View File

@ -27,5 +27,6 @@ public class RedactionLogEntry {
private List<Rectangle> positions = new ArrayList<>();
private int sectionNumber;
private boolean manual;
private boolean approved;
}

View File

@ -45,6 +45,12 @@ public class DictionaryService {
@Getter
private float[] defaultColor;
@Getter
private float[] requestAddColor;
@Getter
private float[] requestRemoveColor;
public void updateDictionary() {
@ -76,6 +82,10 @@ public class DictionaryService {
.collect(Collectors.toList());
dictionary = entryColors.keySet().stream().collect(Collectors.toMap(type -> type, this::convertEntries));
defaultColor = dictionaryClient.getDefaultColor().getColor();
// TODO get colors from configuration service.
requestAddColor = new float[]{0f, 1f, 0.8f};
requestRemoveColor = new float[]{0f, 1f, 0.8f};
}
} catch (FeignException e) {
log.warn("Got some unknown feignException", e);

View File

@ -3,9 +3,11 @@ package com.iqser.red.service.redaction.v1.server.visualization.service;
import java.awt.Color;
import java.io.IOException;
import java.util.ArrayList;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
@ -17,10 +19,13 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.Comment;
import com.iqser.red.service.redaction.v1.model.IdRemoval;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Rectangle;
@ -97,17 +102,35 @@ public class AnnotationHighlightService {
}
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity);
boolean requestedToRemove = false;
boolean removeFromDictionary = false;
List<Comment> comments = null;
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
if (manualRedactions != null && manualRedactions.getIdsToRemove()
.contains(entityPositionSequence.getId())) {
String manualOverrideReason = entity.getRedactionReason() + ", removed by manual override";
entity.setRedaction(false);
entity.setRedactionReason(manualOverrideReason);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
if (manualRemoval.getId().equals(entityPositionSequence.getId())) {
comments = manualRemoval.getComments();
String manualOverrideReason;
if (manualRemoval.isApproved()) {
entity.setRedaction(false);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setApproved(true);
manualOverrideReason = entity.getRedactionReason() + ", removed by manual override";
} else {
requestedToRemove = true;
manualOverrideReason = entity.getRedactionReason() + ", requested to remove";
if (manualRemoval.isRemoveFromDictionary()) {
removeFromDictionary = true;
}
}
entity.setRedactionReason(manualOverrideReason);
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
}
}
}
if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) {
@ -117,7 +140,7 @@ public class AnnotationHighlightService {
.collect(Collectors.toList()), page);
redactionLogEntry.getPositions().addAll(rectanglesPerline);
annotations.add(createAnnotation(rectanglesPerline, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity)));
annotations.addAll(createAnnotation(rectanglesPerline, prefixId(entity, entityPositionSequence.getId(), requestedToRemove, removeFromDictionary), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity)));
}
redactionLogEntry.setId(entityPositionSequence.getId());
@ -128,6 +151,27 @@ public class AnnotationHighlightService {
}
/**
 * Builds the annotation id for an automatically detected entity by prefixing
 * the raw id with a marker describing the entity's state.
 *
 * <p>Precedence: hint, then non-redacted ("ignore"), then pending removal
 * request, then plain redaction.
 *
 * @param entity               the entity the annotation belongs to
 * @param id                   the raw id of the position sequence
 * @param requestedToRemove    whether an unapproved removal request exists
 * @param removeFromDictionary whether that request also targets the dictionary
 * @return the prefixed annotation id
 */
private String prefixId(Entity entity, String id, boolean requestedToRemove, boolean removeFromDictionary) {

    String typedSuffix = entity.getType() + ":" + id;

    if (isHint(entity)) {
        return "hint:" + typedSuffix;
    }
    if (!entity.isRedaction()) {
        return "ignore:" + typedSuffix;
    }
    if (!requestedToRemove) {
        return "redaction:" + typedSuffix;
    }
    // Pending removal: the "only here" variant carries no type in its id.
    return removeFromDictionary ? "request:remove:" + typedSuffix : "request:remove:only_here:" + id;
}
private List<Rectangle> getRectanglesPerLine(List<TextPosition> textPositions, int page) {
List<Rectangle> rectangles = new ArrayList<>();
@ -168,28 +212,35 @@ public class AnnotationHighlightService {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, id);
boolean foundOnPage = false;
List<Rectangle> rectanglesOnPage = new ArrayList<>();
for (Rectangle rectangle : manualRedactionEntry.getPositions()) {
if (page != rectangle.getPage()) {
continue;
if (page == rectangle.getPage()) {
rectanglesOnPage.add(rectangle);
redactionLogEntry.getPositions().add(rectangle);
}
foundOnPage = true;
PDAnnotationTextMarkup highlight = createAnnotation(List.of(rectangle), id, createAnnotationContent(manualRedactionEntry), getColor(manualRedactionEntry
.getType()), true);
annotations.add(highlight);
redactionLogEntry.getPositions().add(rectangle);
}
if (foundOnPage) {
if (!rectanglesOnPage.isEmpty()) {
annotations.addAll(createAnnotation(rectanglesOnPage, prefixId(manualRedactionEntry, id), createAnnotationContent(manualRedactionEntry), getColorForManualAdd(manualRedactionEntry
.getType(), manualRedactionEntry.isApproved()), manualRedactionEntry.getComments(), true));
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
}
}
}
/**
 * Builds the annotation id for a manually added redaction by prefixing the
 * raw id with a marker describing its approval state.
 *
 * @param manualRedactionEntry the manual entry the annotation belongs to
 * @param id                   the raw id of the entry
 * @return {@code redaction:<type>:<id>} when approved,
 *         {@code request:add:<type>:<id>} for a pending dictionary addition,
 *         otherwise {@code request:add:only_here:<id>}
 */
private String prefixId(ManualRedactionEntry manualRedactionEntry, String id) {

    final String prefix;
    if (manualRedactionEntry.isApproved()) {
        prefix = "redaction:" + manualRedactionEntry.getType();
    } else if (manualRedactionEntry.isAddToDictionary()) {
        prefix = "request:add:" + manualRedactionEntry.getType();
    } else {
        prefix = "request:add:only_here";
    }
    return prefix + ":" + id;
}
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id) {
return RedactionLogEntry.builder()
@ -203,6 +254,7 @@ public class AnnotationHighlightService {
.section(manualRedactionEntry.getSection())
.sectionNumber(manualRedactionEntry.getSectionNumber())
.manual(true)
.approved(manualRedactionEntry.isApproved())
.build();
}
@ -210,7 +262,7 @@ public class AnnotationHighlightService {
private RedactionLogEntry createRedactionLogEntry(Entity entity) {
return RedactionLogEntry.builder()
.color(getColor(entity))
.color(getColor(entity, false))
.reason(entity.getRedactionReason())
.value(entity.getWord())
.type(entity.getType())
@ -222,20 +274,39 @@ public class AnnotationHighlightService {
}
private PDAnnotationTextMarkup createAnnotation(List<Rectangle> rectangles, String id, String content,
float[] color, boolean popup) {
private List<PDAnnotation> createAnnotation(List<Rectangle> rectangles, String id, String content, float[] color,
List<Comment> comments, boolean popup) {
List<PDAnnotation> annotations = new ArrayList<>();
PDAnnotationTextMarkup annotation = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
annotation.constructAppearances();
annotation.setRectangle(toPDRectangle(rectangles));
PDRectangle pdRectangle = toPDRectangle(rectangles);
annotation.setRectangle(pdRectangle);
annotation.setQuadPoints(toQuadPoints(rectangles));
if (popup) {
annotation.setAnnotationName(id);
annotation.setTitlePopup(id);
annotation.setContents(content);
}
annotation.setTitlePopup(id);
annotation.setAnnotationName(id);
annotation.setColor(new PDColor(color, PDDeviceRGB.INSTANCE));
return annotation;
annotations.add(annotation);
if (comments != null) {
for (Comment comment : comments) {
PDAnnotationText txtAnnot = new PDAnnotationText();
txtAnnot.setAnnotationName(UUID.randomUUID().toString());
txtAnnot.setInReplyTo(annotation); // Reference to highlight annotation
txtAnnot.setName(PDAnnotationText.NAME_COMMENT);
txtAnnot.setCreationDate(GregorianCalendar.from(comment.getDate().toZonedDateTime()));
txtAnnot.setTitlePopup(comment.getUser());
txtAnnot.setContents(comment.getText());
txtAnnot.setRectangle(pdRectangle);
annotations.add(txtAnnot);
}
}
return annotations;
}
@ -319,8 +390,11 @@ public class AnnotationHighlightService {
}
private float[] getColor(Entity entity) {
private float[] getColor(Entity entity, boolean requestedToRemove) {
if (requestedToRemove) {
return dictionaryService.getRequestRemoveColor();
}
if (!entity.isRedaction() && !isHint(entity)) {
return new float[]{0.627f, 0.627f, 0.627f};
}
@ -331,6 +405,15 @@ public class AnnotationHighlightService {
}
/**
 * Selects the highlight color for a manually added redaction.
 *
 * @param type     the type of the manual entry
 * @param approved whether the manual entry has been approved
 * @return the color for the type when approved, otherwise the
 *         "request add" color provided by the dictionary service
 */
private float[] getColorForManualAdd(String type, boolean approved) {

    return approved ? getColor(type) : dictionaryService.getRequestAddColor();
}
private float[] getColor(String type) {
if (!dictionaryService.getEntryColors().containsKey(type)) {

View File

@ -14,6 +14,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -43,6 +44,8 @@ import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
import com.iqser.red.service.redaction.v1.model.Comment;
import com.iqser.red.service.redaction.v1.model.IdRemoval;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
@ -283,7 +286,7 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource(
"files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
"files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -334,9 +337,12 @@ public class RedactionIntegrationTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
ManualRedactions manualRedactions = new ManualRedactions();
manualRedactions.setIdsToRemove(Set.of("0836727c3508a0b2ea271da69c04cc2f"));
Comment comment = Comment.builder().date(OffsetDateTime.now()).user("TEST_USER").text("This is a comment test").build();
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder().id("0836727c3508a0b2ea271da69c04cc2f").approved(false).build()));
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry();
manualRedactionEntry.setComments(List.of(comment));
manualRedactionEntry.setType("name");
manualRedactionEntry.setValue("O'Loughlin C.K.");
manualRedactionEntry.setReason("Manual Redaction");

View File

@ -16,7 +16,7 @@ rule "1: Redacted because Section contains Vertebrate"
rule "2: Not Redacted because Section contains no Vertebrate"
when
Section(matchesType("vertebrate"))
Section(!matchesType("vertebrate"))
then
section.redactNot("name", 2, "Not Redacted because Section contains no Vertebrate");
section.redactNot("address", 2, "Not Redacted because Section contains no Vertebrate");