Pull request #42: RED-318

Merge in RED/redaction-service from RED-318 to master

* commit 'f5790bccab3fb8dce66aeb9fbe501655ae6b86c3':
  Log entries and manual redaction require one position per line
  Annotate line by line
  Refactor multiple positions to one
  Merge rectangles for single annotation/redaction log entry
This commit is contained in:
Thierry Goeckel 2020-09-29 14:15:21 +02:00
commit c9516205fe
3 changed files with 61 additions and 54 deletions

View File

@ -9,9 +9,7 @@ import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.Rectangle;
import lombok.Data; import lombok.Data;
import lombok.Getter;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.Setter;
@Data @Data
@RequiredArgsConstructor @RequiredArgsConstructor
@ -19,10 +17,6 @@ public class TextPositionSequence implements CharSequence {
private List<TextPosition> textPositions = new ArrayList<>(); private List<TextPosition> textPositions = new ArrayList<>();
@Getter
@Setter
private float[] annotationColor;
private final int page; private final int page;
@ -194,22 +188,18 @@ public class TextPositionSequence implements CharSequence {
public Rectangle getRectangle() { public Rectangle getRectangle() {
float height = textPositions.get(0).getHeightDir() + 2; float height = getTextHeight();
float posXInit; float posXInit = getX1();
float posXEnd; float posXEnd;
float posYInit; float posYInit;
float posYEnd; float posYEnd;
if (textPositions.get(0).getRotation() == 90) { if (textPositions.get(0).getRotation() == 90) {
posXEnd = textPositions.get(0).getYDirAdj() + 2; posXEnd = textPositions.get(0).getYDirAdj() + 2;
posXInit = textPositions.get(0).getYDirAdj() - height; posYInit = getY1();
posYInit = textPositions.get(0).getXDirAdj();
posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
} else { } else {
posXInit = textPositions.get(0).getXDirAdj();
posXEnd = textPositions.get(textPositions.size() - 1) posXEnd = textPositions.get(textPositions.size() - 1)
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
@ -220,4 +210,5 @@ public class TextPositionSequence implements CharSequence {
return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page);
} }
} }

View File

@ -27,9 +27,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
public class EntityRedactionService { public class EntityRedactionService {

View File

@ -2,9 +2,11 @@ package com.iqser.red.service.redaction.v1.server.visualization.service;
import java.awt.Color; import java.awt.Color;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
@ -16,6 +18,7 @@ import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
@ -35,9 +38,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
public class AnnotationHighlightService { public class AnnotationHighlightService {
@ -45,7 +46,8 @@ public class AnnotationHighlightService {
private final DictionaryService dictionaryService; private final DictionaryService dictionaryService;
public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction, ManualRedactions manualRedactions) throws IOException { public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction,
ManualRedactions manualRedactions) throws IOException {
Set<Integer> manualRedactionPages = getManualRedactionPages(manualRedactions); Set<Integer> manualRedactionPages = getManualRedactionPages(manualRedactions);
@ -83,7 +85,8 @@ public class AnnotationHighlightService {
} }
private void addAnnotations(PDPage pdPage, Document classifiedDoc, boolean flatRedaction, ManualRedactions manualRedactions, int page) throws IOException { private void addAnnotations(PDPage pdPage, Document classifiedDoc, boolean flatRedaction,
ManualRedactions manualRedactions, int page) throws IOException {
List<PDAnnotation> annotations = pdPage.getAnnotations(); List<PDAnnotation> annotations = pdPage.getAnnotations();
@ -107,11 +110,15 @@ public class AnnotationHighlightService {
redactionLogEntry.setManual(true); redactionLogEntry.setManual(true);
} }
for (TextPositionSequence textPositions : entityPositionSequence.getSequences()) { if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) {
for (Rectangle rectangle : getRectanglesPerLine(entityPositionSequence.getSequences()
Rectangle rectangle = textPositions.getRectangle(); .stream()
.flatMap(seq -> seq.getTextPositions().stream())
.collect(Collectors.toList()), page)) {
redactionLogEntry.getPositions().add(rectangle); redactionLogEntry.getPositions().add(rectangle);
annotations.add(createAnnotation(rectangle, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity))); annotations.add(createAnnotation(rectangle, entityPositionSequence.getId(),
createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity)));
}
} }
redactionLogEntry.setId(entityPositionSequence.getId()); redactionLogEntry.setId(entityPositionSequence.getId());
} }
@ -120,7 +127,29 @@ public class AnnotationHighlightService {
} }
private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions, int page) throws IOException { private List<Rectangle> getRectanglesPerLine(List<TextPosition> textPositions, int page) {
List<Rectangle> rectangles = new ArrayList<>();
float y = textPositions.get(0).getYDirAdj();
int startIndex = 0;
for (int i = 1; i < textPositions.size(); i++) {
float yDirAdj = textPositions.get(i).getYDirAdj();
if (yDirAdj != y) {
rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, i), page).getRectangle());
y = yDirAdj;
startIndex = i;
}
}
if (startIndex != textPositions.size() - 1) {
rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, textPositions.size()), page).getRectangle());
}
return rectangles;
}
private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions,
int page) throws IOException {
if (manualRedactions == null) { if (manualRedactions == null) {
return; return;
@ -143,8 +172,8 @@ public class AnnotationHighlightService {
foundOnPage = true; foundOnPage = true;
PDAnnotationTextMarkup highlight = createAnnotation(rectangle, id, createAnnotationContent(manualRedactionEntry), getColor(manualRedactionEntry PDAnnotationTextMarkup highlight = createAnnotation(rectangle, id,
.getType()), true); createAnnotationContent(manualRedactionEntry), getColor(manualRedactionEntry.getType()), true);
annotations.add(highlight); annotations.add(highlight);
redactionLogEntry.getPositions().add(rectangle); redactionLogEntry.getPositions().add(rectangle);
@ -188,7 +217,8 @@ public class AnnotationHighlightService {
} }
private PDAnnotationTextMarkup createAnnotation(Rectangle rectangle, String id, String content, float[] color, boolean popup) { private PDAnnotationTextMarkup createAnnotation(Rectangle rectangle, String id, String content, float[] color,
boolean popup) {
PDAnnotationTextMarkup annotation = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); PDAnnotationTextMarkup annotation = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
annotation.constructAppearances(); annotation.constructAppearances();
@ -206,25 +236,15 @@ public class AnnotationHighlightService {
private String createAnnotationContent(Entity entity) { private String createAnnotationContent(Entity entity) {
return new StringBuilder().append("\nRule ") return "\nRule " + entity.getMatchedRule() + " matched" + "\n\n" + entity.getRedactionReason() + "\n\nIn " +
.append(entity.getMatchedRule()) "Section : \"" + entity
.append(" matched") .getHeadline() + "\"";
.append("\n\n")
.append(entity.getRedactionReason())
.append("\n\nIn Section : \"")
.append(entity.getHeadline())
.append("\"")
.toString();
} }
private String createAnnotationContent(ManualRedactionEntry entry) { private String createAnnotationContent(ManualRedactionEntry entry) {
return new StringBuilder().append("\nManual Redaction") return "\nManual Redaction" + "\n\nIn Section : \"" + entry.getSection() + "\"";
.append("\n\nIn Section : \"")
.append(entry.getSection())
.append("\"")
.toString();
} }
@ -244,7 +264,8 @@ public class AnnotationHighlightService {
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right) // quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
// of the area to be highlighted // of the area to be highlighted
return new float[]{rectangle.getTopLeft().getX(), rectangle.getTopLeft().getY(), rectangle.getTopLeft() return new float[]{rectangle.getTopLeft().getX(), rectangle.getTopLeft().getY(), rectangle.getTopLeft()
.getX() + rectangle.getWidth(), rectangle.getTopLeft().getY(), rectangle.getTopLeft().getX(), rectangle.getTopLeft() .getX() + rectangle.getWidth(), rectangle.getTopLeft().getY(), rectangle.getTopLeft().getX(),
rectangle.getTopLeft()
.getY() + rectangle.getHeight(), rectangle.getTopLeft() .getY() + rectangle.getHeight(), rectangle.getTopLeft()
.getX() + rectangle.getWidth(), rectangle.getTopLeft().getY() + rectangle.getHeight()}; .getX() + rectangle.getWidth(), rectangle.getTopLeft().getY() + rectangle.getHeight()};
} }
@ -255,10 +276,7 @@ public class AnnotationHighlightService {
if (!entity.isRedaction()) { if (!entity.isRedaction()) {
return false; return false;
} }
if (isHint(entity)) { return !isHint(entity);
return false;
}
return true;
} }
@ -286,20 +304,19 @@ public class AnnotationHighlightService {
private boolean isHint(Entity entity) { private boolean isHint(Entity entity) {
List<String> hintTypes = dictionaryService.getHintTypes(); List<String> hintTypes = dictionaryService.getHintTypes();
if (CollectionUtils.isNotEmpty(hintTypes) && hintTypes.contains(entity.getType())) { return CollectionUtils.isNotEmpty(hintTypes) && hintTypes.contains(entity.getType());
return true;
}
return false;
} }
private void drawSectionFrames(PDDocument document, Document classifiedDoc, boolean flatRedaction, PDPage pdPage, int page) throws IOException { private void drawSectionFrames(PDDocument document, Document classifiedDoc, boolean flatRedaction, PDPage pdPage,
int page) throws IOException {
if (flatRedaction) { if (flatRedaction) {
return; return;
} }
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true); PDPageContentStream contentStream = new PDPageContentStream(document, pdPage,
PDPageContentStream.AppendMode.APPEND, true);
for (Paragraph paragraph : classifiedDoc.getParagraphs()) { for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) { for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
@ -348,7 +365,8 @@ public class AnnotationHighlightService {
if (cell != null) { if (cell != null) {
contentStream.setLineWidth(0.5f); contentStream.setLineWidth(0.5f);
contentStream.setStrokingColor(Color.CYAN); contentStream.setStrokingColor(Color.CYAN);
contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(),
(float) cell
.getHeight()); .getHeight());
contentStream.stroke(); contentStream.stroke();
} }