Annotate line by line

This commit is contained in:
Thierry Göckel 2020-09-29 13:43:16 +02:00
parent 6638f0cf9e
commit 74e63ca292
2 changed files with 35 additions and 27 deletions

View File

@ -9,9 +9,7 @@ import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.Rectangle;
import lombok.Data; import lombok.Data;
import lombok.Getter;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.Setter;
@Data @Data
@RequiredArgsConstructor @RequiredArgsConstructor
@ -19,10 +17,6 @@ public class TextPositionSequence implements CharSequence {
private List<TextPosition> textPositions = new ArrayList<>(); private List<TextPosition> textPositions = new ArrayList<>();
@Getter
@Setter
private float[] annotationColor;
private final int page; private final int page;
@ -194,22 +188,18 @@ public class TextPositionSequence implements CharSequence {
public Rectangle getRectangle() { public Rectangle getRectangle() {
float height = textPositions.get(0).getHeightDir() + 2; float height = getTextHeight();
float posXInit; float posXInit = getX1();
float posXEnd; float posXEnd;
float posYInit; float posYInit;
float posYEnd; float posYEnd;
if (textPositions.get(0).getRotation() == 90) { if (textPositions.get(0).getRotation() == 90) {
posXEnd = textPositions.get(0).getYDirAdj() + 2; posXEnd = textPositions.get(0).getYDirAdj() + 2;
posXInit = textPositions.get(0).getYDirAdj() - height; posYInit = getY1();
posYInit = textPositions.get(0).getXDirAdj();
posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
} else { } else {
posXInit = textPositions.get(0).getXDirAdj();
posXEnd = textPositions.get(textPositions.size() - 1) posXEnd = textPositions.get(textPositions.size() - 1)
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
@ -220,4 +210,5 @@ public class TextPositionSequence implements CharSequence {
return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page);
} }
} }

View File

@ -2,9 +2,11 @@ package com.iqser.red.service.redaction.v1.server.visualization.service;
import java.awt.Color; import java.awt.Color;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
@ -16,6 +18,7 @@ import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
@ -104,21 +107,14 @@ public class AnnotationHighlightService {
} }
if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) { if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) {
Rectangle rectangle = new Rectangle(); for (Rectangle rectangle : getRectanglesPerLine(entityPositionSequence.getSequences()
rectangle.setTopLeft(entityPositionSequence.getSequences().get(0).getRectangle().getTopLeft());
rectangle.setHeight((float) entityPositionSequence.getSequences()
.stream() .stream()
.mapToDouble(TextPositionSequence::getHeight) .flatMap(seq -> seq.getTextPositions().stream())
.max() .collect(Collectors.toList()), page)) {
.getAsDouble()); redactionLogEntry.setPosition(rectangle);
rectangle.setWidth((float) entityPositionSequence.getSequences() annotations.add(createAnnotation(rectangle, entityPositionSequence.getId(),
.stream() createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity)));
.mapToDouble(TextPositionSequence::getWidth) }
.sum());
rectangle.setPage(page);
redactionLogEntry.setPosition(rectangle);
annotations.add(createAnnotation(rectangle, entityPositionSequence.getId(),
createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity)));
} }
redactionLogEntry.setId(entityPositionSequence.getId()); redactionLogEntry.setId(entityPositionSequence.getId());
} }
@ -127,6 +123,27 @@ public class AnnotationHighlightService {
} }
/**
 * Splits the given text positions into runs that share the same {@code getYDirAdj()} value
 * and returns one rectangle per run, in input order.
 *
 * <p>A new run starts whenever the {@code getYDirAdj()} value of a text position differs
 * from that of the run currently being collected. Each run is wrapped in a
 * {@link TextPositionSequence} whose {@code getRectangle()} supplies the resulting rectangle.
 *
 * @param textPositions the text positions to group, in reading order; may be empty
 * @param page the page number passed on to every created {@link TextPositionSequence}
 * @return one rectangle per run; an empty list when there are no text positions
 */
private List<Rectangle> getRectanglesPerLine(List<TextPosition> textPositions, int page) {

    List<Rectangle> rectangles = new ArrayList<>();
    // Nothing to annotate: avoid the IndexOutOfBoundsException of get(0) on an empty list.
    if (textPositions == null || textPositions.isEmpty()) {
        return rectangles;
    }

    float y = textPositions.get(0).getYDirAdj();
    int startIndex = 0;
    for (int i = 1; i < textPositions.size(); i++) {
        float yDirAdj = textPositions.get(i).getYDirAdj();
        if (yDirAdj != y) {
            // The y value changed: close the run [startIndex, i) and start a new one at i.
            rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, i), page).getRectangle());
            y = yDirAdj;
            startIndex = i;
        }
    }

    // Always flush the trailing run. It contains at least one element because the list is
    // non-empty and startIndex < size; skipping it when it holds a single text position
    // would silently drop single-character runs (including single-character inputs).
    rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, textPositions.size()), page).getRectangle());
    return rectangles;
}
private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions, private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions,
int page) throws IOException { int page) throws IOException {