From 74e63ca292bc44d38ee6b6d66e372a67f53f3c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Tue, 29 Sep 2020 13:43:16 +0200 Subject: [PATCH] Annotate line by line --- .../parsing/model/TextPositionSequence.java | 17 ++----- .../service/AnnotationHighlightService.java | 45 +++++++++++++------ 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java index cf09ddfc..b109b54a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java @@ -9,9 +9,7 @@ import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; import lombok.Data; -import lombok.Getter; import lombok.RequiredArgsConstructor; -import lombok.Setter; @Data @RequiredArgsConstructor @@ -19,10 +17,6 @@ public class TextPositionSequence implements CharSequence { private List textPositions = new ArrayList<>(); - @Getter - @Setter - private float[] annotationColor; - private final int page; @@ -194,22 +188,18 @@ public class TextPositionSequence implements CharSequence { public Rectangle getRectangle() { - float height = textPositions.get(0).getHeightDir() + 2; + float height = getTextHeight(); - float posXInit; + float posXInit = getX1(); float posXEnd; float posYInit; float posYEnd; if (textPositions.get(0).getRotation() == 90) { - posXEnd = textPositions.get(0).getYDirAdj() + 2; - posXInit = textPositions.get(0).getYDirAdj() - height; - posYInit = textPositions.get(0).getXDirAdj(); + posYInit = getY1(); posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; } else { - - posXInit = textPositions.get(0).getXDirAdj(); posXEnd = textPositions.get(textPositions.size() - 1) .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; @@ -220,4 +210,5 @@ public class TextPositionSequence implements CharSequence { return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); } + } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java index 5c9eb504..510ed470 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java @@ -2,9 +2,11 @@ package com.iqser.red.service.redaction.v1.server.visualization.service; import java.awt.Color; import java.io.IOException; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import org.apache.pdfbox.pdmodel.PDDocument; @@ -16,6 +18,7 @@ import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.text.TextPosition; import org.springframework.stereotype.Service; import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; @@ -104,21 +107,14 @@ public class AnnotationHighlightService { } if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) { - Rectangle rectangle = new Rectangle(); - rectangle.setTopLeft(entityPositionSequence.getSequences().get(0).getRectangle().getTopLeft()); - rectangle.setHeight((float) entityPositionSequence.getSequences() + for (Rectangle rectangle : getRectanglesPerLine(entityPositionSequence.getSequences() .stream() - .mapToDouble(TextPositionSequence::getHeight) - .max() - .getAsDouble()); - rectangle.setWidth((float) entityPositionSequence.getSequences() - .stream() - .mapToDouble(TextPositionSequence::getWidth) - .sum()); - rectangle.setPage(page); - redactionLogEntry.setPosition(rectangle); - annotations.add(createAnnotation(rectangle, entityPositionSequence.getId(), - createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity))); + .flatMap(seq -> seq.getTextPositions().stream()) + .collect(Collectors.toList()), page)) { + redactionLogEntry.setPosition(rectangle); + annotations.add(createAnnotation(rectangle, entityPositionSequence.getId(), + createAnnotationContent(entity), getColor(entity), !flatRedaction && !isHint(entity))); + } } redactionLogEntry.setId(entityPositionSequence.getId()); } @@ -127,6 +123,27 @@ public class AnnotationHighlightService { } + private List getRectanglesPerLine(List textPositions, int page) { + + List rectangles = new ArrayList<>(); + float y = textPositions.get(0).getYDirAdj(); + int startIndex = 0; + for (int i = 1; i < textPositions.size(); i++) { + float yDirAdj = textPositions.get(i).getYDirAdj(); + if (yDirAdj != y) { + rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, i), page).getRectangle()); + y = yDirAdj; + startIndex = i; + } + } + if (startIndex != textPositions.size() - 1) { + rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, textPositions.size()), page).getRectangle()); + } + + return rectangles; + } + + private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions, int page) throws IOException {