diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java index 33b2a722..2175b4fd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java @@ -1,22 +1,24 @@ package com.iqser.red.service.redaction.v1.server.parsing.model; -import static java.util.stream.Collectors.toSet; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.iqser.red.service.redaction.v1.model.Point; -import com.iqser.red.service.redaction.v1.model.Rectangle; -import lombok.Data; -import lombok.NoArgsConstructor; -import org.apache.pdfbox.text.TextPosition; - import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +import org.apache.pdfbox.text.TextPosition; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.iqser.red.service.redaction.v1.model.Point; +import com.iqser.red.service.redaction.v1.model.Rectangle; + +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j @Data @NoArgsConstructor -@JsonIgnoreProperties({ "empty" }) +@JsonIgnoreProperties({"empty"}) public class TextPositionSequence implements CharSequence { private int page; @@ -25,12 +27,15 @@ public class TextPositionSequence implements CharSequence { private float x1; private float x2; + public TextPositionSequence(int page) { + this.page = page; } public static TextPositionSequence fromData(List textPositions, int page) { + var textPositionSequence = new TextPositionSequence(); textPositionSequence.textPositions = textPositions; textPositionSequence.page = page; @@ -46,9 +51,6 @@ public class TextPositionSequence implements CharSequence { } - - - @Override public int length() { @@ -131,6 +133,7 @@ public class TextPositionSequence implements CharSequence { } } + @JsonIgnore public float getRotationAdjustedY() { @@ -190,10 +193,8 @@ public class TextPositionSequence implements CharSequence { @JsonIgnore public String getFont() { - return textPositions.get(0).getFontName() - .toLowerCase() - .replaceAll(",bold", "") - .replaceAll(",italic", ""); + + return textPositions.get(0).getFontName().toLowerCase().replaceAll(",bold", "").replaceAll(",italic", ""); } @@ -214,27 +215,33 @@ public class TextPositionSequence implements CharSequence { } + @JsonIgnore public float getFontSize() { return textPositions.get(0).getFontSizeInPt(); } + @JsonIgnore public float getSpaceWidth() { return textPositions.get(0).getWidthOfSpace(); } + @JsonIgnore public int getRotation() { return textPositions.get(0).getRotation(); } + @JsonIgnore public Rectangle getRectangle() { + log.debug("Page: '{}', Word: '{}', Rotation: '{}'", page, toString(), textPositions.get(0).getRotation()); + float height = getTextHeight(); float posXInit = getX1(); @@ -246,36 +253,45 @@ public class TextPositionSequence implements CharSequence { posXEnd = textPositions.get(0).getYDirAdj() + 2; posYInit = getY1(); posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; - } else if (textPositions.get(0).getRotation() == 270) { + + } else if (textPositions.get(0).getRotation() == 270 && textPositions.size() > 1) { posYInit = textPositions.get(0).getPageHeight() - getX1(); posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0) .getWidth() - textPositions.get(textPositions.size() - 1).getWidth() - 1; posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1) .getYDirAdj() + height; - } else if(textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 270f) { + + } else if (textPositions.get(0).getRotation() == 270 && textPositions.size() == 1) { + posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; + posYEnd = posYInit + 1; + posXInit = textPositions.get(0).getXDirAdj(); + posXEnd = posXInit + textPositions.get(0).getWidthDirAdj() + 0.1f; + + } else if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 270f) { posYInit = textPositions.get(0).getPageHeight() - getX1(); posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0) .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1) .getYDirAdj() + height; - } else if(textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 0.0f){ - posXInit = textPositions.get(textPositions.size() - 1) + + } else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 0.0f) { + posXInit = textPositions.get(textPositions.size() - 1) .getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir(); posXEnd = textPositions.get(0).getXDirAdj(); posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1) .getYDirAdj() + 2; - } else if(textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 90f){ - posYInit = getX1(); - posYEnd = getX2() + textPositions.get(0) - .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; - posXInit = textPositions.get(0).getYDirAdj() + 2; - posXEnd = textPositions.get(textPositions.size() - 1) - .getYDirAdj() - height; - } - else { + + } else if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 90f) { + posYInit = getX1(); + posYEnd = getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1) + .getWidthDirAdj() - 3; + posXInit = textPositions.get(0).getYDirAdj() + 2; + posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; + + } else { posXEnd = textPositions.get(textPositions.size() - 1) .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; @@ -283,7 +299,9 @@ public class TextPositionSequence implements CharSequence { .getYDirAdj() + 2; } - return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); + var rectangle = new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); + log.debug("Rectangle: {}", rectangle); + return rectangle; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/VV-511309.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/VV-511309.pdf new file mode 100644 index 00000000..91bd608a Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/VV-511309.pdf differ