From 1cc66a309242783dc0f899eda0c7b89e23a50cc7 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Tue, 26 Jul 2022 09:42:43 +0200 Subject: [PATCH] RED-4753: Reduced size of TEXT File --- .../server/parsing/model/RedTextPosition.java | 51 +++-- .../parsing/model/TextPositionSequence.java | 195 ++++++++++-------- .../redaction/model/SearchableText.java | 2 +- .../service/RedactionLogCreatorService.java | 65 +++--- 4 files changed, 181 insertions(+), 132 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java index a7161730..b7b63614 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java @@ -22,22 +22,20 @@ import lombok.SneakyThrows; public class RedTextPosition { private String textMatrix; + private float[] position; + + @JsonIgnore private int rotation; - private float y; + + @JsonIgnore private float pageHeight; + + @JsonIgnore private float pageWidth; + private String unicode; - @JsonAlias("xdirAdj") - @JsonAttribute(alternativeNames = {"xdirAdj"}) - private float XDirAdj; - - @JsonAlias("ydirAdj") - @JsonAttribute(alternativeNames = {"ydirAdj"}) - private float YDirAdj; - private float width; - private float heightDir; - private float widthDirAdj; + @JsonIgnore private float dir; // not used in reanalysis @@ -66,18 +64,37 @@ public class RedTextPosition { pos.setTextMatrix(textPosition.getTextMatrix().toString()); + var position = new float[4]; + + position[0] = textPosition.getXDirAdj(); + position[1] = textPosition.getYDirAdj(); + position[2] = textPosition.getWidthDirAdj(); + position[3] = textPosition.getHeightDir(); + + pos.setPosition(position); return pos; } - @JsonAlias("xdirAdj") - @JsonAttribute(alternativeNames = {"xdirAdj"}) - public void setXDirAdj(float XDirAdj) {this.XDirAdj = XDirAdj;} + @JsonIgnore + public float getXDirAdj(){ + return position[0]; + } + @JsonIgnore + public float getYDirAdj(){ + return position[1]; + } - @JsonAlias("ydirAdj") - @JsonAttribute(alternativeNames = {"ydirAdj"}) - public void setYDirAdj(float YDirAdj) {this.YDirAdj = YDirAdj;} + @JsonIgnore + public float getWidthDirAdj(){ + return position[2]; + } + + @JsonIgnore + public float getHeightDir(){ + return position[3]; + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java index 724351e3..6770712d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java @@ -31,8 +31,10 @@ public class TextPositionSequence implements CharSequence { private int page; private List textPositions = new ArrayList<>(); - private float x1; - private float x2; + private float dir; + private int rotation; + private float pageHeight; + private float pageWidth; public TextPositionSequence(int page) { @@ -41,7 +43,7 @@ public class TextPositionSequence implements CharSequence { } - public static TextPositionSequence fromData(List textPositions, int page) { + public TextPositionSequence fromData(List textPositions, int page) { var textPositionSequence = new TextPositionSequence(); textPositionSequence.textPositions = textPositions; @@ -55,6 +57,10 @@ public class TextPositionSequence implements CharSequence { this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList()); this.page = page; + this.dir = textPositions.get(0).getDir(); + this.rotation = textPositions.get(0).getRotation(); + this.pageHeight = textPositions.get(0).getPageHeight(); + this.pageWidth = textPositions.get(0).getPageWidth(); } @@ -85,7 +91,15 @@ public class TextPositionSequence implements CharSequence { @Override public TextPositionSequence subSequence(int start, int end) { - return fromData(textPositions.subList(start, end), page); + var textPositionSequence = new TextPositionSequence(); + textPositionSequence.textPositions = textPositions.subList(start, end); + textPositionSequence.page = page; + textPositionSequence.dir = dir; + textPositionSequence.rotation = rotation; + textPositionSequence.pageHeight = pageHeight; + textPositionSequence.pageWidth = pageWidth; + + return textPositionSequence; } @@ -106,15 +120,26 @@ public class TextPositionSequence implements CharSequence { } - public void add(RedTextPosition textPosition) { + public void add(TextPositionSequence textPositionSequence, RedTextPosition textPosition) { this.textPositions.add(textPosition); + this.page = textPositionSequence.getPage(); + this.dir = textPositionSequence.getDir(); + this.rotation = textPositionSequence.getRotation(); + this.pageHeight = textPositionSequence.getPageHeight(); + this.pageWidth = textPositionSequence.getPageWidth(); } public void add(TextPosition textPosition) { this.textPositions.add(RedTextPosition.fromTextPosition(textPosition)); + + this.dir = textPositions.get(0).getDir(); + this.rotation = textPositions.get(0).getRotation(); + this.pageHeight = textPositions.get(0).getPageHeight(); + this.pageWidth = textPositions.get(0).getPageWidth(); + } @@ -122,7 +147,7 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public float getX1() { - if (textPositions.get(0).getRotation() == 90) { + if (rotation == 90) { return textPositions.get(0).getYDirAdj() - getTextHeight(); } else { return textPositions.get(0).getXDirAdj(); @@ -134,10 +159,11 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public float getX2() { - if (textPositions.get(0).getRotation() == 90) { + if (rotation == 90) { return textPositions.get(0).getYDirAdj(); } else { - return textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; + return textPositions.get(textPositions.size() - 1) + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; } } @@ -146,15 +172,7 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public float getRotationAdjustedY() { - return textPositions.get(0).getY(); - } - - - @JsonIgnore - @JsonAttribute(ignore = true) - public float getRotationAdjustedX() { - - return textPositions.get(0).getXDirAdj(); + return textPositions.get(0).getYDirAdj(); } @@ -162,10 +180,10 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public float getY1() { - if (textPositions.get(0).getRotation() == 90) { + if (rotation == 90) { return textPositions.get(0).getXDirAdj(); } else { - return textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj(); + return pageHeight - textPositions.get(0).getYDirAdj(); } } @@ -174,10 +192,10 @@ public class TextPositionSequence implements CharSequence { @JsonAttribute(ignore = true) public float getY2() { - if (textPositions.get(0).getRotation() == 90) { + if (rotation == 90) { return textPositions.get(textPositions.size() - 1).getXDirAdj() + getTextHeight() - 2; } else { - return textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() + getTextHeight(); + return pageHeight - textPositions.get(0).getYDirAdj() + getTextHeight(); } } @@ -249,19 +267,11 @@ public class TextPositionSequence implements CharSequence { } - @JsonIgnore - @JsonAttribute(ignore = true) - public int getRotation() { - - return textPositions.get(0).getRotation(); - } - - @JsonIgnore @JsonAttribute(ignore = true) public Rectangle getRectangle() { - log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, this, textPositions.get(0).getRotation(), textPositions.get(0).getDir()); + log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, toString(), rotation, dir); float height = getTextHeight(); @@ -270,113 +280,132 @@ public class TextPositionSequence implements CharSequence { float posYInit; float posYEnd; - if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 90f) { + if (rotation == 0 && dir == 90f) { posYInit = getX1(); - posYEnd = getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; + posYEnd = getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1) + .getWidthDirAdj() - 3; posXInit = textPositions.get(0).getYDirAdj() + 2; posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; - } else if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 180f) { + } else if (rotation == 0 && dir == 180f) { - posXInit = textPositions.get(0).getPageWidth() - getX1() + 1; - posXEnd = textPositions.get(0).getPageWidth() - getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; + posXInit = pageWidth - getX1() + 1; + posXEnd = pageWidth - getX2() + textPositions.get(0) + .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posYInit = textPositions.get(0).getYDirAdj() - height + 2; posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2; - } else if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 270f) { + } else if (rotation == 0 && dir == 270f) { - posYInit = textPositions.get(0).getPageHeight() - getX1(); - posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; - posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; - posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getYDirAdj() + height; + posYInit = pageHeight - getX1(); + posYEnd = pageHeight - getX2() - textPositions.get(0) + .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; + posXInit = pageWidth - textPositions.get(0).getYDirAdj() - 2; + posXEnd = pageWidth - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + height; - } else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 0.0f) { + } else if (rotation == 90 && dir == 0.0f) { - posXInit = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir(); + posXInit = textPositions.get(textPositions.size() - 1) + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir(); posXEnd = textPositions.get(0).getXDirAdj(); - posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; - posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj() + 2; + posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2; + posYEnd = pageHeight - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + 2; - } else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 90.0f) { + } else if (rotation == 90 && dir == 90.0f) { posXEnd = textPositions.get(0).getYDirAdj() + 2; posYInit = getY1(); posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; - } else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 180.0f) { + } else if (rotation == 90 && dir == 180.0f) { - posXInit = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getXDirAdj() - 4; - posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(0).getXDirAdj(); - posYInit = textPositions.get(0).getYDirAdj() - 2 - textPositions.get(textPositions.size() - 1).getHeightDir(); - posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - textPositions.get(textPositions.size() - 1).getHeightDir(); + posXInit = pageWidth - textPositions.get(textPositions.size() - 1) + .getXDirAdj() - 4; + posXEnd = pageWidth - textPositions.get(0).getXDirAdj(); + posYInit = textPositions.get(0).getYDirAdj() - 2 - textPositions.get(textPositions.size() - 1) + .getHeightDir(); + posYEnd = textPositions.get(textPositions.size() - 1) + .getYDirAdj() - textPositions.get(textPositions.size() - 1).getHeightDir(); - } else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 270.0f) { + } else if (rotation == 90 && dir == 270.0f) { - posXInit = textPositions.get(0).getPageWidth() - getX1(); - posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; - posYInit = textPositions.get(0).getPageHeight() - getY1(); - posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getXDirAdj() - height - 4; + posXInit = pageWidth - getX1(); + posXEnd = pageWidth - textPositions.get(0).getYDirAdj() - 2; + posYInit = pageHeight - getY1(); + posYEnd = pageHeight - textPositions.get(textPositions.size() - 1) + .getXDirAdj() - height - 4; - } else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 0f) { + } else if (rotation == 180 && dir == 0f) { - posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; - posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; - posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj() + 2; + posXEnd = textPositions.get(textPositions.size() - 1) + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; + posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2; + posYEnd = pageHeight - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + 2; - } else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 90f) { + } else if (rotation == 180 && dir == 90f) { posYInit = getX1(); posYEnd = getX2() - 3; posXInit = textPositions.get(0).getYDirAdj() + 2; posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; - } else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 180f) { + } else if (rotation == 180 && dir == 180f) { - posXInit = textPositions.get(0).getPageWidth() - getX1() + 1; - posXEnd = textPositions.get(0).getPageWidth() - getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; + posXInit = pageWidth - getX1() + 1; + posXEnd = pageWidth - getX2() + textPositions.get(0) + .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posYInit = textPositions.get(0).getYDirAdj() - height + 2; posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2; - } else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 270.0f) { + } else if (rotation == 180 && dir == 270.0f) { - posYInit = textPositions.get(0).getPageHeight() - getX1(); - posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj(); - posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; - posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getYDirAdj() + height; + posYInit = pageHeight - getX1(); + posYEnd = pageHeight - getX2() - textPositions.get(0) + .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj(); + posXInit = pageWidth - textPositions.get(0).getYDirAdj() - 2; + posXEnd = pageWidth - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + height; - } else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 0.0f) { + } else if (rotation == 270 && dir == 0.0f) { - posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; + posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2; posYEnd = posYInit + 1; posXInit = textPositions.get(0).getXDirAdj(); - posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 0.1f; + posXEnd = textPositions.get(textPositions.size() - 1) + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 0.1f; - } else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 90.0f) { + } else if (rotation == 270 && dir == 90.0f) { posYInit = getX1(); posYEnd = getX2() - height; posXInit = textPositions.get(0).getYDirAdj() + 2; posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; - } else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 180.0f) { + } else if (rotation == 270 && dir == 180.0f) { - posXInit = textPositions.get(0).getPageWidth() - getX1() + 1; - posXEnd = textPositions.get(0).getPageWidth() - getX2() - 4; + posXInit = pageWidth - getX1() + 1; + posXEnd = pageWidth - getX2() - 4; posYInit = textPositions.get(0).getYDirAdj() - height + 2; posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2; - } else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 270.0f) { + } else if (rotation == 270 && dir == 270.0f) { - posYInit = textPositions.get(0).getPageHeight() - getX1(); - posYEnd = textPositions.get(0).getPageHeight() - getX2() - height; - posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; - posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getYDirAdj() + height; + posYInit = pageHeight - getX1(); + posYEnd = pageHeight - getX2() - height; + posXInit = pageWidth - textPositions.get(0).getYDirAdj() - 2; + posXEnd = pageWidth - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + height; } else { // page rotation = 0 and text direction = 0 - posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; - posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; - posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj() + 2; + posXEnd = textPositions.get(textPositions.size() - 1) + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; + posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2; + posYEnd = pageHeight - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + 2; } var rectangle = new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java index 60565609..f7f63e07 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java @@ -100,7 +100,7 @@ public class SearchableText { .length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1) .charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i) .charAt(j, caseInsensitive) != ' ') { - partMatch.add(searchSpace.get(i).textPositionAt(j)); + partMatch.add(searchSpace.get(i),searchSpace.get(i).textPositionAt(j)); if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i) .charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) { counter++; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 79aa82cf..ab791484 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -114,10 +114,7 @@ public class RedactionLogCreatorService { redactionLogEntry.setId(entityPositionSequence.getId()); if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) { - List rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences() - .stream() - .flatMap(seq -> seq.getTextPositions().stream()) - .collect(Collectors.toList()), page); + List rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences()); redactionLogEntry.getPositions().addAll(rectanglesPerLine); @@ -134,38 +131,43 @@ public class RedactionLogCreatorService { } - private List getRectanglesPerLine(List textPositions, int page) { + private List getRectanglesPerLine(List textPositionSequences) { List rectangles = new ArrayList<>(); - if (textPositions.size() == 1) { - rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle()); - } else { - float x = textPositions.get(0).getXDirAdj(); - float y = textPositions.get(0).getYDirAdj(); - float width = textPositions.get(0).getWidthDirAdj(); - float height = textPositions.get(0).getHeightDir(); - int startIndex = 0; - for (int i = 1; i < textPositions.size(); i++) { - float xDirAdj = textPositions.get(i).getXDirAdj(); - float yDirAdj = textPositions.get(i).getYDirAdj(); - float widthDir = textPositions.get(i).getWidthDirAdj(); - float heightDir = textPositions.get(i).getHeightDir(); + if (textPositionSequences.size() == 1) { + rectangles.add(textPositionSequences.get(0).getRectangle()); + return rectangles; + } - if (!(isCharInSameLine(y, yDirAdj, height, heightDir) && isCharClose(x, xDirAdj, width))) { - rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page) - .getRectangle()); - y = yDirAdj; - width = widthDir; - height = heightDir; - startIndex = i; - } - x = xDirAdj; + TextPositionSequence combinedSequence = new TextPositionSequence(); + for (int i = 0; i < textPositionSequences.size(); i++) { + + if(combinedSequence.getTextPositions().isEmpty()){ + combinedSequence = textPositionSequences.get(i); + continue; } - if (startIndex != textPositions.size()) { - rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page) - .getRectangle()); + + float lastSeqX = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getXDirAdj(); + float lastSeqY = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getYDirAdj(); + float lastSeqWidth = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getWidthDirAdj(); + float lastSeqHeight = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getHeightDir(); + + float currentSeqX = textPositionSequences.get(i).getTextPositions().get(0).getXDirAdj(); + float currentSeqY = textPositionSequences.get(i).getTextPositions().get(0).getYDirAdj(); + float currentSeqHeight = textPositionSequences.get(i).getTextPositions().get(0).getHeightDir(); + + if (isCharInSameLine(lastSeqY, currentSeqY, lastSeqHeight, currentSeqHeight) && isCharClose(lastSeqX, currentSeqX, lastSeqWidth)) { + combinedSequence.getTextPositions().addAll(textPositionSequences.get(i).getTextPositions()); + } else { + rectangles.add(combinedSequence.getRectangle()); + combinedSequence = textPositionSequences.get(i); } + + } + + if(!combinedSequence.getTextPositions().isEmpty()) { + rectangles.add(combinedSequence.getRectangle()); } return rectangles; @@ -207,7 +209,8 @@ public class RedactionLogCreatorService { .redacted(entity.isRedaction()) .isHint(isHint(entity.getType(), dossierTemplateId)) .isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION)) - .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) + .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType() + .equals(EntityType.FALSE_RECOMMENDATION)) .section(entity.getHeadline()) .sectionNumber(entity.getSectionNumber()) .matchedRule(entity.getMatchedRule())