RED-4753: Reduced size of TEXT File

This commit is contained in:
deiflaender 2022-07-26 09:42:43 +02:00
parent 17baf9c0eb
commit 1cc66a3092
4 changed files with 181 additions and 132 deletions

View File

@ -22,22 +22,20 @@ import lombok.SneakyThrows;
public class RedTextPosition { public class RedTextPosition {
private String textMatrix; private String textMatrix;
private float[] position;
@JsonIgnore
private int rotation; private int rotation;
private float y;
@JsonIgnore
private float pageHeight; private float pageHeight;
@JsonIgnore
private float pageWidth; private float pageWidth;
private String unicode; private String unicode;
@JsonAlias("xdirAdj") @JsonIgnore
@JsonAttribute(alternativeNames = {"xdirAdj"})
private float XDirAdj;
@JsonAlias("ydirAdj")
@JsonAttribute(alternativeNames = {"ydirAdj"})
private float YDirAdj;
private float width;
private float heightDir;
private float widthDirAdj;
private float dir; private float dir;
// not used in reanalysis // not used in reanalysis
@ -66,18 +64,37 @@ public class RedTextPosition {
pos.setTextMatrix(textPosition.getTextMatrix().toString()); pos.setTextMatrix(textPosition.getTextMatrix().toString());
var position = new float[4];
position[0] = textPosition.getXDirAdj();
position[1] = textPosition.getYDirAdj();
position[2] = textPosition.getWidthDirAdj();
position[3] = textPosition.getHeightDir();
pos.setPosition(position);
return pos; return pos;
} }
@JsonAlias("xdirAdj") @JsonIgnore
@JsonAttribute(alternativeNames = {"xdirAdj"}) public float getXDirAdj(){
public void setXDirAdj(float XDirAdj) {this.XDirAdj = XDirAdj;} return position[0];
}
@JsonIgnore
public float getYDirAdj(){
return position[1];
}
@JsonAlias("ydirAdj") @JsonIgnore
@JsonAttribute(alternativeNames = {"ydirAdj"}) public float getWidthDirAdj(){
public void setYDirAdj(float YDirAdj) {this.YDirAdj = YDirAdj;} return position[2];
}
@JsonIgnore
public float getHeightDir(){
return position[3];
}
} }

View File

@ -31,8 +31,10 @@ public class TextPositionSequence implements CharSequence {
private int page; private int page;
private List<RedTextPosition> textPositions = new ArrayList<>(); private List<RedTextPosition> textPositions = new ArrayList<>();
private float x1; private float dir;
private float x2; private int rotation;
private float pageHeight;
private float pageWidth;
public TextPositionSequence(int page) { public TextPositionSequence(int page) {
@ -41,7 +43,7 @@ public class TextPositionSequence implements CharSequence {
} }
public static TextPositionSequence fromData(List<RedTextPosition> textPositions, int page) { public TextPositionSequence fromData(List<RedTextPosition> textPositions, int page) {
var textPositionSequence = new TextPositionSequence(); var textPositionSequence = new TextPositionSequence();
textPositionSequence.textPositions = textPositions; textPositionSequence.textPositions = textPositions;
@ -55,6 +57,10 @@ public class TextPositionSequence implements CharSequence {
this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList()); this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList());
this.page = page; this.page = page;
this.dir = textPositions.get(0).getDir();
this.rotation = textPositions.get(0).getRotation();
this.pageHeight = textPositions.get(0).getPageHeight();
this.pageWidth = textPositions.get(0).getPageWidth();
} }
@ -85,7 +91,15 @@ public class TextPositionSequence implements CharSequence {
@Override @Override
public TextPositionSequence subSequence(int start, int end) { public TextPositionSequence subSequence(int start, int end) {
return fromData(textPositions.subList(start, end), page); var textPositionSequence = new TextPositionSequence();
textPositionSequence.textPositions = textPositions.subList(start, end);
textPositionSequence.page = page;
textPositionSequence.dir = dir;
textPositionSequence.rotation = rotation;
textPositionSequence.pageHeight = pageHeight;
textPositionSequence.pageWidth = pageWidth;
return textPositionSequence;
} }
@ -106,15 +120,26 @@ public class TextPositionSequence implements CharSequence {
} }
public void add(RedTextPosition textPosition) { public void add(TextPositionSequence textPositionSequence, RedTextPosition textPosition) {
this.textPositions.add(textPosition); this.textPositions.add(textPosition);
this.page = textPositionSequence.getPage();
this.dir = textPositionSequence.getDir();
this.rotation = textPositionSequence.getRotation();
this.pageHeight = textPositionSequence.getPageHeight();
this.pageWidth = textPositionSequence.getPageWidth();
} }
public void add(TextPosition textPosition) { public void add(TextPosition textPosition) {
this.textPositions.add(RedTextPosition.fromTextPosition(textPosition)); this.textPositions.add(RedTextPosition.fromTextPosition(textPosition));
this.dir = textPositions.get(0).getDir();
this.rotation = textPositions.get(0).getRotation();
this.pageHeight = textPositions.get(0).getPageHeight();
this.pageWidth = textPositions.get(0).getPageWidth();
} }
@ -122,7 +147,7 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true) @JsonAttribute(ignore = true)
public float getX1() { public float getX1() {
if (textPositions.get(0).getRotation() == 90) { if (rotation == 90) {
return textPositions.get(0).getYDirAdj() - getTextHeight(); return textPositions.get(0).getYDirAdj() - getTextHeight();
} else { } else {
return textPositions.get(0).getXDirAdj(); return textPositions.get(0).getXDirAdj();
@ -134,10 +159,11 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true) @JsonAttribute(ignore = true)
public float getX2() { public float getX2() {
if (textPositions.get(0).getRotation() == 90) { if (rotation == 90) {
return textPositions.get(0).getYDirAdj(); return textPositions.get(0).getYDirAdj();
} else { } else {
return textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; return textPositions.get(textPositions.size() - 1)
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1;
} }
} }
@ -146,15 +172,7 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true) @JsonAttribute(ignore = true)
public float getRotationAdjustedY() { public float getRotationAdjustedY() {
return textPositions.get(0).getY(); return textPositions.get(0).getYDirAdj();
}
@JsonIgnore
@JsonAttribute(ignore = true)
public float getRotationAdjustedX() {
return textPositions.get(0).getXDirAdj();
} }
@ -162,10 +180,10 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true) @JsonAttribute(ignore = true)
public float getY1() { public float getY1() {
if (textPositions.get(0).getRotation() == 90) { if (rotation == 90) {
return textPositions.get(0).getXDirAdj(); return textPositions.get(0).getXDirAdj();
} else { } else {
return textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj(); return pageHeight - textPositions.get(0).getYDirAdj();
} }
} }
@ -174,10 +192,10 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true) @JsonAttribute(ignore = true)
public float getY2() { public float getY2() {
if (textPositions.get(0).getRotation() == 90) { if (rotation == 90) {
return textPositions.get(textPositions.size() - 1).getXDirAdj() + getTextHeight() - 2; return textPositions.get(textPositions.size() - 1).getXDirAdj() + getTextHeight() - 2;
} else { } else {
return textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() + getTextHeight(); return pageHeight - textPositions.get(0).getYDirAdj() + getTextHeight();
} }
} }
@ -249,19 +267,11 @@ public class TextPositionSequence implements CharSequence {
} }
@JsonIgnore
@JsonAttribute(ignore = true)
public int getRotation() {
return textPositions.get(0).getRotation();
}
@JsonIgnore @JsonIgnore
@JsonAttribute(ignore = true) @JsonAttribute(ignore = true)
public Rectangle getRectangle() { public Rectangle getRectangle() {
log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, this, textPositions.get(0).getRotation(), textPositions.get(0).getDir()); log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, toString(), rotation, dir);
float height = getTextHeight(); float height = getTextHeight();
@ -270,113 +280,132 @@ public class TextPositionSequence implements CharSequence {
float posYInit; float posYInit;
float posYEnd; float posYEnd;
if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 90f) { if (rotation == 0 && dir == 90f) {
posYInit = getX1(); posYInit = getX1();
posYEnd = getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posYEnd = getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1)
.getWidthDirAdj() - 3;
posXInit = textPositions.get(0).getYDirAdj() + 2; posXInit = textPositions.get(0).getYDirAdj() + 2;
posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height;
} else if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 180f) { } else if (rotation == 0 && dir == 180f) {
posXInit = textPositions.get(0).getPageWidth() - getX1() + 1; posXInit = pageWidth - getX1() + 1;
posXEnd = textPositions.get(0).getPageWidth() - getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posXEnd = pageWidth - getX2() + textPositions.get(0)
.getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3;
posYInit = textPositions.get(0).getYDirAdj() - height + 2; posYInit = textPositions.get(0).getYDirAdj() - height + 2;
posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2; posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2;
} else if (textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 270f) { } else if (rotation == 0 && dir == 270f) {
posYInit = textPositions.get(0).getPageHeight() - getX1(); posYInit = pageHeight - getX1();
posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posYEnd = pageHeight - getX2() - textPositions.get(0)
posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3;
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getYDirAdj() + height; posXInit = pageWidth - textPositions.get(0).getYDirAdj() - 2;
posXEnd = pageWidth - textPositions.get(textPositions.size() - 1)
.getYDirAdj() + height;
} else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 0.0f) { } else if (rotation == 90 && dir == 0.0f) {
posXInit = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir(); posXInit = textPositions.get(textPositions.size() - 1)
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir();
posXEnd = textPositions.get(0).getXDirAdj(); posXEnd = textPositions.get(0).getXDirAdj();
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2;
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj() + 2; posYEnd = pageHeight - textPositions.get(textPositions.size() - 1)
.getYDirAdj() + 2;
} else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 90.0f) { } else if (rotation == 90 && dir == 90.0f) {
posXEnd = textPositions.get(0).getYDirAdj() + 2; posXEnd = textPositions.get(0).getYDirAdj() + 2;
posYInit = getY1(); posYInit = getY1();
posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
} else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 180.0f) { } else if (rotation == 90 && dir == 180.0f) {
posXInit = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getXDirAdj() - 4; posXInit = pageWidth - textPositions.get(textPositions.size() - 1)
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(0).getXDirAdj(); .getXDirAdj() - 4;
posYInit = textPositions.get(0).getYDirAdj() - 2 - textPositions.get(textPositions.size() - 1).getHeightDir(); posXEnd = pageWidth - textPositions.get(0).getXDirAdj();
posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - textPositions.get(textPositions.size() - 1).getHeightDir(); posYInit = textPositions.get(0).getYDirAdj() - 2 - textPositions.get(textPositions.size() - 1)
.getHeightDir();
posYEnd = textPositions.get(textPositions.size() - 1)
.getYDirAdj() - textPositions.get(textPositions.size() - 1).getHeightDir();
} else if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 270.0f) { } else if (rotation == 90 && dir == 270.0f) {
posXInit = textPositions.get(0).getPageWidth() - getX1(); posXInit = pageWidth - getX1();
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; posXEnd = pageWidth - textPositions.get(0).getYDirAdj() - 2;
posYInit = textPositions.get(0).getPageHeight() - getY1(); posYInit = pageHeight - getY1();
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getXDirAdj() - height - 4; posYEnd = pageHeight - textPositions.get(textPositions.size() - 1)
.getXDirAdj() - height - 4;
} else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 0f) { } else if (rotation == 180 && dir == 0f) {
posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; posXEnd = textPositions.get(textPositions.size() - 1)
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1;
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj() + 2; posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2;
posYEnd = pageHeight - textPositions.get(textPositions.size() - 1)
.getYDirAdj() + 2;
} else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 90f) { } else if (rotation == 180 && dir == 90f) {
posYInit = getX1(); posYInit = getX1();
posYEnd = getX2() - 3; posYEnd = getX2() - 3;
posXInit = textPositions.get(0).getYDirAdj() + 2; posXInit = textPositions.get(0).getYDirAdj() + 2;
posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height;
} else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 180f) { } else if (rotation == 180 && dir == 180f) {
posXInit = textPositions.get(0).getPageWidth() - getX1() + 1; posXInit = pageWidth - getX1() + 1;
posXEnd = textPositions.get(0).getPageWidth() - getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posXEnd = pageWidth - getX2() + textPositions.get(0)
.getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3;
posYInit = textPositions.get(0).getYDirAdj() - height + 2; posYInit = textPositions.get(0).getYDirAdj() - height + 2;
posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2; posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2;
} else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 270.0f) { } else if (rotation == 180 && dir == 270.0f) {
posYInit = textPositions.get(0).getPageHeight() - getX1(); posYInit = pageHeight - getX1();
posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj(); posYEnd = pageHeight - getX2() - textPositions.get(0)
posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj();
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getYDirAdj() + height; posXInit = pageWidth - textPositions.get(0).getYDirAdj() - 2;
posXEnd = pageWidth - textPositions.get(textPositions.size() - 1)
.getYDirAdj() + height;
} else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 0.0f) { } else if (rotation == 270 && dir == 0.0f) {
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2;
posYEnd = posYInit + 1; posYEnd = posYInit + 1;
posXInit = textPositions.get(0).getXDirAdj(); posXInit = textPositions.get(0).getXDirAdj();
posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 0.1f; posXEnd = textPositions.get(textPositions.size() - 1)
.getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 0.1f;
} else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 90.0f) { } else if (rotation == 270 && dir == 90.0f) {
posYInit = getX1(); posYInit = getX1();
posYEnd = getX2() - height; posYEnd = getX2() - height;
posXInit = textPositions.get(0).getYDirAdj() + 2; posXInit = textPositions.get(0).getYDirAdj() + 2;
posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height; posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height;
} else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 180.0f) { } else if (rotation == 270 && dir == 180.0f) {
posXInit = textPositions.get(0).getPageWidth() - getX1() + 1; posXInit = pageWidth - getX1() + 1;
posXEnd = textPositions.get(0).getPageWidth() - getX2() - 4; posXEnd = pageWidth - getX2() - 4;
posYInit = textPositions.get(0).getYDirAdj() - height + 2; posYInit = textPositions.get(0).getYDirAdj() - height + 2;
posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2; posYEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height + 2;
} else if (textPositions.get(0).getRotation() == 270 && textPositions.get(0).getDir() == 270.0f) { } else if (rotation == 270 && dir == 270.0f) {
posYInit = textPositions.get(0).getPageHeight() - getX1(); posYInit = pageHeight - getX1();
posYEnd = textPositions.get(0).getPageHeight() - getX2() - height; posYEnd = pageHeight - getX2() - height;
posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; posXInit = pageWidth - textPositions.get(0).getYDirAdj() - 2;
posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1).getYDirAdj() + height; posXEnd = pageWidth - textPositions.get(textPositions.size() - 1)
.getYDirAdj() + height;
} else { } else {
// page rotation = 0 and text direction = 0 // page rotation = 0 and text direction = 0
posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; posXEnd = textPositions.get(textPositions.size() - 1)
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1;
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj() + 2; posYInit = pageHeight - textPositions.get(0).getYDirAdj() - 2;
posYEnd = pageHeight - textPositions.get(textPositions.size() - 1)
.getYDirAdj() + 2;
} }
var rectangle = new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); var rectangle = new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page);

View File

@ -100,7 +100,7 @@ public class SearchableText {
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1) .length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i) .charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i)
.charAt(j, caseInsensitive) != ' ') { .charAt(j, caseInsensitive) != ' ') {
partMatch.add(searchSpace.get(i).textPositionAt(j)); partMatch.add(searchSpace.get(i),searchSpace.get(i).textPositionAt(j));
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i) if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
.charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) { .charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
counter++; counter++;

View File

@ -114,10 +114,7 @@ public class RedactionLogCreatorService {
redactionLogEntry.setId(entityPositionSequence.getId()); redactionLogEntry.setId(entityPositionSequence.getId());
if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) { if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) {
List<Rectangle> rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences() List<Rectangle> rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences());
.stream()
.flatMap(seq -> seq.getTextPositions().stream())
.collect(Collectors.toList()), page);
redactionLogEntry.getPositions().addAll(rectanglesPerLine); redactionLogEntry.getPositions().addAll(rectanglesPerLine);
@ -134,38 +131,43 @@ public class RedactionLogCreatorService {
} }
private List<Rectangle> getRectanglesPerLine(List<RedTextPosition> textPositions, int page) { private List<Rectangle> getRectanglesPerLine(List<TextPositionSequence> textPositionSequences) {
List<Rectangle> rectangles = new ArrayList<>(); List<Rectangle> rectangles = new ArrayList<>();
if (textPositions.size() == 1) {
rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle());
} else {
float x = textPositions.get(0).getXDirAdj();
float y = textPositions.get(0).getYDirAdj();
float width = textPositions.get(0).getWidthDirAdj();
float height = textPositions.get(0).getHeightDir();
int startIndex = 0;
for (int i = 1; i < textPositions.size(); i++) { if (textPositionSequences.size() == 1) {
float xDirAdj = textPositions.get(i).getXDirAdj(); rectangles.add(textPositionSequences.get(0).getRectangle());
float yDirAdj = textPositions.get(i).getYDirAdj(); return rectangles;
float widthDir = textPositions.get(i).getWidthDirAdj(); }
float heightDir = textPositions.get(i).getHeightDir();
if (!(isCharInSameLine(y, yDirAdj, height, heightDir) && isCharClose(x, xDirAdj, width))) { TextPositionSequence combinedSequence = new TextPositionSequence();
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page) for (int i = 0; i < textPositionSequences.size(); i++) {
.getRectangle());
y = yDirAdj; if(combinedSequence.getTextPositions().isEmpty()){
width = widthDir; combinedSequence = textPositionSequences.get(i);
height = heightDir; continue;
startIndex = i;
}
x = xDirAdj;
} }
if (startIndex != textPositions.size()) {
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page) float lastSeqX = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getXDirAdj();
.getRectangle()); float lastSeqY = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getYDirAdj();
float lastSeqWidth = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getWidthDirAdj();
float lastSeqHeight = textPositionSequences.get(i - 1).getTextPositions().get(textPositionSequences.get(i - 1).getTextPositions().size() - 1).getHeightDir();
float currentSeqX = textPositionSequences.get(i).getTextPositions().get(0).getXDirAdj();
float currentSeqY = textPositionSequences.get(i).getTextPositions().get(0).getYDirAdj();
float currentSeqHeight = textPositionSequences.get(i).getTextPositions().get(0).getHeightDir();
if (isCharInSameLine(lastSeqY, currentSeqY, lastSeqHeight, currentSeqHeight) && isCharClose(lastSeqX, currentSeqX, lastSeqWidth)) {
combinedSequence.getTextPositions().addAll(textPositionSequences.get(i).getTextPositions());
} else {
rectangles.add(combinedSequence.getRectangle());
combinedSequence = textPositionSequences.get(i);
} }
}
if(!combinedSequence.getTextPositions().isEmpty()) {
rectangles.add(combinedSequence.getRectangle());
} }
return rectangles; return rectangles;
@ -207,7 +209,8 @@ public class RedactionLogCreatorService {
.redacted(entity.isRedaction()) .redacted(entity.isRedaction())
.isHint(isHint(entity.getType(), dossierTemplateId)) .isHint(isHint(entity.getType(), dossierTemplateId))
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION)) .isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType()
.equals(EntityType.FALSE_RECOMMENDATION))
.section(entity.getHeadline()) .section(entity.getHeadline())
.sectionNumber(entity.getSectionNumber()) .sectionNumber(entity.getSectionNumber())
.matchedRule(entity.getMatchedRule()) .matchedRule(entity.getMatchedRule())