RED-5381: Fixed PR findings

This commit is contained in:
deiflaender 2022-10-21 12:00:10 +02:00
parent aa43453206
commit 17bdcf8d24
3 changed files with 97 additions and 119 deletions

View File

@ -52,10 +52,10 @@ public class BlockificationService {
boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj();
boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX;
boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
boolean isSpitByRuling = isSpitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSpitByRuling)) {
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) {
Orientation prevOrientation = null;
if (!chunkBlockList1.isEmpty()) {
@ -66,15 +66,15 @@ public class BlockificationService {
chunkBlockList1.add(cb1);
chunkWords = new ArrayList<>();
if (splitByX && !isSpitByRuling) {
if (splitByX && !isSplitByRuling) {
wasSplitted = true;
cb1.setOrientation(Orientation.LEFT);
splitX1 = word.getMinXDirAdj();
} else if (newLineAfterSplit && !isSpitByRuling) {
} else if (newLineAfterSplit && !isSplitByRuling) {
wasSplitted = false;
cb1.setOrientation(Orientation.RIGHT);
splitX1 = null;
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSpitByRuling)) {
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSplitByRuling)) {
cb1.setOrientation(Orientation.LEFT);
}
@ -205,43 +205,18 @@ public class BlockificationService {
}
private boolean isSpitByRuling(float minX,
float minY,
float maxX,
float maxY,
TextPositionSequence word,
List<Ruling> horizontalRulingLines,
List<Ruling> verticalRulingLines) {
private boolean isSplitByRuling(float minX,
float minY,
float maxX,
float maxY,
TextPositionSequence word,
List<Ruling> horizontalRulingLines,
List<Ruling> verticalRulingLines) {
return isSplitByRuling(maxX,
minY,
word.getMinXDirAdj(),
word.getMinYDirAdj(),
verticalRulingLines,
word.getDir().getDegrees(),
word.getPageWidth(),
word.getPageHeight()) || isSplitByRuling(minX,
minY,
word.getMinXDirAdj(),
word.getMaxYDirAdj(),
horizontalRulingLines,
word.getDir().getDegrees(),
word.getPageWidth(),
word.getPageHeight()) || isSplitByRuling(maxX,
minY,
word.getMinXDirAdj(),
word.getMinYDirAdj(),
horizontalRulingLines,
word.getDir().getDegrees(),
word.getPageWidth(),
word.getPageHeight()) || isSplitByRuling(minX,
minY,
word.getMinXDirAdj(),
word.getMaxYDirAdj(),
verticalRulingLines,
word.getDir().getDegrees(),
word.getPageWidth(),
word.getPageHeight());
return isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) //
|| isSplitByRuling(minX, minY, word.getMinXDirAdj(), word.getMaxYDirAdj(), horizontalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) //
|| isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), horizontalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) //
|| isSplitByRuling(minX, minY, word.getMinXDirAdj(), word.getMaxYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()); //
}

View File

@ -67,10 +67,7 @@ public class BodyTextFrameService {
*/
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
float minX = 10000;
float maxX = -100;
float minY = 10000;
float maxY = -100;
BodyTextFrameExpansionsRectangle expansionsRectangle = new BodyTextFrameExpansionsRectangle();
for (Page page : pages) {
@ -93,33 +90,7 @@ public class BodyTextFrameService {
if (documentFontSizeCounter.getMostPopular() != null && textBlock.getMostPopularWordFontSize() >= documentFontSizeCounter.getMostPopular()) {
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
if (textBlock.getPdfMinY() < minX) {
minX = textBlock.getPdfMinY();
}
if (textBlock.getPdfMaxY() > maxX) {
maxX = textBlock.getPdfMaxY();
}
if (textBlock.getPdfMinX() < minY) {
minY = textBlock.getPdfMinX();
}
if (textBlock.getPdfMaxX() > maxY) {
maxY = textBlock.getPdfMaxX();
}
} else {
if (textBlock.getPdfMinX() < minX) {
minX = textBlock.getPdfMinX();
}
if (textBlock.getPdfMaxX() > maxX) {
maxX = textBlock.getPdfMaxX();
}
if (textBlock.getPdfMinY() < minY) {
minY = textBlock.getPdfMinY();
}
if (textBlock.getPdfMaxY() > maxY) {
maxY = textBlock.getPdfMaxY();
}
}
expandRectangle(textBlock, page, expansionsRectangle);
}
}
@ -132,40 +103,59 @@ public class BodyTextFrameService {
continue;
}
for (TextBlock textBlock : cell.getTextBlocks()) {
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
if (textBlock.getPdfMinY() < minX) {
minX = textBlock.getMinY();
}
if (textBlock.getPdfMaxY() > maxX) {
maxX = textBlock.getPdfMaxY();
}
if (textBlock.getPdfMinX() < minY) {
minY = textBlock.getPdfMinX();
}
if (textBlock.getPdfMaxX() > maxY) {
maxY = textBlock.getPdfMaxX();
}
} else {
if (textBlock.getPdfMinX() < minX) {
minX = textBlock.getPdfMinX();
}
if (textBlock.getPdfMaxX() > maxX) {
maxX = textBlock.getPdfMaxX();
}
if (textBlock.getPdfMinY() < minY) {
minY = textBlock.getPdfMinY();
}
if (textBlock.getPdfMaxY() > maxY) {
maxY = textBlock.getPdfMaxY();
}
}
expandRectangle(textBlock, page, expansionsRectangle);
}
}
}
}
}
}
return new Rectangle(new Point(minX, minY), maxX - minX, maxY - minY, 0);
return new Rectangle(new Point(expansionsRectangle.minX, expansionsRectangle.minY),
expansionsRectangle.maxX - expansionsRectangle.minX,
expansionsRectangle.maxY - expansionsRectangle.minY,
0);
}
/**
 * Grows {@code expansionsRectangle} so that it also covers {@code textBlock}.
 * <p>
 * For a page that is wider than it is high and has a non-zero rotation, the block's PDF Y extent
 * is applied to the rectangle's X bounds and its PDF X extent to the Y bounds. For every other
 * page, X is applied to X and Y to Y. A bound is only overwritten when the block lies strictly
 * outside of it.
 *
 * @param textBlock           block whose PDF coordinates are merged into the rectangle
 * @param page                page the block belongs to; decides whether the axes are swapped
 * @param expansionsRectangle accumulator that is mutated in place
 */
private void expandRectangle(TextBlock textBlock, Page page, BodyTextFrameExpansionsRectangle expansionsRectangle) {

    boolean swapAxes = page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0;

    // Pick the candidate values for each rectangle bound once, depending on the axis mapping.
    float lowX;
    float highX;
    float lowY;
    float highY;
    if (swapAxes) {
        lowX = textBlock.getPdfMinY();
        highX = textBlock.getPdfMaxY();
        lowY = textBlock.getPdfMinX();
        highY = textBlock.getPdfMaxX();
    } else {
        lowX = textBlock.getPdfMinX();
        highX = textBlock.getPdfMaxX();
        lowY = textBlock.getPdfMinY();
        highY = textBlock.getPdfMaxY();
    }

    // Strict comparisons: a bound moves only when the block reaches beyond it.
    if (lowX < expansionsRectangle.minX) {
        expansionsRectangle.minX = lowX;
    }
    if (highX > expansionsRectangle.maxX) {
        expansionsRectangle.maxX = highX;
    }
    if (lowY < expansionsRectangle.minY) {
        expansionsRectangle.minY = lowY;
    }
    if (highY > expansionsRectangle.maxY) {
        expansionsRectangle.maxY = highY;
    }
}
private class BodyTextFrameExpansionsRectangle {
float minX = 10000;
float maxX = -100;
float minY = 10000;
float maxY = -100;
}
}

View File

@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@ -26,6 +27,9 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class PdfVisualisationService {
// Debug switch: when true, drawPositions(...) is called for each text block to print its corner coordinates.
private static final boolean DRAW_POSITIONS = false;
public void visualizeParagraphs(Document classifiedDoc, PDDocument document) throws IOException {
for (int page = 1; page <= document.getNumberOfPages(); page++) {
@ -109,29 +113,38 @@ public class PdfVisualisationService {
contentStream.endText();
contentStream.setNonStrokingColor(Color.BLUE);
contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f);
// contentStream.beginText();
// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMinY());
// contentStream.showText("MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")");
// contentStream.endText();
// contentStream.beginText();
// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMinY());
// contentStream.showText("MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")");
// contentStream.endText();
// contentStream.beginText();
// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY());
// contentStream.showText("MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")");
// contentStream.endText();
// contentStream.beginText();
// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMaxY());
// contentStream.showText("MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")");
// contentStream.endText();
if (DRAW_POSITIONS) {
drawPositions(contentStream, textBlock);
}
}
}
/**
 * Debug aid that prints a small blue label at each of the four corner coordinates of
 * {@code textBlock}. Each label names the corner and shows its PDF coordinates.
 * <p>
 * Exceptions raised by the content stream propagate undeclared via {@code @SneakyThrows}.
 *
 * @param contentStream stream of the page that is being annotated
 * @param textBlock     block whose PDF bounds are labelled
 */
@SneakyThrows
private void drawPositions(PDPageContentStream contentStream, TextBlock textBlock) {

    contentStream.setNonStrokingColor(Color.BLUE);
    contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f);

    // Same order as before: bottom-left, bottom-right, top-left, top-right (in PDF min/max terms).
    writeCornerLabel(contentStream,
            textBlock.getPdfMinX(),
            textBlock.getPdfMinY(),
            "MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")");
    writeCornerLabel(contentStream,
            textBlock.getPdfMaxX(),
            textBlock.getPdfMinY(),
            "MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")");
    writeCornerLabel(contentStream,
            textBlock.getPdfMinX(),
            textBlock.getPdfMaxY(),
            "MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")");
    writeCornerLabel(contentStream,
            textBlock.getPdfMaxX(),
            textBlock.getPdfMaxY(),
            "MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")");
}


/**
 * Writes {@code label} as a single text object positioned at ({@code x}, {@code y}).
 */
private void writeCornerLabel(PDPageContentStream contentStream, float x, float y, String label) throws IOException {

    contentStream.beginText();
    contentStream.newLineAtOffset(x, y);
    contentStream.showText(label);
    contentStream.endText();
}
private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
for (List<Cell> row : table.getRows()) {