RED-5381: Fixed PR findings
This commit is contained in:
parent
aa43453206
commit
17bdcf8d24
@ -52,10 +52,10 @@ public class BlockificationService {
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj();
|
||||
boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX;
|
||||
boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
|
||||
boolean isSpitByRuling = isSpitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
|
||||
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
|
||||
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
|
||||
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSpitByRuling)) {
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) {
|
||||
|
||||
Orientation prevOrientation = null;
|
||||
if (!chunkBlockList1.isEmpty()) {
|
||||
@ -66,15 +66,15 @@ public class BlockificationService {
|
||||
chunkBlockList1.add(cb1);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
if (splitByX && !isSpitByRuling) {
|
||||
if (splitByX && !isSplitByRuling) {
|
||||
wasSplitted = true;
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
splitX1 = word.getMinXDirAdj();
|
||||
} else if (newLineAfterSplit && !isSpitByRuling) {
|
||||
} else if (newLineAfterSplit && !isSplitByRuling) {
|
||||
wasSplitted = false;
|
||||
cb1.setOrientation(Orientation.RIGHT);
|
||||
splitX1 = null;
|
||||
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSpitByRuling)) {
|
||||
} else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSplitByRuling)) {
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
}
|
||||
|
||||
@ -205,43 +205,18 @@ public class BlockificationService {
|
||||
}
|
||||
|
||||
|
||||
private boolean isSpitByRuling(float minX,
|
||||
float minY,
|
||||
float maxX,
|
||||
float maxY,
|
||||
TextPositionSequence word,
|
||||
List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
private boolean isSplitByRuling(float minX,
|
||||
float minY,
|
||||
float maxX,
|
||||
float maxY,
|
||||
TextPositionSequence word,
|
||||
List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
|
||||
return isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) || isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) || isSplitByRuling(maxX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMinYDirAdj(),
|
||||
horizontalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight()) || isSplitByRuling(minX,
|
||||
minY,
|
||||
word.getMinXDirAdj(),
|
||||
word.getMaxYDirAdj(),
|
||||
verticalRulingLines,
|
||||
word.getDir().getDegrees(),
|
||||
word.getPageWidth(),
|
||||
word.getPageHeight());
|
||||
return isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) //
|
||||
|| isSplitByRuling(minX, minY, word.getMinXDirAdj(), word.getMaxYDirAdj(), horizontalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) //
|
||||
|| isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), horizontalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) //
|
||||
|| isSplitByRuling(minX, minY, word.getMinXDirAdj(), word.getMaxYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()); //
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -67,10 +67,7 @@ public class BodyTextFrameService {
|
||||
*/
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
float maxY = -100;
|
||||
BodyTextFrameExpansionsRectangle expansionsRectangle = new BodyTextFrameExpansionsRectangle();
|
||||
|
||||
for (Page page : pages) {
|
||||
|
||||
@ -93,33 +90,7 @@ public class BodyTextFrameService {
|
||||
|
||||
if (documentFontSizeCounter.getMostPopular() != null && textBlock.getMostPopularWordFontSize() >= documentFontSizeCounter.getMostPopular()) {
|
||||
|
||||
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
|
||||
if (textBlock.getPdfMinY() < minX) {
|
||||
minX = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxX) {
|
||||
maxX = textBlock.getPdfMaxY();
|
||||
}
|
||||
if (textBlock.getPdfMinX() < minY) {
|
||||
minY = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxY) {
|
||||
maxY = textBlock.getPdfMaxX();
|
||||
}
|
||||
} else {
|
||||
if (textBlock.getPdfMinX() < minX) {
|
||||
minX = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxX) {
|
||||
maxX = textBlock.getPdfMaxX();
|
||||
}
|
||||
if (textBlock.getPdfMinY() < minY) {
|
||||
minY = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxY) {
|
||||
maxY = textBlock.getPdfMaxY();
|
||||
}
|
||||
}
|
||||
expandRectangle(textBlock, page, expansionsRectangle);
|
||||
}
|
||||
}
|
||||
|
||||
@ -132,40 +103,59 @@ public class BodyTextFrameService {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
|
||||
if (textBlock.getPdfMinY() < minX) {
|
||||
minX = textBlock.getMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxX) {
|
||||
maxX = textBlock.getPdfMaxY();
|
||||
}
|
||||
if (textBlock.getPdfMinX() < minY) {
|
||||
minY = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxY) {
|
||||
maxY = textBlock.getPdfMaxX();
|
||||
}
|
||||
} else {
|
||||
if (textBlock.getPdfMinX() < minX) {
|
||||
minX = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > maxX) {
|
||||
maxX = textBlock.getPdfMaxX();
|
||||
}
|
||||
if (textBlock.getPdfMinY() < minY) {
|
||||
minY = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > maxY) {
|
||||
maxY = textBlock.getPdfMaxY();
|
||||
}
|
||||
}
|
||||
expandRectangle(textBlock, page, expansionsRectangle);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Rectangle(new Point(minX, minY), maxX - minX, maxY - minY, 0);
|
||||
return new Rectangle(new Point(expansionsRectangle.minX, expansionsRectangle.minY),
|
||||
expansionsRectangle.maxX - expansionsRectangle.minX,
|
||||
expansionsRectangle.maxY - expansionsRectangle.minY,
|
||||
0);
|
||||
}
|
||||
|
||||
|
||||
private void expandRectangle(TextBlock textBlock, Page page, BodyTextFrameExpansionsRectangle expansionsRectangle) {
|
||||
|
||||
if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) {
|
||||
if (textBlock.getPdfMinY() < expansionsRectangle.minX) {
|
||||
expansionsRectangle.minX = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > expansionsRectangle.maxX) {
|
||||
expansionsRectangle.maxX = textBlock.getPdfMaxY();
|
||||
}
|
||||
if (textBlock.getPdfMinX() < expansionsRectangle.minY) {
|
||||
expansionsRectangle.minY = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > expansionsRectangle.maxY) {
|
||||
expansionsRectangle.maxY = textBlock.getPdfMaxX();
|
||||
}
|
||||
} else {
|
||||
if (textBlock.getPdfMinX() < expansionsRectangle.minX) {
|
||||
expansionsRectangle.minX = textBlock.getPdfMinX();
|
||||
}
|
||||
if (textBlock.getPdfMaxX() > expansionsRectangle.maxX) {
|
||||
expansionsRectangle.maxX = textBlock.getPdfMaxX();
|
||||
}
|
||||
if (textBlock.getPdfMinY() < expansionsRectangle.minY) {
|
||||
expansionsRectangle.minY = textBlock.getPdfMinY();
|
||||
}
|
||||
if (textBlock.getPdfMaxY() > expansionsRectangle.maxY) {
|
||||
expansionsRectangle.maxY = textBlock.getPdfMaxY();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private class BodyTextFrameExpansionsRectangle {
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
float maxY = -100;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@ -26,6 +27,9 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class PdfVisualisationService {
|
||||
|
||||
private static final boolean DRAW_POSITIONS = false;
|
||||
|
||||
|
||||
public void visualizeParagraphs(Document classifiedDoc, PDDocument document) throws IOException {
|
||||
|
||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||
@ -109,29 +113,38 @@ public class PdfVisualisationService {
|
||||
|
||||
contentStream.endText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.BLUE);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f);
|
||||
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMinY());
|
||||
// contentStream.showText("MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")");
|
||||
// contentStream.endText();
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMinY());
|
||||
// contentStream.showText("MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")");
|
||||
// contentStream.endText();
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY());
|
||||
// contentStream.showText("MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")");
|
||||
// contentStream.endText();
|
||||
// contentStream.beginText();
|
||||
// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMaxY());
|
||||
// contentStream.showText("MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")");
|
||||
// contentStream.endText();
|
||||
if (DRAW_POSITIONS) {
|
||||
drawPositions(contentStream, textBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void drawPositions(PDPageContentStream contentStream, TextBlock textBlock) {
|
||||
|
||||
contentStream.setNonStrokingColor(Color.BLUE);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f);
|
||||
|
||||
contentStream.beginText();
|
||||
contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMinY());
|
||||
contentStream.showText("MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")");
|
||||
contentStream.endText();
|
||||
contentStream.beginText();
|
||||
contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMinY());
|
||||
contentStream.showText("MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")");
|
||||
contentStream.endText();
|
||||
contentStream.beginText();
|
||||
contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY());
|
||||
contentStream.showText("MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")");
|
||||
contentStream.endText();
|
||||
contentStream.beginText();
|
||||
contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMaxY());
|
||||
contentStream.showText("MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")");
|
||||
contentStream.endText();
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
|
||||
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user