From 17bdcf8d2469429106234d9ccfdb9f2db17d9b84 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Fri, 21 Oct 2022 12:00:10 +0200 Subject: [PATCH] RED-5381: Fixed pr findings --- .../service/BlockificationService.java | 57 +++------ .../service/BodyTextFrameService.java | 108 ++++++++---------- .../service/PdfVisualisationService.java | 51 ++++++--- 3 files changed, 97 insertions(+), 119 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java index 028be48b..2cff711a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java @@ -52,10 +52,10 @@ public class BlockificationService { boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj(); boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX; boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj(); - boolean isSpitByRuling = isSpitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines); + boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines); boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir()); - if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSpitByRuling)) { + if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) { Orientation 
prevOrientation = null; if (!chunkBlockList1.isEmpty()) { @@ -66,15 +66,15 @@ public class BlockificationService { chunkBlockList1.add(cb1); chunkWords = new ArrayList<>(); - if (splitByX && !isSpitByRuling) { + if (splitByX && !isSplitByRuling) { wasSplitted = true; cb1.setOrientation(Orientation.LEFT); splitX1 = word.getMinXDirAdj(); - } else if (newLineAfterSplit && !isSpitByRuling) { + } else if (newLineAfterSplit && !isSplitByRuling) { wasSplitted = false; cb1.setOrientation(Orientation.RIGHT); splitX1 = null; - } else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSpitByRuling)) { + } else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !isSplitByRuling)) { cb1.setOrientation(Orientation.LEFT); } @@ -205,43 +205,19 @@ public class BlockificationService { } - private boolean isSpitByRuling(float minX, - float minY, - float maxX, - float maxY, - TextPositionSequence word, - List horizontalRulingLines, - List verticalRulingLines) { + /** Returns true when any of the four delegated ruling checks between the current chunk bounds and the word reports a split; maxY is accepted but never read here. */ + private boolean isSplitByRuling(float minX, + float minY, + float maxX, + float maxY, + TextPositionSequence word, + List horizontalRulingLines, + List verticalRulingLines) { - return isSplitByRuling(maxX, - minY, - word.getMinXDirAdj(), - word.getMinYDirAdj(), - verticalRulingLines, - word.getDir().getDegrees(), - word.getPageWidth(), - word.getPageHeight()) || isSplitByRuling(minX, - minY, - word.getMinXDirAdj(), - word.getMaxYDirAdj(), - horizontalRulingLines, - word.getDir().getDegrees(), - word.getPageWidth(), - word.getPageHeight()) || isSplitByRuling(maxX, - minY, - word.getMinXDirAdj(), - word.getMinYDirAdj(), - horizontalRulingLines, - word.getDir().getDegrees(), - word.getPageWidth(), - word.getPageHeight()) || isSplitByRuling(minX, - minY, - word.getMinXDirAdj(), - word.getMaxYDirAdj(), - verticalRulingLines, - 
word.getDir().getDegrees(), - word.getPageWidth(), - word.getPageHeight()); + return isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) // + || isSplitByRuling(minX, minY, word.getMinXDirAdj(), word.getMaxYDirAdj(), horizontalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) // + || isSplitByRuling(maxX, minY, word.getMinXDirAdj(), word.getMinYDirAdj(), horizontalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()) // + || isSplitByRuling(minX, minY, word.getMinXDirAdj(), word.getMaxYDirAdj(), verticalRulingLines, word.getDir().getDegrees(), word.getPageWidth(), word.getPageHeight()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BodyTextFrameService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BodyTextFrameService.java index 05b8b825..95eb4e1e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BodyTextFrameService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BodyTextFrameService.java @@ -67,10 +67,7 @@ public class BodyTextFrameService { */ public Rectangle calculateBodyTextFrame(List pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) { - float minX = 10000; - float maxX = -100; - float minY = 10000; - float maxY = -100; + BodyTextFrameExpansionsRectangle expansionsRectangle = new BodyTextFrameExpansionsRectangle(); for (Page page : pages) { @@ -93,33 +90,7 @@ public class BodyTextFrameService { if (documentFontSizeCounter.getMostPopular() != null && textBlock.getMostPopularWordFontSize() >=
documentFontSizeCounter.getMostPopular()) { - if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) { - if (textBlock.getPdfMinY() < minX) { - minX = textBlock.getPdfMinY(); - } - if (textBlock.getPdfMaxY() > maxX) { - maxX = textBlock.getPdfMaxY(); - } - if (textBlock.getPdfMinX() < minY) { - minY = textBlock.getPdfMinX(); - } - if (textBlock.getPdfMaxX() > maxY) { - maxY = textBlock.getPdfMaxX(); - } - } else { - if (textBlock.getPdfMinX() < minX) { - minX = textBlock.getPdfMinX(); - } - if (textBlock.getPdfMaxX() > maxX) { - maxX = textBlock.getPdfMaxX(); - } - if (textBlock.getPdfMinY() < minY) { - minY = textBlock.getPdfMinY(); - } - if (textBlock.getPdfMaxY() > maxY) { - maxY = textBlock.getPdfMaxY(); - } - } + expandRectangle(textBlock, page, expansionsRectangle); } } @@ -132,40 +103,61 @@ public class BodyTextFrameService { continue; } for (TextBlock textBlock : cell.getTextBlocks()) { - if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) { - if (textBlock.getPdfMinY() < minX) { - minX = textBlock.getMinY(); - } - if (textBlock.getPdfMaxY() > maxX) { - maxX = textBlock.getPdfMaxY(); - } - if (textBlock.getPdfMinX() < minY) { - minY = textBlock.getPdfMinX(); - } - if (textBlock.getPdfMaxX() > maxY) { - maxY = textBlock.getPdfMaxX(); - } - } else { - if (textBlock.getPdfMinX() < minX) { - minX = textBlock.getPdfMinX(); - } - if (textBlock.getPdfMaxX() > maxX) { - maxX = textBlock.getPdfMaxX(); - } - if (textBlock.getPdfMinY() < minY) { - minY = textBlock.getPdfMinY(); - } - if (textBlock.getPdfMaxY() > maxY) { - maxY = textBlock.getPdfMaxY(); - } - } + expandRectangle(textBlock, page, expansionsRectangle); } } } } } } - return new Rectangle(new Point(minX, minY), maxX - minX, maxY - minY, 0); + return new Rectangle(new Point(expansionsRectangle.minX, expansionsRectangle.minY), + expansionsRectangle.maxX - expansionsRectangle.minX, + expansionsRectangle.maxY - expansionsRectangle.minY, + 0); + } + + + /** Widens the accumulator so it also covers the given text block; for pages wider than tall with a non-zero rotation the block's PDF x and y extents are swapped. */ + private void 
expandRectangle(TextBlock textBlock, Page page, BodyTextFrameExpansionsRectangle expansionsRectangle) { + + if (page.getPageWidth() > page.getPageHeight() && page.getRotation() != 0) { + if (textBlock.getPdfMinY() < expansionsRectangle.minX) { + expansionsRectangle.minX = textBlock.getPdfMinY(); + } + if (textBlock.getPdfMaxY() > expansionsRectangle.maxX) { + expansionsRectangle.maxX = textBlock.getPdfMaxY(); + } + if (textBlock.getPdfMinX() < expansionsRectangle.minY) { + expansionsRectangle.minY = textBlock.getPdfMinX(); + } + if (textBlock.getPdfMaxX() > expansionsRectangle.maxY) { + expansionsRectangle.maxY = textBlock.getPdfMaxX(); + } + } else { + if (textBlock.getPdfMinX() < expansionsRectangle.minX) { + expansionsRectangle.minX = textBlock.getPdfMinX(); + } + if (textBlock.getPdfMaxX() > expansionsRectangle.maxX) { + expansionsRectangle.maxX = textBlock.getPdfMaxX(); + } + if (textBlock.getPdfMinY() < expansionsRectangle.minY) { + expansionsRectangle.minY = textBlock.getPdfMinY(); + } + if (textBlock.getPdfMaxY() > expansionsRectangle.maxY) { + expansionsRectangle.maxY = textBlock.getPdfMaxY(); + } + } + } + + + /** Mutable min/max accumulator for the body text frame, initialised with min above max; static because it never reads state of the enclosing service. */ + private static class BodyTextFrameExpansionsRectangle { + + float minX = 10000; + float maxX = -100; + float minY = 10000; + float maxY = -100; + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java index 751f37fa..a51ea73a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
@@ -26,6 +26,9 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class PdfVisualisationService { + private static final boolean DRAW_POSITIONS = false; + + public void visualizeParagraphs(Document classifiedDoc, PDDocument document) throws IOException { for (int page = 1; page <= document.getNumberOfPages(); page++) { @@ -109,29 +112,38 @@ public class PdfVisualisationService { contentStream.endText(); - contentStream.setNonStrokingColor(Color.BLUE); - contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f); - -// contentStream.beginText(); -// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMinY()); -// contentStream.showText("MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")"); -// contentStream.endText(); -// contentStream.beginText(); -// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMinY()); -// contentStream.showText("MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")"); -// contentStream.endText(); -// contentStream.beginText(); -// contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY()); -// contentStream.showText("MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")"); -// contentStream.endText(); -// contentStream.beginText(); -// contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMaxY()); -// contentStream.showText("MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")"); -// contentStream.endText(); + if (DRAW_POSITIONS) { + drawPositions(contentStream, textBlock); + } } } + /** Debug aid invoked behind the DRAW_POSITIONS flag: writes the four min/max X/Y coordinate pairs of the text block as blue 2pt labels at those positions; IOException is declared explicitly, like the sibling visualize methods. */ + private void drawPositions(PDPageContentStream contentStream, TextBlock textBlock) throws IOException { + + 
contentStream.setNonStrokingColor(Color.BLUE); + contentStream.setFont(PDType1Font.TIMES_ROMAN, 2f); + + contentStream.beginText(); + contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMinY()); + contentStream.showText("MinX,MinY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMinY() + ")"); + contentStream.endText(); + contentStream.beginText(); + contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMinY()); + contentStream.showText("MaxX,MinY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMinY() + ")"); + contentStream.endText(); + contentStream.beginText(); + contentStream.newLineAtOffset(textBlock.getPdfMinX(), textBlock.getPdfMaxY()); + contentStream.showText("MinX,MaxY(" + textBlock.getPdfMinX() + "," + textBlock.getPdfMaxY() + ")"); + contentStream.endText(); + contentStream.beginText(); + contentStream.newLineAtOffset(textBlock.getPdfMaxX(), textBlock.getPdfMaxY()); + contentStream.showText("MaxX,MaxY(" + textBlock.getPdfMaxX() + "," + textBlock.getPdfMaxY() + ")"); + contentStream.endText(); + } + + private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException { for (List row : table.getRows()) {