From 695564d162ab65a542e71afc556f4fa029bde1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Mon, 10 Aug 2020 12:04:36 +0200 Subject: [PATCH] Fix style Fix style. Fix style. Fix style and naming Fix style, naming and field modifier Fix style and remove warning suppression --- .../classification/model/Paragraph.java | 14 ++++---- .../model/StringFrequencyCounter.java | 34 ++++++++++--------- .../service/BlockificationService.java | 32 +++++++---------- .../redaction/model/SearchableText.java | 18 ++++++---- .../segmentation/PdfSegmentationService.java | 1 - .../v1/server/tableextraction/model/Cell.java | 8 ++++- .../model/RectangleSpatialIndex.java | 27 +++++++++------ 7 files changed, 75 insertions(+), 59 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java index 0c5d5e59..5f92dde0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java @@ -10,7 +10,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import lombok.Data; import lombok.NoArgsConstructor; - @Data @NoArgsConstructor public class Paragraph { @@ -18,10 +17,12 @@ public class Paragraph { private List pageBlocks = new ArrayList<>(); private String headline; - public SearchableText getSearchableText(){ + + public SearchableText getSearchableText() { + SearchableText searchableText = new SearchableText(); pageBlocks.forEach(block -> { - if(block instanceof TextBlock){ + if (block instanceof TextBlock) { searchableText.addAll(((TextBlock) block).getSequences()); } }); @@ -29,14 +30,15 @@ public class Paragraph { } - public List getTables(){ + public List
getTables() { + List
tables = new ArrayList<>(); pageBlocks.forEach(block -> { - if(block instanceof Table){ + if (block instanceof Table) { tables.add((Table) block); } }); return tables; } -} +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java index 36404fbf..0cbdfcc0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java @@ -5,43 +5,45 @@ import java.util.Map; import lombok.Getter; -/** - * - */ public class StringFrequencyCounter { @Getter - Map countPerValue = new HashMap<>(); + private final Map countPerValue = new HashMap<>(); - public void add(String value){ - if(!countPerValue.containsKey(value)){ + + public void add(String value) { + + if (!countPerValue.containsKey(value)) { countPerValue.put(value, 1); } else { countPerValue.put(value, countPerValue.get(value) + 1); } } - public void addAll(Map otherCounter){ - for(Map.Entry entry: otherCounter.entrySet()){ - if(countPerValue.containsKey(entry.getKey())){ - countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey())+ entry.getValue()); + + public void addAll(Map otherCounter) { + + for (Map.Entry entry : otherCounter.entrySet()) { + if (countPerValue.containsKey(entry.getKey())) { + countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue()); } else { countPerValue.put(entry.getKey(), entry.getValue()); } } } - public String getMostPopular(){ + + public String getMostPopular() { + Map.Entry mostPopular = null; - for(Map.Entry entry: countPerValue.entrySet()){ - if(mostPopular == null){ + for (Map.Entry entry : countPerValue.entrySet()) { + if (mostPopular == null) { mostPopular = entry; - } else if(entry.getValue() > mostPopular.getValue()){ + } else if (entry.getValue() > mostPopular.getValue()) { mostPopular = entry; } } return mostPopular != null ? mostPopular.getKey() : null; } - -} +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java index 0a3240bf..d4b83409 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java @@ -29,20 +29,16 @@ public class BlockificationService { float minX = 1000, maxX = 0, minY = 1000, maxY = 0; TextPositionSequence prev = null; - for (TextPositionSequence word : textPositions) { boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25; boolean startFromTop = word.getY1() > maxY + word.getHeight(); - if (prev != null && - (lineSeparation - || startFromTop - || word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) - || word.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines) - || word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) - || word.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines) - )) { + if (prev != null && (lineSeparation || startFromTop || word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word + .getX1(), word.getY1(), verticalRulingLines) || word.getRotation() == 0 && isSplittedByRuling(minX, minY, word + .getX1(), word.getY2(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word + .getX1(), word.getY1(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(minX, minY, word + .getX1(), word.getY2(), verticalRulingLines))) { TextBlock cb1 = buildTextBlock(chunkWords); chunkBlockList1.add(cb1); @@ -100,11 +96,12 @@ public class BlockificationService { styleFrequencyCounter.add(wordBlock.getFontStyle()); if (textBlock == null) { - textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation()); + textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock + .getRotation()); } else { TextBlock spatialEntity = textBlock.union(wordBlock); - textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), - spatialEntity.getWidth(), spatialEntity.getHeight()); + textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity + .getHeight()); } } @@ -122,6 +119,7 @@ public class BlockificationService { private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List rulingLines) { + for (Ruling ruling : rulingLines) { if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) { return true; @@ -133,7 +131,6 @@ public class BlockificationService { public Rectangle calculateBodyTextFrame(List pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) { - float minX = 10000; float maxX = -100; float minY = 10000; @@ -147,7 +144,6 @@ public class BlockificationService { for (AbstractTextContainer container : page.getTextBlocks()) { - if (container instanceof TextBlock) { TextBlock textBlock = (TextBlock) container; if (textBlock.getMostPopularWordFont() == null || textBlock.getMostPopularWordStyle() == null) { @@ -179,16 +175,15 @@ public class BlockificationService { } } - if (container instanceof Table) { Table table = (Table) container; for (List row : table.getRows()) { - for (Cell column : row) { + for (Cell cell : row) { - if (column == null || column.getTextBlocks() == null) { + if (cell == null || cell.getTextBlocks() == null) { continue; } - for (TextBlock textBlock : column.getTextBlocks()) { + for (TextBlock textBlock : cell.getTextBlocks()) { if (textBlock.getMinX() < minX) { minX = textBlock.getMinX(); } @@ -211,5 +206,4 @@ public class BlockificationService { return new Rectangle(minY, minX, maxX - minX, maxY - minY); } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java index f4195f6a..e784ebb4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java @@ -4,14 +4,14 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; import java.util.regex.Pattern; +import java.util.stream.Collectors; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; -@SuppressWarnings("all") public class SearchableText { - private List sequences = new ArrayList<>(); + private final List sequences = new ArrayList<>(); public void add(TextPositionSequence textPositionSequence) { @@ -64,9 +64,9 @@ public class SearchableText { .charAt(j, caseInsensitive) == '-') { if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(sequences.get(i) - .charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && isSeparator(sequences.get(i - 1) + .charAt(j - 1, caseInsensitive)) || j == 0 && isSeparator(sequences.get(i - 1) .charAt(sequences.get(i - 1) - .length() - 1, caseInsensitive)) || j == 0 && i != 0 && sequences.get(i - 1) + .length() - 1, caseInsensitive)) || j == 0 && sequences.get(i - 1) .charAt(sequences.get(i - 1).length() - 1, caseInsensitive) != ' ' && sequences.get(i) .charAt(j, caseInsensitive) != ' ') { partMatch.add(sequences.get(i).textPositionAt(j)); @@ -163,7 +163,7 @@ public class SearchableText { return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()) .replaceAll("\n", " ") - .replaceAll(" ", " "); + .replaceAll(" {2}", " "); } @@ -187,4 +187,10 @@ public class SearchableText { return sb.append("\n").toString(); } -} + + public List getAsTabularData() { + + return sequences.stream().map(TextPositionSequence::toString).collect(Collectors.toList()); + } + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index 9b402fcd..9caff395 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -29,7 +29,6 @@ import lombok.extern.slf4j.Slf4j; @Slf4j @Service @RequiredArgsConstructor -@SuppressWarnings("PMD") public class PdfSegmentationService { private final RulingCleaningService rulingCleaningService; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java index 6a076019..9342533b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java @@ -16,11 +16,17 @@ public class Cell extends Rectangle { private List textBlocks = new ArrayList<>(); + public Cell(Point2D topLeft, Point2D bottomRight) { - super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); + + super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight + .getY() - topLeft.getY())); } + public void addTextBlock(TextBlock textBlock) { + textBlocks.add(textBlock); } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java index fc73610a..79f08ec4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java @@ -8,25 +8,28 @@ import org.locationtech.jts.index.strtree.STRtree; import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; - @SuppressWarnings("all") public class RectangleSpatialIndex { - private final STRtree si = new STRtree(); private final List rectangles = new ArrayList<>(); + public void add(T te) { + rectangles.add(te); si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); } - - public List contains(Rectangle r) { - List intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); + + + public List contains(Rectangle rectangle) { + + List intersection = si.query(new Envelope(rectangle.getLeft(), rectangle.getRight(), rectangle.getTop(), rectangle + .getBottom())); List rv = new ArrayList(); - for (T ir: intersection) { - if (r.contains(ir)) { + for (T ir : intersection) { + if (rectangle.contains(ir)) { rv.add(ir); } } @@ -34,18 +37,22 @@ public class RectangleSpatialIndex { Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); return rv; } - + + public List intersects(Rectangle r) { + List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); return rv; } - + + /** * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex - * + * * @return a Rectangle */ public Rectangle getBounds() { + return Rectangle.boundingBoxOf(rectangles); }