Fix style
Fix style. Fix style. Fix style and naming. Fix style, naming and field modifier. Fix style and remove warning suppression.
This commit is contained in:
parent
81048dcc9f
commit
695564d162
@ -10,7 +10,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class Paragraph {
|
||||
@ -18,10 +17,12 @@ public class Paragraph {
|
||||
private List<AbstractTextContainer> pageBlocks = new ArrayList<>();
|
||||
private String headline;
|
||||
|
||||
public SearchableText getSearchableText(){
|
||||
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
SearchableText searchableText = new SearchableText();
|
||||
pageBlocks.forEach(block -> {
|
||||
if(block instanceof TextBlock){
|
||||
if (block instanceof TextBlock) {
|
||||
searchableText.addAll(((TextBlock) block).getSequences());
|
||||
}
|
||||
});
|
||||
@ -29,14 +30,15 @@ public class Paragraph {
|
||||
}
|
||||
|
||||
|
||||
public List<Table> getTables(){
|
||||
public List<Table> getTables() {
|
||||
|
||||
List<Table> tables = new ArrayList<>();
|
||||
pageBlocks.forEach(block -> {
|
||||
if(block instanceof Table){
|
||||
if (block instanceof Table) {
|
||||
tables.add((Table) block);
|
||||
}
|
||||
});
|
||||
return tables;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -5,43 +5,45 @@ import java.util.Map;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class StringFrequencyCounter {
|
||||
|
||||
@Getter
|
||||
Map<String, Integer> countPerValue = new HashMap<>();
|
||||
private final Map<String, Integer> countPerValue = new HashMap<>();
|
||||
|
||||
public void add(String value){
|
||||
if(!countPerValue.containsKey(value)){
|
||||
|
||||
public void add(String value) {
|
||||
|
||||
if (!countPerValue.containsKey(value)) {
|
||||
countPerValue.put(value, 1);
|
||||
} else {
|
||||
countPerValue.put(value, countPerValue.get(value) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
public void addAll(Map<String, Integer> otherCounter){
|
||||
for(Map.Entry<String, Integer> entry: otherCounter.entrySet()){
|
||||
if(countPerValue.containsKey(entry.getKey())){
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey())+ entry.getValue());
|
||||
|
||||
public void addAll(Map<String, Integer> otherCounter) {
|
||||
|
||||
for (Map.Entry<String, Integer> entry : otherCounter.entrySet()) {
|
||||
if (countPerValue.containsKey(entry.getKey())) {
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
|
||||
} else {
|
||||
countPerValue.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getMostPopular(){
|
||||
|
||||
public String getMostPopular() {
|
||||
|
||||
Map.Entry<String, Integer> mostPopular = null;
|
||||
for(Map.Entry<String, Integer> entry: countPerValue.entrySet()){
|
||||
if(mostPopular == null){
|
||||
for (Map.Entry<String, Integer> entry : countPerValue.entrySet()) {
|
||||
if (mostPopular == null) {
|
||||
mostPopular = entry;
|
||||
} else if(entry.getValue() > mostPopular.getValue()){
|
||||
} else if (entry.getValue() > mostPopular.getValue()) {
|
||||
mostPopular = entry;
|
||||
}
|
||||
}
|
||||
return mostPopular != null ? mostPopular.getKey() : null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
@ -29,20 +29,16 @@ public class BlockificationService {
|
||||
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
|
||||
TextPositionSequence prev = null;
|
||||
|
||||
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
|
||||
if (prev != null &&
|
||||
(lineSeparation
|
||||
|| startFromTop
|
||||
|| word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines)
|
||||
|| word.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|
||||
|| word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|
||||
|| word.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines)
|
||||
)) {
|
||||
if (prev != null && (lineSeparation || startFromTop || word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word
|
||||
.getX1(), word.getY1(), verticalRulingLines) || word.getRotation() == 0 && isSplittedByRuling(minX, minY, word
|
||||
.getX1(), word.getY2(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word
|
||||
.getX1(), word.getY1(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(minX, minY, word
|
||||
.getX1(), word.getY2(), verticalRulingLines))) {
|
||||
|
||||
TextBlock cb1 = buildTextBlock(chunkWords);
|
||||
chunkBlockList1.add(cb1);
|
||||
@ -100,11 +96,12 @@ public class BlockificationService {
|
||||
styleFrequencyCounter.add(wordBlock.getFontStyle());
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock
|
||||
.getRotation());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
|
||||
spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity
|
||||
.getHeight());
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,6 +119,7 @@ public class BlockificationService {
|
||||
|
||||
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
|
||||
|
||||
for (Ruling ruling : rulingLines) {
|
||||
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
return true;
|
||||
@ -133,7 +131,6 @@ public class BlockificationService {
|
||||
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
@ -147,7 +144,6 @@ public class BlockificationService {
|
||||
|
||||
for (AbstractTextContainer container : page.getTextBlocks()) {
|
||||
|
||||
|
||||
if (container instanceof TextBlock) {
|
||||
TextBlock textBlock = (TextBlock) container;
|
||||
if (textBlock.getMostPopularWordFont() == null || textBlock.getMostPopularWordStyle() == null) {
|
||||
@ -179,16 +175,15 @@ public class BlockificationService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (container instanceof Table) {
|
||||
Table table = (Table) container;
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell column : row) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (column == null || column.getTextBlocks() == null) {
|
||||
if (cell == null || cell.getTextBlocks() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
if (textBlock.getMinX() < minX) {
|
||||
minX = textBlock.getMinX();
|
||||
}
|
||||
@ -211,5 +206,4 @@ public class BlockificationService {
|
||||
return new Rectangle(minY, minX, maxX - minX, maxY - minY);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -4,14 +4,14 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class SearchableText {
|
||||
|
||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
|
||||
public void add(TextPositionSequence textPositionSequence) {
|
||||
@ -64,9 +64,9 @@ public class SearchableText {
|
||||
.charAt(j, caseInsensitive) == '-') {
|
||||
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(sequences.get(i)
|
||||
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && isSeparator(sequences.get(i - 1)
|
||||
.charAt(j - 1, caseInsensitive)) || j == 0 && isSeparator(sequences.get(i - 1)
|
||||
.charAt(sequences.get(i - 1)
|
||||
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && sequences.get(i - 1)
|
||||
.length() - 1, caseInsensitive)) || j == 0 && sequences.get(i - 1)
|
||||
.charAt(sequences.get(i - 1).length() - 1, caseInsensitive) != ' ' && sequences.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ') {
|
||||
partMatch.add(sequences.get(i).textPositionAt(j));
|
||||
@ -163,7 +163,7 @@ public class SearchableText {
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
.replaceAll(" {2}", " ");
|
||||
}
|
||||
|
||||
|
||||
@ -187,4 +187,10 @@ public class SearchableText {
|
||||
return sb.append("\n").toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public List<String> getAsTabularData() {
|
||||
|
||||
return sequences.stream().map(TextPositionSequence::toString).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
@ -29,7 +29,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@SuppressWarnings("PMD")
|
||||
public class PdfSegmentationService {
|
||||
|
||||
private final RulingCleaningService rulingCleaningService;
|
||||
|
||||
@ -16,11 +16,17 @@ public class Cell extends Rectangle {
|
||||
|
||||
private List<TextBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
|
||||
public Cell(Point2D topLeft, Point2D bottomRight) {
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY()));
|
||||
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight
|
||||
.getY() - topLeft.getY()));
|
||||
}
|
||||
|
||||
|
||||
public void addTextBlock(TextBlock textBlock) {
|
||||
|
||||
textBlocks.add(textBlock);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -8,25 +8,28 @@ import org.locationtech.jts.index.strtree.STRtree;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class RectangleSpatialIndex<T extends Rectangle> {
|
||||
|
||||
|
||||
private final STRtree si = new STRtree();
|
||||
private final List<T> rectangles = new ArrayList<>();
|
||||
|
||||
|
||||
public void add(T te) {
|
||||
|
||||
rectangles.add(te);
|
||||
si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te);
|
||||
}
|
||||
|
||||
public List<T> contains(Rectangle r) {
|
||||
List<T> intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
|
||||
|
||||
public List<T> contains(Rectangle rectangle) {
|
||||
|
||||
List<T> intersection = si.query(new Envelope(rectangle.getLeft(), rectangle.getRight(), rectangle.getTop(), rectangle
|
||||
.getBottom()));
|
||||
List<T> rv = new ArrayList<T>();
|
||||
|
||||
for (T ir: intersection) {
|
||||
if (r.contains(ir)) {
|
||||
for (T ir : intersection) {
|
||||
if (rectangle.contains(ir)) {
|
||||
rv.add(ir);
|
||||
}
|
||||
}
|
||||
@ -34,18 +37,22 @@ public class RectangleSpatialIndex<T extends Rectangle> {
|
||||
Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public List<T> intersects(Rectangle r) {
|
||||
|
||||
List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex
|
||||
*
|
||||
*
|
||||
* @return a Rectangle
|
||||
*/
|
||||
public Rectangle getBounds() {
|
||||
|
||||
return Rectangle.boundingBoxOf(rectangles);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user