Pull request #507: RED-5276: Improved table calculation, support spanned rows and columns
Merge in RED/redaction-service from RED-5276-1 to master * commit 'd233c18d335d17d3c79590dd3a295eaa89881de2': RED-5276: Improved table calculation, support spanned rows and columns
This commit is contained in:
commit
53a375b832
@ -1,58 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import org.locationtech.jts.geom.Envelope;
|
||||
import org.locationtech.jts.index.strtree.STRtree;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class RectangleSpatialIndex<T extends Rectangle> {
|
||||
|
||||
private final STRtree si = new STRtree();
|
||||
private final List<T> rectangles = new ArrayList<>();
|
||||
|
||||
|
||||
public void add(T te) {
|
||||
|
||||
rectangles.add(te);
|
||||
si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te);
|
||||
}
|
||||
|
||||
|
||||
public List<T> contains(Rectangle rectangle) {
|
||||
|
||||
List<T> intersection = si.query(new Envelope(rectangle.getLeft(), rectangle.getRight(), rectangle.getTop(), rectangle.getBottom()));
|
||||
List<T> rv = new ArrayList<T>();
|
||||
|
||||
for (T ir : intersection) {
|
||||
if (rectangle.contains(ir)) {
|
||||
rv.add(ir);
|
||||
}
|
||||
}
|
||||
|
||||
Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
public List<T> intersects(Rectangle r) {
|
||||
|
||||
List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Minimum bounding box of all the Rectangles contained in this RectangleSpatialIndex
|
||||
*
|
||||
* @return a Rectangle
|
||||
*/
|
||||
public Rectangle getBounds() {
|
||||
|
||||
return Rectangle.boundingBoxOf(rectangles);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,18 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
@ -23,7 +23,6 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
|
||||
|
||||
private final RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
||||
private final int rotation;
|
||||
@Getter
|
||||
@Setter
|
||||
@ -208,80 +207,74 @@ public class Table extends AbstractTextContainer {
|
||||
return;
|
||||
}
|
||||
|
||||
Iterator<Cell> itty = cells.iterator();
|
||||
cells.removeIf(cell -> cell.getWidth() < 1.1 || cell.getHeight() < 1.1);
|
||||
|
||||
while (itty.hasNext()) {
|
||||
Cell cell = itty.next();
|
||||
if (cell.getWidth() > 1.1 && cell.getHeight() > 1.1) {
|
||||
si.add(cell);
|
||||
} else {
|
||||
itty.remove();
|
||||
}
|
||||
}
|
||||
List<List<Cell>> rowsOfCells = calculateStructure(cells);
|
||||
|
||||
List<List<Cell>> rowsOfCells = rowsOfCells(cells);
|
||||
|
||||
Map<Integer, Cell> previousNonNullCellForColumnIndex = new HashMap<>();
|
||||
for (int i = 0; i < rowsOfCells.size(); i++) {
|
||||
List<Cell> row = rowsOfCells.get(i);
|
||||
Iterator<Cell> rowCells = row.iterator();
|
||||
int startColumn = 0;
|
||||
int jumpToColumn = 0;
|
||||
while (rowCells.hasNext()) {
|
||||
Cell cell = rowCells.next();
|
||||
if (i > 0) {
|
||||
Rectangle rectangle = new Rectangle(cell.getBottom(),
|
||||
si.getBounds().getLeft(),
|
||||
cell.getLeft() - si.getBounds().getLeft() + 1,
|
||||
si.getBounds().getBottom() - cell.getBottom());
|
||||
List<List<Cell>> others = rowsOfCells(si.contains(rectangle));
|
||||
|
||||
for (List<Cell> r : others) {
|
||||
jumpToColumn = Math.max(jumpToColumn, r.size());
|
||||
}
|
||||
|
||||
while (startColumn != jumpToColumn) {
|
||||
add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
|
||||
startColumn++;
|
||||
}
|
||||
}
|
||||
add(cell, i, startColumn);
|
||||
previousNonNullCellForColumnIndex.put(startColumn, cell);
|
||||
startColumn++;
|
||||
jumpToColumn = startColumn;
|
||||
for (int j = 0; j < rowsOfCells.get(i).size(); j++) {
|
||||
add(rowsOfCells.get(i).get(j), i, j);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<List<Cell>> rowsOfCells(List<Cell> cells) {
|
||||
/**
|
||||
* Calculates the structure of the table. For spanning rows and columns, multiple cells with the same values will be inserted.
|
||||
*
|
||||
* @param cells The found cells
|
||||
* @return Table Structure
|
||||
*/
|
||||
private List<List<Cell>> calculateStructure(List<Cell> cells) {
|
||||
|
||||
List<List<Cell>> rv = new ArrayList<>();
|
||||
List<List<Cell>> matrix = new ArrayList<>();
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
return rv;
|
||||
return matrix;
|
||||
}
|
||||
cells.sort(Comparator.comparingDouble(Rectangle::getLeft));
|
||||
|
||||
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(), 2))));
|
||||
Set<Float> uniqueX = new HashSet<>();
|
||||
Set<Float> uniqueY = new HashSet<>();
|
||||
cells.stream().filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3).forEach(c -> {
|
||||
uniqueX.add(c.getLeft());
|
||||
uniqueX.add(c.getRight());
|
||||
uniqueY.add(c.getBottom());
|
||||
uniqueY.add(c.getTop());
|
||||
});
|
||||
|
||||
Iterator<Cell> iter = cells.iterator();
|
||||
Cell c = iter.next();
|
||||
float lastTop = c.getBottom();
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
lastRow.add(c);
|
||||
rv.add(lastRow);
|
||||
var sortedUniqueX = uniqueX.stream().sorted().collect(Collectors.toList());
|
||||
var sortedUniqueY = uniqueY.stream().sorted().collect(Collectors.toList());
|
||||
|
||||
while (iter.hasNext()) {
|
||||
c = iter.next();
|
||||
if (!Utils.feq(c.getBottom(), lastTop)) {
|
||||
lastRow = new ArrayList<>();
|
||||
rv.add(lastRow);
|
||||
Float prevY = null;
|
||||
for (Float y : sortedUniqueY) {
|
||||
|
||||
List<Cell> row = new ArrayList<>();
|
||||
|
||||
Float prevX = null;
|
||||
for (Float x : sortedUniqueX) {
|
||||
|
||||
if (prevY != null && prevX != null) {
|
||||
var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));
|
||||
|
||||
var intersectionCell = cells.stream().filter(c -> cell.intersects(c) && cell.overlapRatio(c) > 0.1f).findFirst();
|
||||
if (intersectionCell.isPresent()) {
|
||||
cell.getTextBlocks().addAll(intersectionCell.get().getTextBlocks());
|
||||
}
|
||||
row.add(cell);
|
||||
}
|
||||
prevX = x;
|
||||
}
|
||||
lastRow.add(c);
|
||||
lastTop = c.getBottom();
|
||||
|
||||
if (prevY != null && prevX != null) {
|
||||
matrix.add(row);
|
||||
}
|
||||
prevY = y;
|
||||
}
|
||||
return rv;
|
||||
|
||||
Collections.reverse(matrix);
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
|
||||
@ -355,4 +348,4 @@ public class Table extends AbstractTextContainer {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user