Pull request #507: RED-5276: Improved table calculation, support spanned rows and columns

Merge in RED/redaction-service from RED-5276-1 to master

* commit 'd233c18d335d17d3c79590dd3a295eaa89881de2':
  RED-5276: Improved table calculation, support spanned rows and columns
This commit is contained in:
Dominique Eiflaender 2022-12-23 11:25:43 +01:00
commit 53a375b832
2 changed files with 56 additions and 121 deletions

View File

@ -1,58 +0,0 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.index.strtree.STRtree;
import java.util.ArrayList;
import java.util.List;
/**
 * Spatial index over {@link Rectangle}s backed by a JTS {@link STRtree}.
 * <p>
 * Every added rectangle is stored twice: in the tree (for envelope queries) and in a plain list
 * (for computing the overall bounding box).
 * <p>
 * NOTE(review): JTS documents that an {@code STRtree} is built on its first query and does not
 * accept further inserts afterwards — confirm that callers finish all {@link #add} calls before
 * the first {@link #contains} / {@link #intersects} call.
 *
 * @param <T> the concrete rectangle type held by the index
 */
public class RectangleSpatialIndex<T extends Rectangle> {

    private final STRtree si = new STRtree();
    private final List<T> rectangles = new ArrayList<>();


    /**
     * Adds a rectangle to the index.
     *
     * @param te the rectangle to add
     */
    public void add(T te) {

        rectangles.add(te);
        si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te);
    }


    /**
     * Returns the indexed rectangles for which {@code rectangle.contains(...)} is true.
     *
     * @param rectangle the enclosing rectangle
     * @return a new list of the matching rectangles, sorted with {@code Rectangle.ILL_DEFINED_ORDER}
     */
    public List<T> contains(Rectangle rectangle) {

        List<T> rv = new ArrayList<>();
        // The tree only pre-filters by envelope; the exact containment test is done per candidate.
        for (T candidate : query(rectangle)) {
            if (rectangle.contains(candidate)) {
                rv.add(candidate);
            }
        }
        Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER);
        return rv;
    }


    /**
     * Returns the indexed rectangles whose envelope matches the envelope of the given rectangle
     * in the underlying tree query.
     *
     * @param r the query rectangle
     * @return the list returned by the tree query
     */
    public List<T> intersects(Rectangle r) {

        return query(r);
    }


    /**
     * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex
     *
     * @return a Rectangle
     */
    public Rectangle getBounds() {

        return Rectangle.boundingBoxOf(rectangles);
    }


    /**
     * Runs the envelope query shared by {@link #contains} and {@link #intersects}.
     */
    private List<T> query(Rectangle r) {

        // STRtree#query returns a raw List; only items of type T are ever inserted (see add),
        // so the unchecked conversion is safe and is suppressed for this one declaration only.
        @SuppressWarnings("unchecked")
        List<T> hits = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
        return hits;
    }

}

View File

@ -1,18 +1,18 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.Getter;
import lombok.Setter;
@ -23,7 +23,6 @@ public class Table extends AbstractTextContainer {
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
private final RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
private final int rotation;
@Getter
@Setter
@ -208,80 +207,74 @@ public class Table extends AbstractTextContainer {
return;
}
Iterator<Cell> itty = cells.iterator();
cells.removeIf(cell -> cell.getWidth() < 1.1 || cell.getHeight() < 1.1);
while (itty.hasNext()) {
Cell cell = itty.next();
if (cell.getWidth() > 1.1 && cell.getHeight() > 1.1) {
si.add(cell);
} else {
itty.remove();
}
}
List<List<Cell>> rowsOfCells = calculateStructure(cells);
List<List<Cell>> rowsOfCells = rowsOfCells(cells);
Map<Integer, Cell> previousNonNullCellForColumnIndex = new HashMap<>();
for (int i = 0; i < rowsOfCells.size(); i++) {
List<Cell> row = rowsOfCells.get(i);
Iterator<Cell> rowCells = row.iterator();
int startColumn = 0;
int jumpToColumn = 0;
while (rowCells.hasNext()) {
Cell cell = rowCells.next();
if (i > 0) {
Rectangle rectangle = new Rectangle(cell.getBottom(),
si.getBounds().getLeft(),
cell.getLeft() - si.getBounds().getLeft() + 1,
si.getBounds().getBottom() - cell.getBottom());
List<List<Cell>> others = rowsOfCells(si.contains(rectangle));
for (List<Cell> r : others) {
jumpToColumn = Math.max(jumpToColumn, r.size());
}
while (startColumn != jumpToColumn) {
add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
startColumn++;
}
}
add(cell, i, startColumn);
previousNonNullCellForColumnIndex.put(startColumn, cell);
startColumn++;
jumpToColumn = startColumn;
for (int j = 0; j < rowsOfCells.get(i).size(); j++) {
add(rowsOfCells.get(i).get(j), i, j);
}
}
}
private List<List<Cell>> rowsOfCells(List<Cell> cells) {
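/**
* Calculates the grid structure of the table. A cell that spans several rows or columns is inserted multiple times,
* once for every grid position it covers, and each of these cells carries the same text blocks.
*
* @param cells The found cells
* @return The table structure as a list of rows, each row being a list of cells
*/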
private List<List<Cell>> calculateStructure(List<Cell> cells) {
List<List<Cell>> rv = new ArrayList<>();
List<List<Cell>> matrix = new ArrayList<>();
if (cells.isEmpty()) {
return rv;
return matrix;
}
cells.sort(Comparator.comparingDouble(Rectangle::getLeft));
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(), 2))));
Set<Float> uniqueX = new HashSet<>();
Set<Float> uniqueY = new HashSet<>();
cells.stream().filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3).forEach(c -> {
uniqueX.add(c.getLeft());
uniqueX.add(c.getRight());
uniqueY.add(c.getBottom());
uniqueY.add(c.getTop());
});
Iterator<Cell> iter = cells.iterator();
Cell c = iter.next();
float lastTop = c.getBottom();
List<Cell> lastRow = new ArrayList<>();
lastRow.add(c);
rv.add(lastRow);
var sortedUniqueX = uniqueX.stream().sorted().collect(Collectors.toList());
var sortedUniqueY = uniqueY.stream().sorted().collect(Collectors.toList());
while (iter.hasNext()) {
c = iter.next();
if (!Utils.feq(c.getBottom(), lastTop)) {
lastRow = new ArrayList<>();
rv.add(lastRow);
Float prevY = null;
for (Float y : sortedUniqueY) {
List<Cell> row = new ArrayList<>();
Float prevX = null;
for (Float x : sortedUniqueX) {
if (prevY != null && prevX != null) {
var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));
var intersectionCell = cells.stream().filter(c -> cell.intersects(c) && cell.overlapRatio(c) > 0.1f).findFirst();
if (intersectionCell.isPresent()) {
cell.getTextBlocks().addAll(intersectionCell.get().getTextBlocks());
}
row.add(cell);
}
prevX = x;
}
lastRow.add(c);
lastTop = c.getBottom();
if (prevY != null && prevX != null) {
matrix.add(row);
}
prevY = y;
}
return rv;
Collections.reverse(matrix);
return matrix;
}
@ -355,4 +348,4 @@ public class Table extends AbstractTextContainer {
return sb.toString();
}
}
}