diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java deleted file mode 100644 index e9f0a94c..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.tableextraction.model; - -import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; - -import org.locationtech.jts.geom.Envelope; -import org.locationtech.jts.index.strtree.STRtree; - -import java.util.ArrayList; -import java.util.List; - -@SuppressWarnings("all") -public class RectangleSpatialIndex { - - private final STRtree si = new STRtree(); - private final List rectangles = new ArrayList<>(); - - - public void add(T te) { - - rectangles.add(te); - si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); - } - - - public List contains(Rectangle rectangle) { - - List intersection = si.query(new Envelope(rectangle.getLeft(), rectangle.getRight(), rectangle.getTop(), rectangle.getBottom())); - List rv = new ArrayList(); - - for (T ir : intersection) { - if (rectangle.contains(ir)) { - rv.add(ir); - } - } - - Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); - return rv; - } - - - public List intersects(Rectangle r) { - - List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); - return rv; - } - - - /** - * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex - * - * @return a Rectangle - */ - public Rectangle getBounds() { - - return Rectangle.boundingBoxOf(rectangles); - } - -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java index 62910062..8a6a93cb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java @@ -1,18 +1,18 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; +import java.awt.geom.Point2D; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; +import java.util.HashSet; import java.util.List; -import java.util.Map; +import java.util.Set; import java.util.TreeMap; +import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; import lombok.Getter; import lombok.Setter; @@ -23,7 +23,6 @@ public class Table extends AbstractTextContainer { private final TreeMap cells = new TreeMap<>(); - private final RectangleSpatialIndex si = new RectangleSpatialIndex<>(); private final int rotation; @Getter @Setter @@ -208,80 +207,74 @@ public class Table extends AbstractTextContainer { return; } - Iterator itty = cells.iterator(); + cells.removeIf(cell -> cell.getWidth() < 1.1 || cell.getHeight() < 1.1); - while (itty.hasNext()) { - Cell cell = itty.next(); - if (cell.getWidth() > 1.1 && cell.getHeight() > 1.1) { - si.add(cell); - } else { - itty.remove(); - } - } + List> rowsOfCells = calculateStructure(cells); - List> rowsOfCells = rowsOfCells(cells); - - Map previousNonNullCellForColumnIndex = new HashMap<>(); for (int i = 0; i < rowsOfCells.size(); i++) { - List row = rowsOfCells.get(i); - Iterator rowCells = row.iterator(); - int startColumn = 0; - int jumpToColumn = 0; - while (rowCells.hasNext()) { - Cell cell = rowCells.next(); - if (i > 0) { - Rectangle rectangle = new Rectangle(cell.getBottom(), - si.getBounds().getLeft(), - cell.getLeft() - si.getBounds().getLeft() + 1, - si.getBounds().getBottom() - cell.getBottom()); - List> others = rowsOfCells(si.contains(rectangle)); - - for (List r : others) { - jumpToColumn = Math.max(jumpToColumn, r.size()); - } - - while (startColumn != jumpToColumn) { - add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn); - startColumn++; - } - } - add(cell, i, startColumn); - previousNonNullCellForColumnIndex.put(startColumn, cell); - startColumn++; - jumpToColumn = startColumn; + for (int j = 0; j < rowsOfCells.get(i).size(); j++) { + add(rowsOfCells.get(i).get(j), i, j); } } + } - private List> rowsOfCells(List cells) { + /** + * Calculates the structure of the table. For spanning rows and columns multiple cells with the same values will be inserted. + * + * @param cells The found cells + * @return Table Structure + */ + private List> calculateStructure(List cells) { - List> rv = new ArrayList<>(); + List> matrix = new ArrayList<>(); if (cells.isEmpty()) { - return rv; + return matrix; } - cells.sort(Comparator.comparingDouble(Rectangle::getLeft)); - cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(), 2)))); + Set uniqueX = new HashSet<>(); + Set uniqueY = new HashSet<>(); + cells.stream().filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3).forEach(c -> { + uniqueX.add(c.getLeft()); + uniqueX.add(c.getRight()); + uniqueY.add(c.getBottom()); + uniqueY.add(c.getTop()); + }); - Iterator iter = cells.iterator(); - Cell c = iter.next(); - float lastTop = c.getBottom(); - List lastRow = new ArrayList<>(); - lastRow.add(c); - rv.add(lastRow); + var sortedUniqueX = uniqueX.stream().sorted().collect(Collectors.toList()); + var sortedUniqueY = uniqueY.stream().sorted().collect(Collectors.toList()); - while (iter.hasNext()) { - c = iter.next(); - if (!Utils.feq(c.getBottom(), lastTop)) { - lastRow = new ArrayList<>(); - rv.add(lastRow); + Float prevY = null; + for (Float y : sortedUniqueY) { + + List row = new ArrayList<>(); + + Float prevX = null; + for (Float x : sortedUniqueX) { + + if (prevY != null && prevX != null) { + var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y)); + + var intersectionCell = cells.stream().filter(c -> cell.intersects(c) && cell.overlapRatio(c) > 0.1f).findFirst(); + if (intersectionCell.isPresent()) { + cell.getTextBlocks().addAll(intersectionCell.get().getTextBlocks()); + } + row.add(cell); + } + prevX = x; } - lastRow.add(c); - lastTop = c.getBottom(); + + if (prevY != null && prevX != null) { + matrix.add(row); + } + prevY = y; } - return rv; + + Collections.reverse(matrix); + + return matrix; } @@ -355,4 +348,4 @@ public class Table extends AbstractTextContainer { return sb.toString(); } -} +} \ No newline at end of file