Merge branch 'RED-8550-bp' into 'release/0.89.x'
RED-8550: Faulty table recognition and text duplication lead to huge sections. See merge request fforesight/layout-parser!107
This commit is contained in:
commit
a266d98f11
@ -6,12 +6,14 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageB
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public abstract class AbstractPageBlock {
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public abstract class AbstractPageBlock extends Rectangle {
|
||||
|
||||
@JsonIgnore
|
||||
protected float minX;
|
||||
|
||||
@ -84,14 +84,16 @@ public class TableCell implements GenericSemanticNode {
|
||||
|
||||
private TextBlock buildTextBlock() {
|
||||
|
||||
return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
return streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getLeafTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return treeId + ": " + NodeType.TABLE_CELL + ": " + this.buildTextBlock().buildSummary();
|
||||
return treeId + ": " + NodeType.TABLE_CELL + ": " + this.getTextBlock().buildSummary();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.table;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@ -36,6 +37,12 @@ public class Cell extends Rectangle {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a cell from a {@link Rectangle2D}.
 * The rectangle's values are narrowed to {@code float} and handed to the superclass
 * constructor in the order y, x, width, height.
 *
 * @param r rectangle whose position and size are copied
 */
public Cell(Rectangle2D r) {

    super(
            (float) r.getY(),
            (float) r.getX(),
            (float) r.getWidth(),
            (float) r.getHeight());
}
|
||||
|
||||
|
||||
public void addTextBlock(TextPageBlock textBlock) {
|
||||
|
||||
textBlocks.add(textBlock);
|
||||
@ -76,14 +83,4 @@ public class Cell extends Rectangle {
|
||||
return this.getHeight() >= MIN_SIZE && this.getWidth() >= MIN_SIZE;
|
||||
}
|
||||
|
||||
public boolean nearlyIntersects(Cell other) {
|
||||
|
||||
if (this.getHeight() <= 0 || other.getHeight() <= 0) {
|
||||
return false;
|
||||
}
|
||||
double x0 = this.getX() + 2;
|
||||
double y0 = this.getY() + 2;
|
||||
return (other.x + other.width > x0 && other.y + other.height > y0 && other.x < x0 + this.getWidth() - 2 && other.y < y0 + this.getHeight() - 2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -20,7 +20,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@SuppressWarnings("all")
|
||||
public class Ruling extends Line2D.Float {
|
||||
|
||||
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
|
||||
public static final int PERPENDICULAR_UNIT_EXPAND_AMOUNT = 2;
|
||||
public static final int COLINEAR_OR_PARALLEL_UNIT_EXPAND_AMOUNT = 2;
|
||||
|
||||
|
||||
public Ruling(Point2D p1, Point2D p2) {
|
||||
@ -110,8 +111,8 @@ public class Ruling extends Line2D.Float {
|
||||
});
|
||||
|
||||
for (Ruling h : horizontals) {
|
||||
sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
||||
sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
||||
sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_UNIT_EXPAND_AMOUNT, h));
|
||||
sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_UNIT_EXPAND_AMOUNT, h));
|
||||
}
|
||||
|
||||
for (Ruling v : verticals) {
|
||||
@ -151,7 +152,7 @@ public class Ruling extends Line2D.Float {
|
||||
if (i == null) {
|
||||
continue;
|
||||
}
|
||||
rv.put(i, new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT), so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
|
||||
rv.put(i, new Ruling[]{h.getKey().expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT), so.ruling.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT)});
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.info("Some line are oblique, ignoring...");
|
||||
continue;
|
||||
@ -267,7 +268,7 @@ public class Ruling extends Line2D.Float {
|
||||
}
|
||||
|
||||
|
||||
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
|
||||
public boolean nearlyIntersects(Ruling another) {
|
||||
|
||||
if (this.intersectsLine(another)) {
|
||||
return true;
|
||||
@ -276,9 +277,9 @@ public class Ruling extends Line2D.Float {
|
||||
boolean rv = false;
|
||||
|
||||
if (this.perpendicularTo(another)) {
|
||||
rv = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT).intersectsLine(another);
|
||||
rv = this.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT).intersectsLine(another);
|
||||
} else {
|
||||
rv = this.expand(colinearOrParallelExpandAmount).intersectsLine(another.expand(colinearOrParallelExpandAmount));
|
||||
rv = this.expand(COLINEAR_OR_PARALLEL_UNIT_EXPAND_AMOUNT).intersectsLine(another.expand(COLINEAR_OR_PARALLEL_UNIT_EXPAND_AMOUNT));
|
||||
}
|
||||
|
||||
return rv;
|
||||
@ -319,8 +320,8 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
public Point2D intersectionPoint(Ruling other) {
|
||||
|
||||
Ruling this_l = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
|
||||
Ruling other_l = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
|
||||
Ruling this_l = this.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT);
|
||||
Ruling other_l = other.expand(PERPENDICULAR_UNIT_EXPAND_AMOUNT);
|
||||
Ruling horizontal, vertical;
|
||||
|
||||
if (!this_l.intersectsLine(other_l)) {
|
||||
|
||||
@ -3,6 +3,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.table;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
@ -11,6 +12,7 @@ import java.util.TreeMap;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
@ -19,6 +21,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
public class TablePageBlock extends AbstractPageBlock {
|
||||
|
||||
public static final double CELL_AREA_CONTAINED_THRESHOLD = 0.98;
|
||||
private final TreeMap<CellPosition, Cell> cellTreeMap = new TreeMap<>();
|
||||
|
||||
private final int rotation;
|
||||
@ -93,7 +96,7 @@ public class TablePageBlock extends AbstractPageBlock {
|
||||
|
||||
/**
|
||||
* Detect header cells (either first row or first column):
|
||||
* Column is marked as header if cell text is bold and row cell text is not bold.
|
||||
* Column is marked as header if originalCell text is bold and row originalCell text is not bold.
|
||||
* Defaults to row.
|
||||
*/
|
||||
private void computeHeaders() {
|
||||
@ -101,7 +104,7 @@ public class TablePageBlock extends AbstractPageBlock {
|
||||
if (rows == null) {
|
||||
rows = computeRows();
|
||||
}
|
||||
// A bold cell is a header cell as long as every cell to the left/top is bold, too
|
||||
// A bold originalCell is a header originalCell as long as every originalCell to the left/top is bold, too
|
||||
// we move from left to right and top to bottom
|
||||
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
|
||||
List<Cell> rowCells = rows.get(rowIndex);
|
||||
@ -257,15 +260,19 @@ public class TablePageBlock extends AbstractPageBlock {
|
||||
for (Float x : sortedUniqueX) {
|
||||
|
||||
if (prevY != null && prevX != null) {
|
||||
var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));
|
||||
var cellFromGridStructure = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));
|
||||
|
||||
if (cell.hasMinimumSize()) {
|
||||
if (cellFromGridStructure.hasMinimumSize()) {
|
||||
|
||||
cells.stream()
|
||||
.filter(cell::nearlyIntersects)
|
||||
.forEach(intersectingCell -> cell.getTextBlocks().addAll(intersectingCell.getTextBlocks()));
|
||||
.map(originalCell -> new CellWithIntersection(originalCell, RectangleTransformations.calculateIntersectedArea(cellFromGridStructure, originalCell)))
|
||||
.filter(cellWithIntersection -> cellWithIntersection.intersectedArea > 0)
|
||||
.filter(cellWithIntersection -> cellWithIntersection.originalCell.getArea() > cellWithIntersection.intersectedArea * CELL_AREA_CONTAINED_THRESHOLD)
|
||||
.max(Comparator.comparing(CellWithIntersection::intersectedArea))
|
||||
.map(CellWithIntersection::originalCell)
|
||||
.ifPresent(matchingCell -> cellFromGridStructure.getTextBlocks().addAll(matchingCell.getTextBlocks()));
|
||||
|
||||
row.add(cell);
|
||||
row.add(cellFromGridStructure);
|
||||
}
|
||||
}
|
||||
prevX = x;
|
||||
@ -405,4 +412,9 @@ public class TablePageBlock extends AbstractPageBlock {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
record CellWithIntersection(Cell originalCell, double intersectedArea) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,21 +1,21 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.GeometricComparators.X_FIRST_RULING_COMPARATOR;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparisons;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.UnionFind;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -25,26 +25,145 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class RulingCleaningService {
|
||||
|
||||
private static final float THRESHOLD_Y = 6;
|
||||
private static final float THRESHOLD_X = 2;
|
||||
private static final float THRESHOLD_X_VERTICAL = 1;
|
||||
private static final float THRESHOLD_Y_VERTICAL = 2;
|
||||
private static final float THRESHOLD_X_HORIZONTAL = 2;
|
||||
private static final float THRESHOLD_Y_HORIZONTAL = 3;
|
||||
|
||||
|
||||
public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings) {
|
||||
|
||||
Rulings verticalAndHorizontalRulingLines;
|
||||
|
||||
if (!rulings.isEmpty()) {
|
||||
snapPoints(rulings);
|
||||
verticalAndHorizontalRulingLines = extractVerticalAndHorizontalRulingLines(rulings);
|
||||
} else {
|
||||
verticalAndHorizontalRulingLines = getRulingsFromParsedCells(tableCells);
|
||||
}
|
||||
|
||||
verticalAndHorizontalRulingLines.verticalLines.sort(X_FIRST_RULING_COMPARATOR);
|
||||
verticalAndHorizontalRulingLines.horizontalLines.sort(X_FIRST_RULING_COMPARATOR);
|
||||
verticalAndHorizontalRulingLines = cleanRulings(verticalAndHorizontalRulingLines);
|
||||
|
||||
return CleanRulings.builder().vertical(verticalAndHorizontalRulingLines.verticalLines()).horizontal(verticalAndHorizontalRulingLines.horizontalLines()).build();
|
||||
}
|
||||
|
||||
|
||||
private Rulings cleanRulings(Rulings rulings) {
|
||||
|
||||
List<List<Rectangle>> groupedOverlappingVerticalRectangles = groupOverlappingRectangles(rulings.verticalLines.stream()
|
||||
.map(RulingCleaningService::getOverlapRectangle)
|
||||
.distinct()
|
||||
.toList());
|
||||
List<Ruling> cleanedVerticalRulings = groupedOverlappingVerticalRectangles.stream()
|
||||
.map(rectList -> getXCenteredRuling(Rectangle.boundingBoxOf(rectList)))
|
||||
.toList();
|
||||
|
||||
List<List<Rectangle>> groupedOverlappingHorizontalRectangles = groupOverlappingRectangles(rulings.horizontalLines.stream()
|
||||
.map(RulingCleaningService::getOverlapRectangle)
|
||||
.distinct()
|
||||
.toList());
|
||||
|
||||
List<Ruling> cleanedHorizontalRulings = groupedOverlappingHorizontalRectangles.stream()
|
||||
.map(rectList -> getYCenteredRuling(Rectangle.boundingBoxOf(rectList)))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return new Rulings(cleanedVerticalRulings, cleanedHorizontalRulings);
|
||||
}
|
||||
|
||||
|
||||
private List<List<Rectangle>> groupOverlappingRectangles(List<Rectangle> rectangles) {
|
||||
|
||||
UnionFind<Rectangle> unionFind = new UnionFind<>();
|
||||
for (int i = 0; i < rectangles.size(); i++) {
|
||||
for (int j = i + 1; j < rectangles.size(); j++) {
|
||||
Rectangle rectangle1 = rectangles.get(i);
|
||||
Rectangle rectangle2 = rectangles.get(j);
|
||||
|
||||
// we can stop early when we are too far off because of x-y-sorting
|
||||
if(rectangle1.getRight() < rectangle2.getLeft() && rectangle1.getBottom() < rectangle2.getTop()) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (rectangle1.intersects(rectangle2)) {
|
||||
unionFind.union(rectangle1, rectangle2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<Rectangle, List<Rectangle>> groups = new HashMap<>();
|
||||
for (Rectangle rectangle : rectangles) {
|
||||
Rectangle root = unionFind.find(rectangle);
|
||||
groups.computeIfAbsent(root, k -> new ArrayList<>()).add(rectangle);
|
||||
}
|
||||
return new ArrayList<>(groups.values());
|
||||
}
|
||||
|
||||
|
||||
private static Rectangle getOverlapRectangle(Ruling ruling) {
|
||||
|
||||
float top;
|
||||
float left;
|
||||
float w;
|
||||
float h;
|
||||
|
||||
if (ruling.x1 < ruling.x2) {
|
||||
left = ruling.x1;
|
||||
w = ruling.x2 - ruling.x1;
|
||||
} else {
|
||||
left = ruling.x2;
|
||||
w = ruling.x1 - ruling.x2;
|
||||
}
|
||||
if (ruling.y1 < ruling.y2) {
|
||||
top = ruling.y1;
|
||||
h = ruling.y2 - ruling.y1;
|
||||
} else {
|
||||
top = ruling.y2;
|
||||
h = ruling.y1 - ruling.y2;
|
||||
}
|
||||
|
||||
if (ruling.horizontal()) {
|
||||
return new Rectangle(top - THRESHOLD_Y_HORIZONTAL, left - THRESHOLD_X_HORIZONTAL, w + 2 * THRESHOLD_X_HORIZONTAL, h + 2 * THRESHOLD_Y_HORIZONTAL);
|
||||
} else {
|
||||
return new Rectangle(top - THRESHOLD_Y_VERTICAL, left - THRESHOLD_X_VERTICAL, w + 2 * THRESHOLD_X_VERTICAL, h + 2 * THRESHOLD_Y_VERTICAL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static Ruling getXCenteredRuling(Rectangle rectangle) {
|
||||
|
||||
float x = (float) rectangle.getCenterX();
|
||||
float y1 = rectangle.getTop();
|
||||
float y2 = rectangle.getBottom();
|
||||
|
||||
Point2D point1 = new Point2D.Float(x, y1 + THRESHOLD_Y_VERTICAL);
|
||||
Point2D point2 = new Point2D.Float(x, y2 - THRESHOLD_Y_VERTICAL);
|
||||
|
||||
return new Ruling(point1, point2);
|
||||
}
|
||||
|
||||
|
||||
public static Ruling getYCenteredRuling(Rectangle rectangle) {
|
||||
|
||||
float x1 = rectangle.getLeft();
|
||||
float x2 = rectangle.getRight();
|
||||
float y = (float) rectangle.getCenterY();
|
||||
|
||||
Point2D point1 = new Point2D.Float(x1 + THRESHOLD_X_HORIZONTAL, y);
|
||||
Point2D point2 = new Point2D.Float(x2 - THRESHOLD_X_HORIZONTAL, y);
|
||||
|
||||
return new Ruling(point1, point2);
|
||||
}
|
||||
|
||||
|
||||
private Rulings extractVerticalAndHorizontalRulingLines(List<Ruling> rulings) {
|
||||
|
||||
List<Ruling> vrs = new ArrayList<>();
|
||||
for (Ruling vr : rulings) {
|
||||
if (vr.vertical()) {
|
||||
vrs.add(vr);
|
||||
}
|
||||
}
|
||||
if (vrs.isEmpty()) {
|
||||
vrs.addAll(extractVerticalRulings(tableCells));
|
||||
}
|
||||
List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);
|
||||
|
||||
List<Ruling> hrs = new ArrayList<>();
|
||||
for (Ruling hr : rulings) {
|
||||
@ -52,98 +171,26 @@ public class RulingCleaningService {
|
||||
hrs.add(hr);
|
||||
}
|
||||
}
|
||||
if (hrs.isEmpty()) {
|
||||
hrs.addAll(extractHorizontalRulings(tableCells));
|
||||
}
|
||||
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
|
||||
|
||||
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
|
||||
return new Rulings(vrs, hrs);
|
||||
}
|
||||
|
||||
|
||||
public void snapPoints(List<? extends Line2D.Float> rulings) {
|
||||
private Rulings getRulingsFromParsedCells(List<TableCells> tableCells) {
|
||||
|
||||
// collect points and keep a Line -> p1,p2 map
|
||||
Map<Line2D.Float, Point2D[]> linesToPoints = new HashMap<>();
|
||||
List<Point2D> points = new ArrayList<>();
|
||||
for (Line2D.Float r : rulings) {
|
||||
Point2D p1 = r.getP1();
|
||||
Point2D p2 = r.getP2();
|
||||
linesToPoints.put(r, new Point2D[]{p1, p2});
|
||||
points.add(p1);
|
||||
points.add(p2);
|
||||
}
|
||||
|
||||
// snap by X
|
||||
points.sort(Comparator.comparingDouble(Point2D::getX));
|
||||
|
||||
List<List<Point2D>> groupedPoints = new ArrayList<>();
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(points.get(0))));
|
||||
|
||||
for (Point2D p : points.subList(1, points.size() - 1)) {
|
||||
List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
|
||||
if (Math.abs(p.getX() - last.get(0).getX()) < THRESHOLD_X) {
|
||||
groupedPoints.get(groupedPoints.size() - 1).add(p);
|
||||
} else {
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
|
||||
}
|
||||
}
|
||||
|
||||
for (List<Point2D> group : groupedPoints) {
|
||||
float avgLoc = 0;
|
||||
for (Point2D p : group) {
|
||||
avgLoc += p.getX();
|
||||
}
|
||||
avgLoc /= group.size();
|
||||
for (Point2D p : group) {
|
||||
p.setLocation(avgLoc, p.getY());
|
||||
}
|
||||
}
|
||||
// ---
|
||||
|
||||
// snap by Y
|
||||
points.sort(Comparator.comparingDouble(Point2D::getY));
|
||||
|
||||
groupedPoints = new ArrayList<>();
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(points.get(0))));
|
||||
|
||||
for (Point2D p : points.subList(1, points.size() - 1)) {
|
||||
List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
|
||||
if (Math.abs(p.getY() - last.get(0).getY()) < THRESHOLD_Y) {
|
||||
groupedPoints.get(groupedPoints.size() - 1).add(p);
|
||||
} else {
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
|
||||
}
|
||||
}
|
||||
|
||||
for (List<Point2D> group : groupedPoints) {
|
||||
float avgLoc = 0;
|
||||
for (Point2D p : group) {
|
||||
avgLoc += p.getY();
|
||||
}
|
||||
avgLoc /= group.size();
|
||||
for (Point2D p : group) {
|
||||
p.setLocation(p.getX(), avgLoc);
|
||||
}
|
||||
}
|
||||
// ---
|
||||
|
||||
// finally, modify lines
|
||||
for (Map.Entry<Line2D.Float, Point2D[]> ltp : linesToPoints.entrySet()) {
|
||||
Point2D[] p = ltp.getValue();
|
||||
ltp.getKey().setLine(p[0], p[1]);
|
||||
}
|
||||
List<Ruling> vrs = extractVerticalRulingsFromParsedCells(tableCells);
|
||||
List<Ruling> hrs = extractHorizontalRulingsFromParsedCells(tableCells);
|
||||
return new Rulings(vrs, hrs);
|
||||
}
|
||||
|
||||
|
||||
private Collection<? extends Ruling> extractVerticalRulings(List<TableCells> cvParsedTableCells) {
|
||||
private List<Ruling> extractVerticalRulingsFromParsedCells(List<TableCells> tableCells) {
|
||||
|
||||
List<Ruling> vrs = new ArrayList<>();
|
||||
|
||||
if (cvParsedTableCells != null) {
|
||||
for (TableCells cvParsedTableCell : cvParsedTableCells) {
|
||||
Ruling leftLine = createRuling(cvParsedTableCell.getX0(), cvParsedTableCell.getX0(), cvParsedTableCell.getY0(), cvParsedTableCell.getY1());
|
||||
Ruling rightLine = createRuling(cvParsedTableCell.getX1(), cvParsedTableCell.getX1(), cvParsedTableCell.getY0(), cvParsedTableCell.getY1());
|
||||
if (tableCells != null) {
|
||||
for (TableCells tableCell : tableCells) {
|
||||
Ruling leftLine = createRuling(tableCell.getX0(), tableCell.getX0(), tableCell.getY0(), tableCell.getY1());
|
||||
Ruling rightLine = createRuling(tableCell.getX1(), tableCell.getX1(), tableCell.getY0(), tableCell.getY1());
|
||||
vrs.add(leftLine);
|
||||
vrs.add(rightLine);
|
||||
}
|
||||
@ -152,19 +199,18 @@ public class RulingCleaningService {
|
||||
}
|
||||
|
||||
|
||||
private Collection<? extends Ruling> extractHorizontalRulings(List<TableCells> cvParsedTableCells) {
|
||||
private List<Ruling> extractHorizontalRulingsFromParsedCells(List<TableCells> tableCells) {
|
||||
|
||||
List<Ruling> hrs = new ArrayList<>();
|
||||
|
||||
if (cvParsedTableCells != null) {
|
||||
for (TableCells cvParsedTableCell : cvParsedTableCells) {
|
||||
Ruling topLine = createRuling(cvParsedTableCell.getX0(), cvParsedTableCell.getX1(), cvParsedTableCell.getY1(), cvParsedTableCell.getY1());
|
||||
Ruling baseLine = createRuling(cvParsedTableCell.getX0(), cvParsedTableCell.getX1(), cvParsedTableCell.getY0(), cvParsedTableCell.getY0());
|
||||
if (tableCells != null) {
|
||||
for (TableCells tableCell : tableCells) {
|
||||
Ruling topLine = createRuling(tableCell.getX0(), tableCell.getX1(), tableCell.getY1(), tableCell.getY1());
|
||||
Ruling baseLine = createRuling(tableCell.getX0(), tableCell.getX1(), tableCell.getY0(), tableCell.getY0());
|
||||
hrs.add(topLine);
|
||||
hrs.add(baseLine);
|
||||
}
|
||||
}
|
||||
|
||||
return hrs;
|
||||
}
|
||||
|
||||
@ -190,46 +236,8 @@ public class RulingCleaningService {
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
|
||||
private record Rulings(List<Ruling> verticalLines, List<Ruling> horizontalLines) {
|
||||
|
||||
int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;
|
||||
return collapseOrientedRulings(lines, COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT);
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> collapseOrientedRulings(List<Ruling> lines, int expandAmount) {
|
||||
|
||||
ArrayList<Ruling> rv = new ArrayList<>();
|
||||
lines.sort((a, b) -> {
|
||||
final float diff = a.getPosition() - b.getPosition();
|
||||
return Float.compare(diff == 0 ? a.getStart() - b.getStart() : diff, 0f);
|
||||
});
|
||||
|
||||
for (Ruling next_line : lines) {
|
||||
Ruling last = rv.isEmpty() ? null : rv.get(rv.size() - 1);
|
||||
// if current line colinear with next, and are "close enough": expand current line
|
||||
if (last != null && DoubleComparisons.feq(next_line.getPosition(), last.getPosition()) && last.nearlyIntersects(next_line, expandAmount)) {
|
||||
final float lastStart = last.getStart();
|
||||
final float lastEnd = last.getEnd();
|
||||
|
||||
final boolean lastFlipped = lastStart > lastEnd;
|
||||
final boolean nextFlipped = next_line.getStart() > next_line.getEnd();
|
||||
|
||||
boolean differentDirections = nextFlipped != lastFlipped;
|
||||
float nextS = differentDirections ? next_line.getEnd() : next_line.getStart();
|
||||
float nextE = differentDirections ? next_line.getStart() : next_line.getEnd();
|
||||
|
||||
final float newStart = lastFlipped ? Math.max(nextS, lastStart) : Math.min(nextS, lastStart);
|
||||
final float newEnd = lastFlipped ? Math.min(nextE, lastEnd) : Math.max(nextE, lastEnd);
|
||||
last.setStartEnd(newStart, newEnd);
|
||||
assert !last.oblique();
|
||||
} else if (next_line.length() == 0) {
|
||||
continue;
|
||||
} else {
|
||||
rv.add(next_line);
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.GeometricComparators.CELL_SIZE_COMPARATOR;
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.GeometricComparators.RECTANGLE_SIZE_COMPARATOR;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -20,66 +20,15 @@ import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparisons;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangularIntersectionFinder;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.SpreadsheetFinder;
|
||||
|
||||
@Service
|
||||
public class TableExtractionService {
|
||||
|
||||
private static final int MAX_TABLE_OUTER_POINT_TOLERANCE = 10;
|
||||
private static final int MAX_TABLE_CONTAINED_CELLS_WITH_TEXT = 1;
|
||||
private static final float SPREADSHEET_AREA_TOLERANCE = 0.001f;
|
||||
|
||||
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (point1, point2) -> {
|
||||
|
||||
int rv = 0;
|
||||
float point1X = DoubleComparisons.round(point1.getX(), 2);
|
||||
float point1Y = DoubleComparisons.round(point1.getY(), 2);
|
||||
float point2X = DoubleComparisons.round(point2.getX(), 2);
|
||||
float point2Y = DoubleComparisons.round(point2.getY(), 2);
|
||||
|
||||
if (point1X > point2X) {
|
||||
rv = 1;
|
||||
} else if (point1X < point2X) {
|
||||
rv = -1;
|
||||
} else if (point1Y > point2Y) {
|
||||
rv = 1;
|
||||
} else if (point1Y < point2Y) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
private static final Comparator<Point2D> Y_FIRST_POINT_COMPARATOR = (point1, point2) -> {
|
||||
|
||||
int rv = 0;
|
||||
float point1X = DoubleComparisons.round(point1.getX(), 2);
|
||||
float point1Y = DoubleComparisons.round(point1.getY(), 2);
|
||||
float point2X = DoubleComparisons.round(point2.getX(), 2);
|
||||
float point2Y = DoubleComparisons.round(point2.getY(), 2);
|
||||
|
||||
if (point1Y > point2Y) {
|
||||
rv = 1;
|
||||
} else if (point1Y < point2Y) {
|
||||
rv = -1;
|
||||
} else if (point1X > point2X) {
|
||||
rv = 1;
|
||||
} else if (point1X < point2X) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
|
||||
private static final Comparator<Cell> CELL_SIZE_COMPARATOR = (cell1, cell2) -> {
|
||||
|
||||
Double cell1Size = cell1.getHeight() * cell1.getWidth();
|
||||
Double cell2Size = cell2.getHeight() * cell2.getWidth();
|
||||
return cell1Size.compareTo(cell2Size);
|
||||
};
|
||||
|
||||
private static final Comparator<Rectangle> RECTANGLE_SIZE_COMPARATOR = (rect1, rect2) -> {
|
||||
|
||||
Double rect1Size = rect1.getHeight() * rect1.getWidth();
|
||||
Double rect2Size = rect2.getHeight() * rect2.getWidth();
|
||||
return rect1Size.compareTo(rect2Size);
|
||||
};
|
||||
private static final int TEXT_BLOCK_CONTAINMENT_TOLERANCE = 2;
|
||||
private static final double TABLE_UNIFORMITY_THRESHOLD = 0.7;
|
||||
|
||||
|
||||
/**
|
||||
@ -115,7 +64,7 @@ public class TableExtractionService {
|
||||
cells = new ArrayList<>(new HashSet<>(cells));
|
||||
DoubleComparisons.sort(cells, Rectangle.ILL_DEFINED_ORDER);
|
||||
|
||||
List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells);
|
||||
List<Rectangle> spreadsheetAreas = SpreadsheetFinder.findSpreadsheetsFromCells(cells);
|
||||
// sort spreadsheetAreas by size (height * width) ascending so that cells are placed in the smallest tables first
|
||||
// this way no cell duplication occurs when tables are contained in other tables and only the most inner table contains the cells
|
||||
spreadsheetAreas.sort(RECTANGLE_SIZE_COMPARATOR);
|
||||
@ -132,10 +81,10 @@ public class TableExtractionService {
|
||||
|
||||
var containedCellsWithText = containedCells.stream()
|
||||
.filter(cell -> !cell.getTextBlocks().isEmpty())
|
||||
.count();
|
||||
.toList();
|
||||
|
||||
// verify if table would contain fewer cells with text than the threshold allows
|
||||
if (containedCellsWithText >= MAX_TABLE_CONTAINED_CELLS_WITH_TEXT) {
|
||||
if (containedCellsWithText.size() >= MAX_TABLE_CONTAINED_CELLS_WITH_TEXT && checkIfTableCellsAreUniform(containedCells)) {
|
||||
tables.add(new TablePageBlock(containedCells, area, page.getRotation()));
|
||||
cells.removeAll(containedCells);
|
||||
}
|
||||
@ -164,6 +113,21 @@ public class TableExtractionService {
|
||||
}
|
||||
|
||||
|
||||
private boolean checkIfTableCellsAreUniform(List<Cell> containedCells) {
|
||||
|
||||
if(containedCells.size() <= 2) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Map<Long, List<Long>> cellsGroupedByRoundedWidth = containedCells.stream()
|
||||
.map(Rectangle::getWidth)
|
||||
.map(size -> Math.round(size / 10.0) * 10)
|
||||
.collect(Collectors.groupingBy(Long::longValue));
|
||||
|
||||
return (double) cellsGroupedByRoundedWidth.size() / containedCells.size() <= TABLE_UNIFORMITY_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
private boolean doesCellContainTextBlock(Cell cell, TextPageBlock textBlock) {
|
||||
|
||||
double x = textBlock.getPdfMinX();
|
||||
@ -175,225 +139,19 @@ public class TableExtractionService {
|
||||
}
|
||||
double x0 = cell.getX();
|
||||
double y0 = cell.getY();
|
||||
return (x >= x0 - 2 && y >= y0 - 2 && (x + w) <= x0 + cell.getWidth() + 2 && (y + h) <= y0 + cell.getHeight() + 2);
|
||||
return (x >= x0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE
|
||||
&& y >= y0 - TEXT_BLOCK_CONTAINMENT_TOLERANCE
|
||||
&& (x + w) <= x0 + cell.getWidth() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE
|
||||
&& (y + h) <= y0 + cell.getHeight() + 2 * TEXT_BLOCK_CONTAINMENT_TOLERANCE);
|
||||
}
|
||||
|
||||
|
||||
private List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
// Fix for 211.pdf
|
||||
for (Ruling r : horizontalRulingLines) {
|
||||
if (r.getX2() < r.getX1()) {
|
||||
double a = r.getX2();
|
||||
r.x2 = (float) r.getX1();
|
||||
r.x1 = (float) a;
|
||||
}
|
||||
}
|
||||
|
||||
List<Cell> cellsFound = new ArrayList<>();
|
||||
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
|
||||
List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
|
||||
intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR);
|
||||
|
||||
for (int i = 0; i < intersectionPointsList.size(); i++) {
|
||||
Point2D topLeft = intersectionPointsList.get(i);
|
||||
Ruling[] hv = intersectionPoints.get(topLeft);
|
||||
|
||||
// CrossingPointsDirectlyBelow( topLeft );
|
||||
List<Point2D> xPoints = new ArrayList<>();
|
||||
// CrossingPointsDirectlyToTheRight( topLeft );
|
||||
List<Point2D> yPoints = new ArrayList<>();
|
||||
|
||||
for (Point2D p : intersectionPointsList.subList(i, intersectionPointsList.size())) {
|
||||
if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) {
|
||||
xPoints.add(p);
|
||||
}
|
||||
if (p.getY() == topLeft.getY() && p.getX() > topLeft.getX()) {
|
||||
yPoints.add(p);
|
||||
}
|
||||
}
|
||||
outer:
|
||||
for (Point2D xPoint : xPoints) {
|
||||
// is there a vertical edge b/w topLeft and xPoint?
|
||||
if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) {
|
||||
continue;
|
||||
}
|
||||
for (Point2D yPoint : yPoints) {
|
||||
// is there a horizontal edge b/w topLeft and yPoint ?
|
||||
if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) {
|
||||
continue;
|
||||
}
|
||||
Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
|
||||
if (intersectionPoints.containsKey(btmRight)
|
||||
&& intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
|
||||
&& intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
|
||||
cellsFound.add(new Cell(topLeft, btmRight));
|
||||
break outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO create cells for vertical ruling lines with aligned endpoints at the top/bottom of a grid
|
||||
// that aren't connected with an horizontal ruler?
|
||||
// see: https://github.com/jazzido/tabula-extractor/issues/78#issuecomment-41481207
|
||||
|
||||
return cellsFound;
|
||||
}
|
||||
|
||||
|
||||
private List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
||||
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
|
||||
List<Rectangle> rectangles = new ArrayList<>();
|
||||
Set<Point2D> pointSet = new HashSet<>();
|
||||
Map<Point2D, Point2D> edgesH = new HashMap<>();
|
||||
Map<Point2D, Point2D> edgesV = new HashMap<>();
|
||||
|
||||
for (Rectangle cell : cells) {
|
||||
for (Point2D pt : cell.getPoints()) {
|
||||
if (pointSet.contains(pt)) { // shared vertex, remove it
|
||||
pointSet.remove(pt);
|
||||
} else {
|
||||
pointSet.add(pt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// X first sort
|
||||
List<Point2D> pointsSortX = new ArrayList<>(pointSet);
|
||||
pointsSortX.sort(X_FIRST_POINT_COMPARATOR);
|
||||
// Y first sort
|
||||
List<Point2D> pointsSortY = new ArrayList<>(pointSet);
|
||||
pointsSortY.sort(Y_FIRST_POINT_COMPARATOR);
|
||||
|
||||
int i = 0;
|
||||
while (i < pointSet.size()) {
|
||||
float currY = (float) pointsSortY.get(i).getY();
|
||||
while (i < pointSet.size() && DoubleComparisons.feq(pointsSortY.get(i).getY(), currY)) {
|
||||
edgesH.put(pointsSortY.get(i), pointsSortY.get(i + 1));
|
||||
edgesH.put(pointsSortY.get(i + 1), pointsSortY.get(i));
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < pointSet.size()) {
|
||||
float currX = (float) pointsSortX.get(i).getX();
|
||||
while (i < pointSet.size() && DoubleComparisons.feq(pointsSortX.get(i).getX(), currX)) {
|
||||
edgesV.put(pointsSortX.get(i), pointsSortX.get(i + 1));
|
||||
edgesV.put(pointsSortX.get(i + 1), pointsSortX.get(i));
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Get all the polygons
|
||||
List<List<PolygonVertex>> polygons = new ArrayList<>();
|
||||
Point2D nextVertex;
|
||||
while (!edgesH.isEmpty()) {
|
||||
ArrayList<PolygonVertex> polygon = new ArrayList<>();
|
||||
Point2D first = edgesH.keySet()
|
||||
.iterator().next();
|
||||
polygon.add(new PolygonVertex(first, Direction.HORIZONTAL));
|
||||
edgesH.remove(first);
|
||||
|
||||
while (true) {
|
||||
PolygonVertex curr = polygon.get(polygon.size() - 1);
|
||||
PolygonVertex lastAddedVertex;
|
||||
if (curr.direction == Direction.HORIZONTAL) {
|
||||
nextVertex = edgesV.get(curr.point);
|
||||
edgesV.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.VERTICAL);
|
||||
} else {
|
||||
nextVertex = edgesH.get(curr.point);
|
||||
edgesH.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.HORIZONTAL);
|
||||
}
|
||||
polygon.add(lastAddedVertex);
|
||||
|
||||
if (lastAddedVertex.equals(polygon.get(0))) {
|
||||
// closed polygon
|
||||
polygon.remove(polygon.size() - 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (PolygonVertex vertex : polygon) {
|
||||
edgesH.remove(vertex.point);
|
||||
edgesV.remove(vertex.point);
|
||||
}
|
||||
polygons.add(polygon);
|
||||
}
|
||||
|
||||
// calculate grid-aligned minimum area rectangles for each found polygon
|
||||
for (List<PolygonVertex> poly : polygons) {
|
||||
float top = Float.MAX_VALUE;
|
||||
float left = Float.MAX_VALUE;
|
||||
float bottom = Float.MIN_VALUE;
|
||||
float right = Float.MIN_VALUE;
|
||||
for (PolygonVertex pt : poly) {
|
||||
top = (float) Math.min(top, pt.point.getY());
|
||||
left = (float) Math.min(left, pt.point.getX());
|
||||
bottom = (float) Math.max(bottom, pt.point.getY());
|
||||
right = (float) Math.max(right, pt.point.getX());
|
||||
}
|
||||
|
||||
// do not add polygons with too many outer points as they are unlikely to be tables
|
||||
if (poly.size() <= MAX_TABLE_OUTER_POINT_TOLERANCE) {
|
||||
rectangles.add(new Rectangle(top - SPREADSHEET_AREA_TOLERANCE,
|
||||
left - SPREADSHEET_AREA_TOLERANCE,
|
||||
right - left + 2 * SPREADSHEET_AREA_TOLERANCE,
|
||||
bottom - top + 2 * SPREADSHEET_AREA_TOLERANCE));
|
||||
}
|
||||
}
|
||||
|
||||
return rectangles;
|
||||
}
|
||||
|
||||
|
||||
private enum Direction {
|
||||
HORIZONTAL,
|
||||
VERTICAL
|
||||
}
|
||||
|
||||
static class PolygonVertex {
|
||||
|
||||
Point2D point;
|
||||
Direction direction;
|
||||
|
||||
|
||||
PolygonVertex(Point2D point, Direction direction) {
|
||||
|
||||
this.direction = direction;
|
||||
this.point = point;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof PolygonVertex)) {
|
||||
return false;
|
||||
}
|
||||
return this.point.equals(((PolygonVertex) other).point);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return this.point.hashCode();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return String.format("%s[point=%s,direction=%s]", this.getClass().getName(), this.point.toString(), this.direction.toString());
|
||||
}
|
||||
public static List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
return RectangularIntersectionFinder.find(horizontalRulingLines, verticalRulingLines)
|
||||
.stream()
|
||||
.map(Cell::new)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -105,8 +105,8 @@ public class DocumentGraphFactory {
|
||||
.build();
|
||||
page.getMainBody().add(imageNode);
|
||||
|
||||
List<Integer> tocId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
||||
imageNode.setTreeId(tocId);
|
||||
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
||||
imageNode.setTreeId(treeId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -188,14 +188,33 @@ public class LayoutGridService {
|
||||
@SneakyThrows
|
||||
private void addPlacedText(Page page, Rectangle2D textBBox, String s, LayoutGrid layoutGrid) {
|
||||
|
||||
Point2D.Float upperLeftCorner = switch (page.getRotation()) {
|
||||
case 90 -> new Point2D.Float((float) (textBBox.getMinX()), (float) textBBox.getMinY());
|
||||
case 180 -> new Point2D.Float((float) (textBBox.getMaxX()), (float) textBBox.getMinY());
|
||||
case 270 -> new Point2D.Float((float) (textBBox.getMaxX()), (float) textBBox.getMaxY());
|
||||
default -> new Point2D.Float((float) (textBBox.getMinX()), (float) textBBox.getMaxY());
|
||||
};
|
||||
// translates text, such that its right edge is a bit to the left of the drawn box
|
||||
float translationAmount = ((FONT.getStringWidth(s) / 1000) * FONT_SIZE + (2 * LINE_WIDTH) + 4);
|
||||
|
||||
Point2D upperLeftCorner;
|
||||
Point2D translationVector;
|
||||
switch (page.getRotation()) {
|
||||
case 90 -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMinX(), textBBox.getMinY());
|
||||
translationVector = new Point2D.Double(FONT_SIZE, -translationAmount);
|
||||
}
|
||||
case 180 -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMaxX(), textBBox.getMinY());
|
||||
translationVector = new Point2D.Double(translationAmount, FONT_SIZE);
|
||||
}
|
||||
case 270 -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMaxX(), textBBox.getMaxY());
|
||||
translationVector = new Point2D.Double(-FONT_SIZE, translationAmount);
|
||||
}
|
||||
default -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMinX(), textBBox.getMaxY());
|
||||
translationVector = new Point2D.Double(-translationAmount, -FONT_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
upperLeftCorner = add(upperLeftCorner, translationVector);
|
||||
|
||||
var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts();
|
||||
upperLeftCorner.setLocation(upperLeftCorner.getX() - ((FONT.getStringWidth(s) / 1000) * FONT_SIZE + (2 * LINE_WIDTH) + 4), upperLeftCorner.getY() - FONT_SIZE);
|
||||
placedTexts.add(PlacedText.textFacingUp(s, upperLeftCorner, FONT_SIZE, Color.BLACK, FONT));
|
||||
}
|
||||
|
||||
@ -317,4 +336,10 @@ public class LayoutGridService {
|
||||
.add(new ColoredRectangle(textBBox, color, LINE_WIDTH)));
|
||||
}
|
||||
|
||||
|
||||
private Point2D add(Point2D a, Point2D b) {
|
||||
|
||||
return new Point2D.Double(a.getX() + b.getX(), a.getY() + b.getY());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import java.awt.Color;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@Getter
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class DrawingOptions {
|
||||
|
||||
boolean stroke;
|
||||
@Builder.Default
|
||||
Color strokeColor = Color.BLACK;
|
||||
@Builder.Default
|
||||
float strokeWidth = 1f;
|
||||
|
||||
boolean fill;
|
||||
@Builder.Default
|
||||
Color fillColor = Color.BLACK;
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,88 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.Comparator;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
|
||||
public class GeometricComparators {
|
||||
|
||||
private static final int COMPARATOR_ROUNDING = 2;
|
||||
|
||||
public static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (point1, point2) -> {
|
||||
|
||||
int rv = 0;
|
||||
float point1X = DoubleComparisons.round(point1.getX(), COMPARATOR_ROUNDING);
|
||||
float point1Y = DoubleComparisons.round(point1.getY(), COMPARATOR_ROUNDING);
|
||||
float point2X = DoubleComparisons.round(point2.getX(), COMPARATOR_ROUNDING);
|
||||
float point2Y = DoubleComparisons.round(point2.getY(), COMPARATOR_ROUNDING);
|
||||
|
||||
if (point1X > point2X) {
|
||||
rv = 1;
|
||||
} else if (point1X < point2X) {
|
||||
rv = -1;
|
||||
} else if (point1Y > point2Y) {
|
||||
rv = 1;
|
||||
} else if (point1Y < point2Y) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
|
||||
public static final Comparator<Point2D> Y_FIRST_POINT_COMPARATOR = (point1, point2) -> {
|
||||
|
||||
int rv = 0;
|
||||
float point1X = DoubleComparisons.round(point1.getX(), COMPARATOR_ROUNDING);
|
||||
float point1Y = DoubleComparisons.round(point1.getY(), COMPARATOR_ROUNDING);
|
||||
float point2X = DoubleComparisons.round(point2.getX(), COMPARATOR_ROUNDING);
|
||||
float point2Y = DoubleComparisons.round(point2.getY(), COMPARATOR_ROUNDING);
|
||||
|
||||
if (point1Y > point2Y) {
|
||||
rv = 1;
|
||||
} else if (point1Y < point2Y) {
|
||||
rv = -1;
|
||||
} else if (point1X > point2X) {
|
||||
rv = 1;
|
||||
} else if (point1X < point2X) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
|
||||
public static final Comparator<Cell> CELL_SIZE_COMPARATOR = (cell1, cell2) -> {
|
||||
|
||||
Double cell1Size = cell1.getHeight() * cell1.getWidth();
|
||||
Double cell2Size = cell2.getHeight() * cell2.getWidth();
|
||||
return cell1Size.compareTo(cell2Size);
|
||||
};
|
||||
|
||||
public static final Comparator<Rectangle> RECTANGLE_SIZE_COMPARATOR = (rect1, rect2) -> {
|
||||
|
||||
Double rect1Size = rect1.getHeight() * rect1.getWidth();
|
||||
Double rect2Size = rect2.getHeight() * rect2.getWidth();
|
||||
return rect1Size.compareTo(rect2Size);
|
||||
};
|
||||
|
||||
public static final Comparator<Ruling> X_FIRST_RULING_COMPARATOR = (ruling1, ruling2) -> {
|
||||
|
||||
int rv = 0;
|
||||
float point1X = DoubleComparisons.round(Math.min(ruling1.getLeft(), ruling1.getRight()), COMPARATOR_ROUNDING);
|
||||
float point1Y = DoubleComparisons.round(Math.min(ruling1.getTop(), ruling1.getBottom()), COMPARATOR_ROUNDING);
|
||||
float point2X = DoubleComparisons.round(Math.min(ruling2.getLeft(), ruling2.getRight()), COMPARATOR_ROUNDING);
|
||||
float point2Y = DoubleComparisons.round(Math.min(ruling2.getTop(), ruling2.getBottom()), COMPARATOR_ROUNDING);
|
||||
|
||||
if (point1X > point2X) {
|
||||
rv = 1;
|
||||
} else if (point1X < point2X) {
|
||||
rv = -1;
|
||||
} else if (point1Y > point2Y) {
|
||||
rv = 1;
|
||||
} else if (point1Y < point2Y) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
|
||||
}
|
||||
@ -21,11 +21,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@ -41,20 +37,20 @@ public class PdfVisualisationUtility {
|
||||
|
||||
public void drawNode(PDDocument document, DocumentTree.Entry entry) {
|
||||
|
||||
Options options = buildStandardOptionsForNodes(entry);
|
||||
DrawingOptions options = buildStandardOptionsForNodes(entry);
|
||||
|
||||
drawBBoxAndLabelAndNumberOnPage(document, entry, options);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void drawTextBlock(PDDocument document, TextBlock textBlock, Options options) {
|
||||
public void drawTextBlock(PDDocument document, TextBlock textBlock, DrawingOptions options) {
|
||||
|
||||
textBlock.getAtomicTextBlocks().forEach(atb -> drawAtomicTextBlock(document, atb, options));
|
||||
}
|
||||
|
||||
|
||||
public void drawAtomicTextBlock(PDDocument document, AtomicTextBlock atomicTextBlock, Options options) {
|
||||
public void drawAtomicTextBlock(PDDocument document, AtomicTextBlock atomicTextBlock, DrawingOptions options) {
|
||||
|
||||
drawRectangle2DList(document, atomicTextBlock.getPage().getNumber(), atomicTextBlock.getPositions().stream().toList(), options);
|
||||
|
||||
@ -62,7 +58,7 @@ public class PdfVisualisationUtility {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void drawText(String string, PDDocument document, Point2D location, Integer pageNumber, Options options) {
|
||||
public void drawText(String string, PDDocument document, Point2D location, Integer pageNumber, DrawingOptions options) {
|
||||
|
||||
var pdPage = document.getPage(pageNumber - 1);
|
||||
var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
@ -80,14 +76,14 @@ public class PdfVisualisationUtility {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void drawRectangle2DList(PDDocument document, int pageNumber, List<Rectangle2D> rectCollection, Options options) {
|
||||
public void drawRectangle2DList(PDDocument document, int pageNumber, List<Rectangle2D> rectCollection, DrawingOptions options) {
|
||||
|
||||
var pdPage = document.getPage(pageNumber - 1);
|
||||
drawRectangle2DList(document, rectCollection, options, pdPage);
|
||||
}
|
||||
|
||||
|
||||
private void drawRectangle2DList(PDDocument document, List<Rectangle2D> rectCollection, Options options, PDPage pdPage) throws IOException {
|
||||
private void drawRectangle2DList(PDDocument document, List<Rectangle2D> rectCollection, DrawingOptions options, PDPage pdPage) throws IOException {
|
||||
|
||||
var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
|
||||
@ -110,9 +106,9 @@ public class PdfVisualisationUtility {
|
||||
}
|
||||
|
||||
|
||||
private Options buildStandardOptionsForNodes(DocumentTree.Entry entry) {
|
||||
private DrawingOptions buildStandardOptionsForNodes(DocumentTree.Entry entry) {
|
||||
|
||||
return Options.builder().stroke(true).strokeColor(switch (entry.getType()) {
|
||||
return DrawingOptions.builder().stroke(true).strokeColor(switch (entry.getType()) {
|
||||
case DOCUMENT -> Color.LIGHT_GRAY;
|
||||
case HEADER, FOOTER -> Color.GREEN;
|
||||
case PARAGRAPH -> Color.BLUE;
|
||||
@ -125,7 +121,7 @@ public class PdfVisualisationUtility {
|
||||
}
|
||||
|
||||
|
||||
private void drawBBoxAndLabelAndNumberOnPage(PDDocument document, DocumentTree.Entry entry, Options options) {
|
||||
private void drawBBoxAndLabelAndNumberOnPage(PDDocument document, DocumentTree.Entry entry, DrawingOptions options) {
|
||||
|
||||
Map<Page, Rectangle2D> rectanglesPerPage = entry.getNode().getBBox();
|
||||
rectanglesPerPage.forEach((page, rectangle2D) -> {
|
||||
@ -152,7 +148,7 @@ public class PdfVisualisationUtility {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static void drawLine2DList(PDDocument pdDocument, int pageNumber, List<? extends Line2D> line2DS, Options options) {
|
||||
public static void drawLine2DList(PDDocument pdDocument, int pageNumber, List<? extends Line2D> line2DS, DrawingOptions options) {
|
||||
|
||||
var pdPage = pdDocument.getPage(pageNumber - 1);
|
||||
var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
@ -176,21 +172,4 @@ public class PdfVisualisationUtility {
|
||||
contentStream.close();
|
||||
}
|
||||
|
||||
|
||||
@Builder
|
||||
@Getter
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public static class Options {
|
||||
|
||||
boolean fill;
|
||||
boolean stroke;
|
||||
@Builder.Default
|
||||
Color strokeColor = Color.BLACK;
|
||||
@Builder.Default
|
||||
float strokeWidth = 1f;
|
||||
@Builder.Default
|
||||
Color fillColor = Color.BLACK;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Area;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.geom.RectangularShape;
|
||||
import java.util.Collections;
|
||||
@ -37,15 +38,28 @@ public class RectangleTransformations {
|
||||
}
|
||||
|
||||
|
||||
public static double calculateIntersectedArea(Rectangle2D r1, Rectangle2D r2) {
|
||||
|
||||
Area a1 = new Area(r1);
|
||||
Area a2 = new Area(r2);
|
||||
a1.intersect(a2);
|
||||
Rectangle2D intersection = a1.getBounds2D();
|
||||
return intersection.getWidth() * intersection.getHeight();
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D bBoxUnionAtomicTextBlock(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
public static Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> collectBBox() {
|
||||
|
||||
return new Rectangle2DBBoxCollector();
|
||||
}
|
||||
|
||||
|
||||
public static PDRectangle toPDRectangleBBox(List<Rectangle> rectangles) {
|
||||
|
||||
Rectangle2D rectangle2D = RectangleTransformations.rectangleBBox(rectangles);
|
||||
@ -70,6 +84,7 @@ public class RectangleTransformations {
|
||||
return format("%f,%f,%f,%f", rectangle2D.getX(), rectangle2D.getY(), rectangle2D.getWidth(), rectangle2D.getHeight());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangleBBox(List<Rectangle> rectangles) {
|
||||
|
||||
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DBBoxCollector());
|
||||
@ -84,6 +99,7 @@ public class RectangleTransformations {
|
||||
-redactionLogRectangle.getHeight());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D toRectangle2D(PDRectangle rectangle) {
|
||||
|
||||
return new Rectangle2D.Double(rectangle.getLowerLeftX(), rectangle.getLowerLeftY(), rectangle.getWidth(), rectangle.getHeight());
|
||||
|
||||
@ -0,0 +1,77 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.GeometricComparators.Y_FIRST_POINT_COMPARATOR;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
|
||||
public class RectangularIntersectionFinder {
|
||||
|
||||
public static List<Rectangle2D> find(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
// Fix for 211.pdf
|
||||
for (Ruling r : horizontalRulingLines) {
|
||||
if (r.getX2() < r.getX1()) {
|
||||
double a = r.getX2();
|
||||
r.x2 = (float) r.getX1();
|
||||
r.x1 = (float) a;
|
||||
}
|
||||
}
|
||||
|
||||
List<Rectangle2D> foundRectangles = new ArrayList<>();
|
||||
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
|
||||
List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
|
||||
intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR);
|
||||
|
||||
for (int i = 0; i < intersectionPointsList.size(); i++) {
|
||||
Point2D topLeft = intersectionPointsList.get(i);
|
||||
Ruling[] hv = intersectionPoints.get(topLeft);
|
||||
|
||||
// CrossingPointsDirectlyBelow( topLeft );
|
||||
List<Point2D> xPoints = new ArrayList<>();
|
||||
// CrossingPointsDirectlyToTheRight( topLeft );
|
||||
List<Point2D> yPoints = new ArrayList<>();
|
||||
|
||||
for (Point2D p : intersectionPointsList.subList(i, intersectionPointsList.size())) {
|
||||
if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) {
|
||||
xPoints.add(p);
|
||||
}
|
||||
if (p.getY() == topLeft.getY() && p.getX() > topLeft.getX()) {
|
||||
yPoints.add(p);
|
||||
}
|
||||
}
|
||||
outer:
|
||||
for (Point2D xPoint : xPoints) {
|
||||
// is there a vertical edge b/w topLeft and xPoint?
|
||||
if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) {
|
||||
continue;
|
||||
}
|
||||
for (Point2D yPoint : yPoints) {
|
||||
// is there a horizontal edge b/w topLeft and yPoint ?
|
||||
if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) {
|
||||
continue;
|
||||
}
|
||||
Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
|
||||
if (intersectionPoints.containsKey(btmRight)
|
||||
&& intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
|
||||
&& intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
|
||||
foundRectangles.add(new Rectangle2D.Double(topLeft.getX(), topLeft.getY(), btmRight.getX() - topLeft.getX(), btmRight.getY() - topLeft.getY()));
|
||||
break outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO create cells for vertical ruling lines with aligned endpoints at the top/bottom of a grid
|
||||
// that aren't connected with an horizontal ruler?
|
||||
// see: https://github.com/jazzido/tabula-extractor/issues/78#issuecomment-41481207
|
||||
|
||||
return foundRectangles;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,172 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.GeometricComparators.X_FIRST_POINT_COMPARATOR;
|
||||
import static com.knecon.fforesight.service.layoutparser.processor.utils.GeometricComparators.Y_FIRST_POINT_COMPARATOR;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
|
||||
|
||||
public class SpreadsheetFinder {
|
||||
|
||||
private static final int MAX_OUTER_POINT_TOLERANCE = 10;
|
||||
private static final float AREA_TOLERANCE = 0.001f;
|
||||
|
||||
|
||||
public static List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
||||
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
|
||||
List<Rectangle> rectangles = new ArrayList<>();
|
||||
Set<Point2D> pointSet = new HashSet<>();
|
||||
Map<Point2D, Point2D> edgesH = new HashMap<>();
|
||||
Map<Point2D, Point2D> edgesV = new HashMap<>();
|
||||
|
||||
for (Rectangle cell : cells) {
|
||||
for (Point2D pt : cell.getPoints()) {
|
||||
if (pointSet.contains(pt)) { // shared vertex, remove it
|
||||
pointSet.remove(pt);
|
||||
} else {
|
||||
pointSet.add(pt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// X first sort
|
||||
List<Point2D> pointsSortX = new ArrayList<>(pointSet);
|
||||
pointsSortX.sort(X_FIRST_POINT_COMPARATOR);
|
||||
// Y first sort
|
||||
List<Point2D> pointsSortY = new ArrayList<>(pointSet);
|
||||
pointsSortY.sort(Y_FIRST_POINT_COMPARATOR);
|
||||
|
||||
int i = 0;
|
||||
while (i < pointSet.size()) {
|
||||
float currY = (float) pointsSortY.get(i).getY();
|
||||
while (i < pointSet.size() && DoubleComparisons.feq(pointsSortY.get(i).getY(), currY)) {
|
||||
edgesH.put(pointsSortY.get(i), pointsSortY.get(i + 1));
|
||||
edgesH.put(pointsSortY.get(i + 1), pointsSortY.get(i));
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < pointSet.size()) {
|
||||
float currX = (float) pointsSortX.get(i).getX();
|
||||
while (i < pointSet.size() && DoubleComparisons.feq(pointsSortX.get(i).getX(), currX)) {
|
||||
edgesV.put(pointsSortX.get(i), pointsSortX.get(i + 1));
|
||||
edgesV.put(pointsSortX.get(i + 1), pointsSortX.get(i));
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Get all the polygons
|
||||
List<List<PolygonVertex>> polygons = new ArrayList<>();
|
||||
Point2D nextVertex;
|
||||
while (!edgesH.isEmpty()) {
|
||||
ArrayList<PolygonVertex> polygon = new ArrayList<>();
|
||||
Point2D first = edgesH.keySet()
|
||||
.iterator().next();
|
||||
polygon.add(new PolygonVertex(first, Direction.HORIZONTAL));
|
||||
edgesH.remove(first);
|
||||
|
||||
while (true) {
|
||||
PolygonVertex curr = polygon.get(polygon.size() - 1);
|
||||
PolygonVertex lastAddedVertex;
|
||||
if (curr.direction == Direction.HORIZONTAL) {
|
||||
nextVertex = edgesV.get(curr.point);
|
||||
edgesV.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.VERTICAL);
|
||||
} else {
|
||||
nextVertex = edgesH.get(curr.point);
|
||||
edgesH.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.HORIZONTAL);
|
||||
}
|
||||
polygon.add(lastAddedVertex);
|
||||
|
||||
if (lastAddedVertex.equals(polygon.get(0))) {
|
||||
// closed polygon
|
||||
polygon.remove(polygon.size() - 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (PolygonVertex vertex : polygon) {
|
||||
edgesH.remove(vertex.point);
|
||||
edgesV.remove(vertex.point);
|
||||
}
|
||||
polygons.add(polygon);
|
||||
}
|
||||
|
||||
// calculate grid-aligned minimum area rectangles for each found polygon
|
||||
for (List<PolygonVertex> poly : polygons) {
|
||||
float top = Float.MAX_VALUE;
|
||||
float left = Float.MAX_VALUE;
|
||||
float bottom = Float.MIN_VALUE;
|
||||
float right = Float.MIN_VALUE;
|
||||
for (PolygonVertex pt : poly) {
|
||||
top = (float) Math.min(top, pt.point.getY());
|
||||
left = (float) Math.min(left, pt.point.getX());
|
||||
bottom = (float) Math.max(bottom, pt.point.getY());
|
||||
right = (float) Math.max(right, pt.point.getX());
|
||||
}
|
||||
|
||||
// do not add polygons with too many outer points as they are unlikely to be tables
|
||||
if (poly.size() <= MAX_OUTER_POINT_TOLERANCE) {
|
||||
rectangles.add(new Rectangle(top - AREA_TOLERANCE, left - AREA_TOLERANCE, right - left + 2 * AREA_TOLERANCE, bottom - top + 2 * AREA_TOLERANCE));
|
||||
}
|
||||
}
|
||||
return rectangles;
|
||||
}
|
||||
|
||||
|
||||
private enum Direction {
|
||||
HORIZONTAL,
|
||||
VERTICAL
|
||||
}
|
||||
|
||||
static class PolygonVertex {
|
||||
|
||||
Point2D point;
|
||||
Direction direction;
|
||||
|
||||
|
||||
PolygonVertex(Point2D point, Direction direction) {
|
||||
|
||||
this.direction = direction;
|
||||
this.point = point;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof PolygonVertex)) {
|
||||
return false;
|
||||
}
|
||||
return this.point.equals(((PolygonVertex) other).point);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return this.point.hashCode();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return String.format("%s[point=%s,direction=%s]", this.getClass().getName(), this.point.toString(), this.direction.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,44 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
// simple implementation of a disjoint-set data structure
|
||||
// https://en.wikipedia.org/wiki/Disjoint-set_data_structure
|
||||
public class UnionFind<T> {

    /** Maps every known node to its parent; a root is mapped to itself. */
    Map<T, T> parents = new HashMap<>();
    /** Number of nodes in the set of a root; entries are created with 1 and updated for the surviving root in {@link #union}. */
    Map<T, Integer> numberOfObjects = new HashMap<>();


    /**
     * Returns the representative (root) of the set containing {@code node}.
     * An unknown node is registered as a new single-element set. All nodes visited on the way to the
     * root are re-attached directly to the root (path compression).
     *
     * @param node the node to look up
     * @return the root of the node's set
     */
    public T find(T node) {

        if (!parents.containsKey(node)) {
            parents.put(node, node);
            numberOfObjects.put(node, 1);
        }

        // first pass: climb to the root
        T root = parents.get(node);
        while (!root.equals(parents.get(root))) {
            root = parents.get(root);
        }

        // second pass: point every node on the path straight at the root
        T current = node;
        while (!current.equals(root)) {
            T formerParent = parents.get(current);
            parents.put(current, root);
            current = formerParent;
        }

        return parents.get(node);
    }


    /**
     * Merges the sets containing the two nodes. The root of the smaller set is attached to the root of
     * the larger one; when both sets have the same size, the root of {@code node1} survives.
     *
     * @param node1 a node of the first set
     * @param node2 a node of the second set
     */
    public void union(T node1, T node2) {

        T firstRoot = find(node1);
        T secondRoot = find(node2);
        if (firstRoot.equals(secondRoot)) {
            return;
        }

        boolean firstIsSmaller = numberOfObjects.getOrDefault(firstRoot, 1) < numberOfObjects.getOrDefault(secondRoot, 1);
        T absorbed = firstIsSmaller ? firstRoot : secondRoot;
        T survivor = firstIsSmaller ? secondRoot : firstRoot;

        parents.put(absorbed, survivor);
        numberOfObjects.put(survivor, numberOfObjects.get(survivor) + numberOfObjects.get(absorbed));
    }

}
|
||||
|
||||
@ -29,6 +29,7 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.ima
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DrawingOptions;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
@ -111,7 +112,7 @@ public class BdrJsonBuildTest extends AbstractTest {
|
||||
|
||||
try (PDDocument pdDocument = Loader.loadPDF(file); var outputStream = new FileOutputStream(resultingFileName)) {
|
||||
PdfDraw.drawDocumentGraph(pdDocument, document);
|
||||
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||
PdfDraw.drawTextBlock(pdDocument, textBlock, DrawingOptions.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||
pdDocument.save(outputStream);
|
||||
}
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DrawingOptions;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
@ -70,7 +71,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
|
||||
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
|
||||
log.info("drawing document");
|
||||
PdfDraw.drawDocumentGraph(pdDocument, documentGraph);
|
||||
PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||
PdfDraw.drawTextBlock(pdDocument, textBlock, DrawingOptions.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build());
|
||||
log.info("saving document");
|
||||
pdDocument.save(tmpFile);
|
||||
log.info("saved document");
|
||||
|
||||
@ -25,7 +25,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf";
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
|
||||
@ -681,7 +681,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
validateTableSize(document, 4);
|
||||
|
||||
validateTable(document, 0, 3, 3, 0, 0);
|
||||
validateTable(document, 1, 3, 5, 2, 0);
|
||||
validateTable(document, 1, 3, 6, 2, 0);
|
||||
validateTable(document, 2, 3, 3, 1, 0);
|
||||
validateTable(document, 3, 3, 3, 0, 0);
|
||||
|
||||
@ -742,13 +742,12 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||
|
||||
validateTableSize(document, 6);
|
||||
validateTableSize(document, 5);
|
||||
validateTable(document, 0, 1, 1, 0, 0);
|
||||
validateTable(document, 1, 1, 1, 0, 0);
|
||||
validateTable(document, 2, 1, 1, 0, 0);
|
||||
validateTable(document, 3, 1, 1, 0, 0);
|
||||
validateTable(document, 4, 1, 1, 0, 0);
|
||||
validateTable(document, 5, 1, 1, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -1,13 +1,17 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.services;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
@ -26,6 +30,8 @@ import com.knecon.fforesight.service.layoutparser.processor.services.RulingClean
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DrawingOptions;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangularIntersectionFinder;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
@ -34,19 +40,40 @@ import lombok.SneakyThrows;
|
||||
public class RulingCleaningServiceTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
// @Disabled
|
||||
@Disabled
|
||||
@SneakyThrows
|
||||
public void textRectanglesFromRulingsExtraction() {
|
||||
|
||||
String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf";
|
||||
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_CELLS.pdf";
|
||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||
List<List<Rectangle2D>> rectanglesPerPage = new LinkedList<>();
|
||||
for (PageContents pageContent : pageContents) {
|
||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings());
|
||||
List<Rectangle2D> rects = RectangularIntersectionFinder.find(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
rectanglesPerPage.add(rects);
|
||||
}
|
||||
|
||||
PdfDraw.drawRectanglesPerPage(fileName, rectanglesPerPage, lineFileName, DrawingOptions.builder().stroke(true).strokeColor(Color.RED).build());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
@SneakyThrows
|
||||
public void textRulingExtraction() {
|
||||
|
||||
String fileName = "files/211.pdf";
|
||||
String fileName = "files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf";
|
||||
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
|
||||
List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
|
||||
for (PageContents pageContent : pageContents) {
|
||||
cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
|
||||
}
|
||||
var cleanRulings = cleanRulingsPerPage.stream().map(CleanRulings::getVertical).collect(Collectors.toList());
|
||||
PdfDraw.drawLinesPerPage(fileName, cleanRulings, lineFileName);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,20 +24,31 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.DrawingOptions;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class PdfDraw {
|
||||
|
||||
public static void drawRectanglesPerPage(String filename, List<List<Rectangle2D>> rectanglesPerPage, String tmpFileName, DrawingOptions options) throws IOException {
|
||||
|
||||
ClassPathResource pdfResource = new ClassPathResource(filename);
|
||||
try (PDDocument pdDocument = Loader.loadPDF(pdfResource.getFile()); var out = new FileOutputStream(tmpFileName)) {
|
||||
for (int pageNumber = 1; pageNumber < pdDocument.getNumberOfPages() + 1; pageNumber++) {
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
rectanglesPerPage.get(pageNumber - 1),
|
||||
options);
|
||||
}
|
||||
pdDocument.save(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static void drawRectanglesPerPage(String filename, List<List<Rectangle2D>> rectanglesPerPage, String tmpFileName) throws IOException {
|
||||
|
||||
ClassPathResource pdfResource = new ClassPathResource(filename);
|
||||
@ -46,7 +57,7 @@ public class PdfDraw {
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
rectanglesPerPage.get(pageNumber - 1),
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).build());
|
||||
DrawingOptions.builder().stroke(true).build());
|
||||
}
|
||||
pdDocument.save(out);
|
||||
}
|
||||
@ -62,13 +73,13 @@ public class PdfDraw {
|
||||
var rectanglesOnPage = rectanglesPerPage.get(pageNumber - 1);
|
||||
for (int lineNumber = 0; lineNumber < rectanglesOnPage.size(); lineNumber++) {
|
||||
var rectanglesInLine = rectanglesOnPage.get(lineNumber);
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, rectanglesInLine, PdfVisualisationUtility.Options.builder().stroke(true).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, rectanglesInLine, DrawingOptions.builder().stroke(true).build());
|
||||
double y = Math.min(rectanglesInLine.get(0).getMinY(), rectanglesInLine.get(0).getMaxY());
|
||||
PdfVisualisationUtility.drawText(String.format("%d", lineNumber),
|
||||
pdDocument,
|
||||
new Point2D.Double(rectanglesInLine.get(0).getX() - (5 + (5 * countNumberOfDigits(lineNumber))), y + 2),
|
||||
pageNumber,
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).build());
|
||||
DrawingOptions.builder().stroke(true).build());
|
||||
}
|
||||
}
|
||||
pdDocument.save(out);
|
||||
@ -99,20 +110,20 @@ public class PdfDraw {
|
||||
|
||||
public static void drawNode(PDDocument document, DocumentTree.Entry entry) {
|
||||
|
||||
Options options = buildStandardOptionsForNodes(entry);
|
||||
DrawingOptions options = buildStandardOptionsForNodes(entry);
|
||||
|
||||
drawBBoxAndLabelAndNumberOnPage(document, entry, options);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static void drawTextBlock(PDDocument document, TextBlock textBlock, Options options) {
|
||||
public static void drawTextBlock(PDDocument document, TextBlock textBlock, DrawingOptions options) {
|
||||
|
||||
textBlock.getAtomicTextBlocks().forEach(atb -> drawAtomicTextBlock(document, atb, options));
|
||||
}
|
||||
|
||||
|
||||
public static void drawAtomicTextBlock(PDDocument document, AtomicTextBlock atomicTextBlock, Options options) {
|
||||
public static void drawAtomicTextBlock(PDDocument document, AtomicTextBlock atomicTextBlock, DrawingOptions options) {
|
||||
|
||||
drawRectangle2DList(document, atomicTextBlock.getPage().getNumber(), atomicTextBlock.getPositions().stream().toList(), options);
|
||||
|
||||
@ -120,7 +131,7 @@ public class PdfDraw {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void drawText(String string, PDDocument document, Point2D location, Integer pageNumber, Options options, boolean rotate) {
|
||||
private static void drawText(String string, PDDocument document, Point2D location, Integer pageNumber, DrawingOptions options, boolean rotate) {
|
||||
|
||||
var pdPage = document.getPage(pageNumber - 1);
|
||||
var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
@ -142,14 +153,14 @@ public class PdfDraw {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static void drawRectangle2DList(PDDocument document, int pageNumber, List<Rectangle2D> rectCollection, Options options) {
|
||||
public static void drawRectangle2DList(PDDocument document, int pageNumber, List<Rectangle2D> rectCollection, DrawingOptions options) {
|
||||
|
||||
var pdPage = document.getPage(pageNumber - 1);
|
||||
drawRectangle2DList(document, rectCollection, options, pdPage);
|
||||
}
|
||||
|
||||
|
||||
private static void drawRectangle2DList(PDDocument document, List<Rectangle2D> rectCollection, Options options, PDPage pdPage) throws IOException {
|
||||
private static void drawRectangle2DList(PDDocument document, List<Rectangle2D> rectCollection, DrawingOptions options, PDPage pdPage) throws IOException {
|
||||
|
||||
var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
|
||||
@ -181,12 +192,12 @@ public class PdfDraw {
|
||||
// PdfVisualisationUtility.drawLine2DList(pdDocument,
|
||||
// pageNumber,
|
||||
// list.get(pageNumber - 1),
|
||||
// PdfVisualisationUtility.Options.builder().stroke(true).build());
|
||||
// PdfVisualisationUtility.DrawingOptions.builder().stroke(true).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument,
|
||||
pageNumber,
|
||||
rectanglesPerPage.get(pageNumber - 1),
|
||||
PdfVisualisationUtility.Options.builder().stroke(true).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, list.get(pageNumber - 1), PdfVisualisationUtility.Options.builder().stroke(true).build());
|
||||
DrawingOptions.builder().stroke(true).build());
|
||||
PdfVisualisationUtility.drawRectangle2DList(pdDocument, pageNumber, list.get(pageNumber - 1), DrawingOptions.builder().stroke(true).build());
|
||||
}
|
||||
pdDocument.save(out);
|
||||
}
|
||||
@ -202,35 +213,18 @@ public class PdfDraw {
|
||||
PdfVisualisationUtility.drawLine2DList(pdDocument,
|
||||
pageNumber,
|
||||
linesPerPage.get(pageNumber - 1),
|
||||
PdfVisualisationUtility.Options.builder().strokeColor(Color.RED).stroke(true).build());
|
||||
DrawingOptions.builder().strokeColor(Color.RED).stroke(true).build());
|
||||
}
|
||||
pdDocument.save(out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@Getter
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public static class Options {
|
||||
|
||||
boolean stroke;
|
||||
@Builder.Default
|
||||
Color strokeColor = Color.BLACK;
|
||||
@Builder.Default
|
||||
float strokeWidth = 1f;
|
||||
|
||||
boolean fill;
|
||||
@Builder.Default
|
||||
Color fillColor = Color.BLACK;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static Options buildStandardOptionsForNodes(DocumentTree.Entry entry) {
|
||||
private static DrawingOptions buildStandardOptionsForNodes(DocumentTree.Entry entry) {
|
||||
|
||||
return Options.builder().stroke(true).strokeColor(switch (entry.getType()) {
|
||||
return DrawingOptions.builder().stroke(true).strokeColor(switch (entry.getType()) {
|
||||
case DOCUMENT -> Color.LIGHT_GRAY;
|
||||
case HEADER, FOOTER -> Color.GREEN;
|
||||
case PARAGRAPH -> Color.BLUE;
|
||||
@ -243,7 +237,7 @@ public class PdfDraw {
|
||||
}
|
||||
|
||||
|
||||
private static void drawBBoxAndLabelAndNumberOnPage(PDDocument document, DocumentTree.Entry entry, Options options) {
|
||||
private static void drawBBoxAndLabelAndNumberOnPage(PDDocument document, DocumentTree.Entry entry, DrawingOptions options) {
|
||||
|
||||
Map<Page, Rectangle2D> rectanglesPerPage = entry.getNode().getBBox();
|
||||
for (Page page : rectanglesPerPage.keySet()) {
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user