Reading Order
This commit is contained in:
parent
72202f63dc
commit
0c8c727303
@ -1,12 +1,17 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.DisjointSets;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.LineBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.NearestNeighbourService;
|
||||
@ -20,6 +25,8 @@ import lombok.RequiredArgsConstructor;
|
||||
@RequiredArgsConstructor
|
||||
public class DocstrumSegmentationService {
|
||||
|
||||
private static final double MAX_VERTICAL_MERGE_DIST = 0.5;
|
||||
|
||||
private final NearestNeighbourService nearestNeighbourService;
|
||||
private final SpacingService spacingService;
|
||||
private final LineBuilderService lineBuilderService;
|
||||
@ -42,7 +49,105 @@ public class DocstrumSegmentationService {
|
||||
|
||||
var zones = zoneBuilderService.buildZones(lines, characterSpacing, lineSpacing);
|
||||
|
||||
return readingOrderService.resolve(zones);
|
||||
zones = mergeLines(zones, characterSpacing, Double.NEGATIVE_INFINITY, 0.0, 0.0, lineSpacing * MAX_VERTICAL_MERGE_DIST);
|
||||
|
||||
return readingOrderService.resolve(zones, false);
|
||||
}
|
||||
|
||||
// private List<Zone> mergeZones(List<Zone> zones, double tolerance) {
|
||||
//
|
||||
// List<BxBounds> bounds = new ArrayList<BxBounds>(zones.size());
|
||||
// for (List<ComponentLine> zone : zones) {
|
||||
// BxBoundsBuilder builder = new BxBoundsBuilder();
|
||||
// for (ComponentLine line : zone) {
|
||||
// for (Component component : line.getComponents()) {
|
||||
// builder.expand(component.getChunk().getBounds());
|
||||
// }
|
||||
// }
|
||||
// bounds.add(builder.getBounds());
|
||||
// }
|
||||
//
|
||||
// List<List<ComponentLine>> outputZones = new ArrayList<List<ComponentLine>>();
|
||||
// mainFor:
|
||||
// for (int i = 0; i < zones.size(); i++) {
|
||||
// for (int j = 0; j < zones.size(); j++) {
|
||||
// if (i == j || bounds.get(j) == null || bounds.get(i) == null) {
|
||||
// continue;
|
||||
// }
|
||||
// if (BxModelUtils.contains(bounds.get(j), bounds.get(i), tolerance)) {
|
||||
// zones.get(j).addAll(zones.get(i));
|
||||
// bounds.set(i, null);
|
||||
// continue mainFor;
|
||||
// }
|
||||
// }
|
||||
// outputZones.add(zones.get(i));
|
||||
// }
|
||||
// return outputZones;
|
||||
// }
|
||||
|
||||
|
||||
private List<Zone> mergeLines(List<Zone> zones,
|
||||
double wordSpacing,
|
||||
double minHorizontalDistance,
|
||||
double maxHorizontalDistance,
|
||||
double minVerticalDistance,
|
||||
double maxVerticalDistance) {
|
||||
|
||||
List<Zone> outputZones = new ArrayList<>(zones.size());
|
||||
for (Zone zone : zones) {
|
||||
outputZones.add(mergeLinesInZone(zone, wordSpacing, minHorizontalDistance, maxHorizontalDistance, minVerticalDistance, maxVerticalDistance));
|
||||
}
|
||||
return outputZones;
|
||||
}
|
||||
|
||||
|
||||
private Zone mergeLinesInZone(Zone zone,
|
||||
double wordSpacing,
|
||||
double minHorizontalDistance,
|
||||
double maxHorizontalDistance,
|
||||
double minVerticalDistance,
|
||||
double maxVerticalDistance) {
|
||||
|
||||
DisjointSets<Line> sets = new DisjointSets<>(zone.getLines());
|
||||
for (int i = 0; i < zone.getLines().size(); i++) {
|
||||
Line li = zone.getLines().get(i);
|
||||
for (int j = i + 1; j < zone.getLines().size(); j++) {
|
||||
Line lj = zone.getLines().get(j);
|
||||
double hDist = li.horizontalDistance(lj);
|
||||
double vDist = li.verticalDistance(lj);
|
||||
if (minHorizontalDistance <= hDist && hDist <= maxHorizontalDistance && minVerticalDistance <= vDist && vDist <= maxVerticalDistance) {
|
||||
sets.union(li, lj);
|
||||
} else if (minVerticalDistance <= vDist && vDist <= maxVerticalDistance && Math.abs(hDist - Math.min(li.getLength(), lj.getLength())) < 0.1) {
|
||||
boolean componentOverlap = false;
|
||||
int overlappingCount = 0;
|
||||
for (Character ci : li.getCharacters()) {
|
||||
for (Character cj : lj.getCharacters()) {
|
||||
double dist = ci.overlappingDistance(cj);
|
||||
if (dist > 2) {
|
||||
componentOverlap = true;
|
||||
}
|
||||
if (dist > 0) {
|
||||
overlappingCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!componentOverlap && overlappingCount <= 2) {
|
||||
sets.union(li, lj);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
List<Line> outputZone = new ArrayList<>();
|
||||
for (Set<Line> group : sets) {
|
||||
List<Character> components = new ArrayList<>();
|
||||
for (Line line : group) {
|
||||
components.addAll(line.getCharacters());
|
||||
}
|
||||
components.sort(Comparator.comparingDouble(Character::getX));
|
||||
|
||||
outputZone.add(new Line(components, wordSpacing));
|
||||
}
|
||||
return new Zone(outputZone);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,29 +1,14 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
|
||||
|
||||
/**
|
||||
* Filter class for neighbor objects that checks if the angle of the
|
||||
* neighbor is within specified range.
|
||||
*/
|
||||
public abstract class AngleFilter {
|
||||
|
||||
private final double lowerAngle;
|
||||
private final double upperAngle;
|
||||
protected double lowerAngle;
|
||||
protected double upperAngle;
|
||||
|
||||
|
||||
private AngleFilter(double lowerAngle, double upperAngle) {
|
||||
|
||||
this.lowerAngle = lowerAngle;
|
||||
this.upperAngle = upperAngle;
|
||||
}
|
||||
public abstract boolean matches(Neighbor neighbor);
|
||||
|
||||
|
||||
/**
|
||||
* Constructs new angle filter.
|
||||
*
|
||||
* @param lowerAngle minimum angle in range [-3*pi/2, pi/2)
|
||||
* @param upperAngle maximum angle in range [-pi/2, 3*pi/2)
|
||||
* @return newly constructed angle filter
|
||||
*/
|
||||
public static AngleFilter newInstance(double lowerAngle, double upperAngle) {
|
||||
|
||||
if (lowerAngle < -Math.PI / 2) {
|
||||
@ -40,33 +25,19 @@ public abstract class AngleFilter {
|
||||
}
|
||||
|
||||
|
||||
public double getLowerAngle() {
|
||||
|
||||
return lowerAngle;
|
||||
}
|
||||
|
||||
|
||||
public double getUpperAngle() {
|
||||
|
||||
return upperAngle;
|
||||
}
|
||||
|
||||
|
||||
public abstract boolean matches(Neighbor neighbor);
|
||||
|
||||
|
||||
public static final class AndFilter extends AngleFilter {
|
||||
|
||||
private AndFilter(double lowerAngle, double upperAngle) {
|
||||
|
||||
super(lowerAngle, upperAngle);
|
||||
this.lowerAngle = lowerAngle;
|
||||
this.upperAngle = upperAngle;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean matches(Neighbor neighbor) {
|
||||
|
||||
return getLowerAngle() <= neighbor.getAngle() && neighbor.getAngle() < getUpperAngle();
|
||||
return lowerAngle <= neighbor.getAngle() && neighbor.getAngle() < upperAngle;
|
||||
}
|
||||
|
||||
}
|
||||
@ -75,14 +46,15 @@ public abstract class AngleFilter {
|
||||
|
||||
private OrFilter(double lowerAngle, double upperAngle) {
|
||||
|
||||
super(lowerAngle, upperAngle);
|
||||
this.lowerAngle = lowerAngle;
|
||||
this.upperAngle = upperAngle;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean matches(Neighbor neighbor) {
|
||||
|
||||
return getLowerAngle() <= neighbor.getAngle() || neighbor.getAngle() < getUpperAngle();
|
||||
return lowerAngle <= neighbor.getAngle() || neighbor.getAngle() < upperAngle;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
@ -51,6 +52,20 @@ public class Character {
|
||||
}
|
||||
|
||||
|
||||
public double overlappingDistance(Character other) {
|
||||
|
||||
double[] xs = new double[4];
|
||||
double s = Math.sin(-0), c = Math.cos(-0);
|
||||
xs[0] = c * x - s * y;
|
||||
xs[1] = c * (x + textPosition.getWidthDirAdj()) - s * (y + textPosition.getHeightDir());
|
||||
xs[2] = c * other.x - s * other.y;
|
||||
xs[3] = c * (other.x + other.textPosition.getWidthDirAdj()) - s * (other.y + other.textPosition.getHeightDir());
|
||||
boolean overlapping = xs[1] >= xs[2] && xs[3] >= xs[0];
|
||||
Arrays.sort(xs);
|
||||
return Math.abs(xs[2] - xs[1]) * (overlapping ? 1 : -1);
|
||||
}
|
||||
|
||||
|
||||
public void setNeighbors(List<Neighbor> neighbors) {
|
||||
|
||||
this.neighbors = neighbors;
|
||||
|
||||
@ -10,29 +10,17 @@ import java.util.Set;
|
||||
|
||||
public class DisjointSets<E> implements Iterable<Set<E>> {
|
||||
|
||||
private final Map<E, Entry<E>> map = new HashMap<E, Entry<E>>();
|
||||
private final Map<E, Entry<E>> map = new HashMap<>();
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a new set of singletons.
|
||||
*
|
||||
* @param c elements of singleton sets
|
||||
*/
|
||||
public DisjointSets(Collection<? extends E> c) {
|
||||
public DisjointSets(Collection<? extends E> collection) {
|
||||
|
||||
for (E element : c) {
|
||||
for (E element : collection) {
|
||||
map.put(element, new Entry<E>(element));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if elements are in the same subsets.
|
||||
*
|
||||
* @param e1 element from a subset
|
||||
* @param e2 element from a subset
|
||||
* @return true if elements are in the same subset; false otherwise
|
||||
*/
|
||||
public boolean areTogether(E e1, E e2) {
|
||||
|
||||
return map.get(e1).findRepresentative() == map.get(e2).findRepresentative();
|
||||
|
||||
@ -33,11 +33,11 @@ public class Line extends BoundingBox {
|
||||
if (characters.size() >= 2) {
|
||||
// Simple linear regression
|
||||
double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
|
||||
for (Character component : characters) {
|
||||
sx += component.getX();
|
||||
sxx += component.getX() * component.getX();
|
||||
sxy += component.getX() * component.getY();
|
||||
sy += component.getY();
|
||||
for (Character character : characters) {
|
||||
sx += character.getX();
|
||||
sxx += character.getX() * character.getX();
|
||||
sxy += character.getX() * character.getY();
|
||||
sy += character.getY();
|
||||
}
|
||||
double b = (characters.size() * sxy - sx * sy) / (characters.size() * sxx - sx * sx);
|
||||
double a = (sy - b * sx) / characters.size();
|
||||
@ -47,13 +47,13 @@ public class Line extends BoundingBox {
|
||||
this.x1 = characters.get(characters.size() - 1).getX();
|
||||
this.y1 = a + b * this.x1;
|
||||
} else if (!characters.isEmpty()) {
|
||||
Character component = characters.get(0);
|
||||
double dx = component.getTextPosition().getWidthDirAdj() / 3;
|
||||
Character character = characters.get(0);
|
||||
double dx = character.getTextPosition().getWidthDirAdj() / 3;
|
||||
double dy = dx * Math.tan(0);
|
||||
this.x0 = component.getX() - dx;
|
||||
this.x1 = component.getX() + dx;
|
||||
this.y0 = component.getY() - dy;
|
||||
this.y1 = component.getY() + dy;
|
||||
this.x0 = character.getX() - dx;
|
||||
this.x1 = character.getX() + dx;
|
||||
this.y0 = character.getY() - dy;
|
||||
this.y1 = character.getY() + dy;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Component list must not be empty");
|
||||
}
|
||||
@ -155,5 +155,13 @@ public class Line extends BoundingBox {
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
words.forEach(word -> sb.append(word.toString()).append(" "));
|
||||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -39,4 +39,12 @@ public class Zone extends BoundingBox {
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
lines.forEach(line -> sb.append(line.toString()).append("\n"));
|
||||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class BoundingBoxDistanceTuple implements Comparable<BoundingBoxDistanceTuple> {
|
||||
|
||||
private boolean c;
|
||||
private double distance;
|
||||
private BoundingBox zone1;
|
||||
private BoundingBox zone2;
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(BoundingBoxDistanceTuple compareObject) {
|
||||
|
||||
double eps = 1E-3;
|
||||
if (c == compareObject.c) {
|
||||
return DoubleUtils.compareDouble(distance, compareObject.distance, eps);
|
||||
} else {
|
||||
return c ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -4,6 +4,9 @@ import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class BoundingBoxZoneGroup extends BoundingBox {
|
||||
|
||||
private BoundingBox leftChild;
|
||||
@ -14,51 +17,13 @@ public class BoundingBoxZoneGroup extends BoundingBox {
|
||||
|
||||
this.leftChild = child1;
|
||||
this.rightChild = child2;
|
||||
setBounds(Math.min(child1.getX(), child2.getX()),
|
||||
Math.min(child1.getY(), child2.getY()),
|
||||
Math.max(child1.getX() + child1.getWidth(), child2.getX() + child2.getWidth()),
|
||||
Math.max(child1.getY() + child1.getHeight(), child2.getY() + child2.getHeight()));
|
||||
|
||||
double minX = Math.min(leftChild.getX(), rightChild.getX());
|
||||
double minY = Math.min(leftChild.getY(), rightChild.getY());
|
||||
double maxX = Math.max(leftChild.getX() + leftChild.getWidth(), rightChild.getX() + rightChild.getWidth());
|
||||
double maxY = Math.max(leftChild.getY() + leftChild.getHeight(), rightChild.getY() + rightChild.getHeight());
|
||||
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
}
|
||||
|
||||
|
||||
public void setbBox(Rectangle2D bBox) {
|
||||
|
||||
super.setBBox(bBox);
|
||||
}
|
||||
|
||||
|
||||
public BoundingBox getLeftChild() {
|
||||
|
||||
return leftChild;
|
||||
}
|
||||
|
||||
|
||||
public BoundingBox getRightChild() {
|
||||
|
||||
return rightChild;
|
||||
}
|
||||
|
||||
|
||||
public BoundingBoxZoneGroup setLeftChild(BoundingBox obj) {
|
||||
|
||||
this.leftChild = obj;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public BoundingBoxZoneGroup setRightChild(BoundingBox obj) {
|
||||
|
||||
this.rightChild = obj;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public BoundingBoxZoneGroup setBounds(double x0, double y0, double x1, double y1) {
|
||||
|
||||
assert x1 >= x0;
|
||||
assert y1 >= y0;
|
||||
this.setBBox(new Rectangle2D.Double(x0, y0, x1 - x0, y1 - y0));
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,115 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
|
||||
|
||||
public class DistElem<E> implements Comparable<DistElem<E>> {
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + (c ? 1231 : 1237);
|
||||
long temp;
|
||||
temp = Double.doubleToLongBits(dist);
|
||||
result = prime * result + (int) (temp ^ (temp >>> 32));
|
||||
result = prime * result + ((obj1 == null) ? 0 : obj1.hashCode());
|
||||
result = prime * result + ((obj2 == null) ? 0 : obj2.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
DistElem other = (DistElem) obj;
|
||||
if (c != other.c) {
|
||||
return false;
|
||||
}
|
||||
if (Double.doubleToLongBits(dist) != Double.doubleToLongBits(other.dist)) {
|
||||
return false;
|
||||
}
|
||||
if (obj1 == null) {
|
||||
if (other.obj1 != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!obj1.equals(other.obj1)) {
|
||||
return false;
|
||||
}
|
||||
if (obj2 == null) {
|
||||
if (other.obj2 != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!obj2.equals(other.obj2)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
boolean c;
|
||||
double dist;
|
||||
E obj1;
|
||||
E obj2;
|
||||
|
||||
|
||||
public boolean isC() {
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
public void setC(boolean c) {
|
||||
|
||||
this.c = c;
|
||||
}
|
||||
|
||||
|
||||
public double getDist() {
|
||||
|
||||
return dist;
|
||||
}
|
||||
|
||||
|
||||
public E getObj1() {
|
||||
|
||||
return obj1;
|
||||
}
|
||||
|
||||
|
||||
public E getObj2() {
|
||||
|
||||
return obj2;
|
||||
}
|
||||
|
||||
|
||||
public DistElem(boolean c, double dist, E obj1, E obj2) {
|
||||
|
||||
this.c = c;
|
||||
this.dist = dist;
|
||||
this.obj1 = obj1;
|
||||
this.obj2 = obj2;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(DistElem<E> compareObject) {
|
||||
|
||||
double eps = 1E-3;
|
||||
if (c == compareObject.c) {
|
||||
return DoubleUtils.compareDouble(dist, compareObject.dist, eps);
|
||||
} else {
|
||||
return c ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -5,13 +5,14 @@ import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.BoundingBoxDistanceTuple;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.BoundingBoxZoneGroup;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.DistElem;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.DocumentPlane;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.TreeToListConverter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
|
||||
@ -20,66 +21,96 @@ import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.ut
|
||||
public class ReadingOrderService {
|
||||
|
||||
static final int GRIDSIZE = 50;
|
||||
static final double EPS = 0.01;
|
||||
static final int MAX_ZONES = 1000;
|
||||
static final Comparator<BoundingBox> Y_ASCENDING_ORDER = new Comparator<BoundingBox>() {
|
||||
static final double THRESHOLD = 1;
|
||||
|
||||
@Override
|
||||
public int compare(BoundingBox o1, BoundingBox o2) {
|
||||
|
||||
return DoubleUtils.compareDouble(o1.getY(), o2.getY(), EPS);
|
||||
public List<Zone> resolve(List<Zone> zones, boolean yxOrder) {
|
||||
|
||||
if (zones.isEmpty() || zones.size() == 1) {
|
||||
return zones;
|
||||
}
|
||||
};
|
||||
|
||||
static final Comparator<BoundingBox> X_ASCENDING_ORDER = new Comparator<BoundingBox>() {
|
||||
|
||||
@Override
|
||||
public int compare(BoundingBox o1, BoundingBox o2) {
|
||||
|
||||
return DoubleUtils.compareDouble(o1.getX(), o2.getX(), EPS);
|
||||
if (yxOrder) {
|
||||
zones.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
|
||||
.thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
return zones;
|
||||
}
|
||||
};
|
||||
|
||||
static final Comparator<BoundingBox> YX_ASCENDING_ORDER = new Comparator<BoundingBox>() {
|
||||
return simpleOrder(zones);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(BoundingBox o1, BoundingBox o2) {
|
||||
|
||||
int yCompare = Y_ASCENDING_ORDER.compare(o1, o2);
|
||||
return yCompare == 0 ? X_ASCENDING_ORDER.compare(o1, o2) : yCompare;
|
||||
private List<Zone> simpleOrder(List<Zone> zones) {
|
||||
|
||||
double minX = Double.POSITIVE_INFINITY;
|
||||
double maxX = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Zone zone : zones) {
|
||||
if (zone.getX() < minX) {
|
||||
minX = zone.getX();
|
||||
}
|
||||
if (zone.getX() + zone.getWidth() > maxX) {
|
||||
maxX = zone.getX() + zone.getWidth();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
double midLineXCoordinate = (minX + maxX) / 2;
|
||||
|
||||
public List<Zone> resolve(List<Zone> zones) {
|
||||
|
||||
List<Zone> orderedZones;
|
||||
if (zones.size() > MAX_ZONES) {
|
||||
orderedZones = new ArrayList<>(zones);
|
||||
Collections.sort(orderedZones, YX_ASCENDING_ORDER);
|
||||
} else {
|
||||
orderedZones = reorderZones(zones);
|
||||
List<Zone> leftOf = new ArrayList<>();
|
||||
List<Zone> rightOf = new ArrayList<>();
|
||||
List<Zone> middle = new ArrayList<>();
|
||||
for (Zone zone : zones) {
|
||||
if (zone.getX() < midLineXCoordinate && zone.getX() + zone.getWidth() < midLineXCoordinate) {
|
||||
leftOf.add(zone);
|
||||
} else if (zone.getX() > midLineXCoordinate && zone.getX() + zone.getWidth() > midLineXCoordinate) {
|
||||
rightOf.add(zone);
|
||||
} else {
|
||||
middle.add(zone);
|
||||
}
|
||||
}
|
||||
return orderedZones;
|
||||
|
||||
leftOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
|
||||
.thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
|
||||
rightOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
|
||||
.thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
|
||||
middle.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
|
||||
List<Zone> sortedZones = new ArrayList<>();
|
||||
sortedZones.addAll(leftOf);
|
||||
sortedZones.addAll(rightOf);
|
||||
|
||||
ListIterator<Zone> itty = middle.listIterator();
|
||||
|
||||
while (itty.hasNext()) {
|
||||
Zone current = itty.next();
|
||||
|
||||
for (int i = 0; i < sortedZones.size(); i++) {
|
||||
if (current.getY() < sortedZones.get(i).getY()) {
|
||||
sortedZones.add(i, current);
|
||||
itty.remove();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sortedZones.addAll(middle);
|
||||
|
||||
return sortedZones;
|
||||
}
|
||||
|
||||
|
||||
private List<Zone> reorderZones(List<Zone> unorderedZones) {
|
||||
|
||||
if (unorderedZones.isEmpty()) {
|
||||
return new ArrayList<>();
|
||||
} else if (unorderedZones.size() == 1) {
|
||||
List<Zone> ret = new ArrayList<>(1);
|
||||
ret.add(unorderedZones.get(0));
|
||||
return ret;
|
||||
} else {
|
||||
BoundingBoxZoneGroup bxZonesTree = groupZonesHierarchically(unorderedZones);
|
||||
sortGroupedZones(bxZonesTree);
|
||||
TreeToListConverter treeConverter = new TreeToListConverter();
|
||||
List<Zone> orderedZones = treeConverter.convertToList(bxZonesTree);
|
||||
assert unorderedZones.size() == orderedZones.size();
|
||||
return orderedZones;
|
||||
}
|
||||
BoundingBoxZoneGroup bxZonesTree = groupZonesHierarchically(unorderedZones);
|
||||
sortGroupedZones(bxZonesTree);
|
||||
TreeToListConverter treeConverter = new TreeToListConverter();
|
||||
List<Zone> orderedZones = treeConverter.convertToList(bxZonesTree);
|
||||
assert unorderedZones.size() == orderedZones.size();
|
||||
return orderedZones;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -94,29 +125,29 @@ public class ReadingOrderService {
|
||||
/*
|
||||
* Distance tuples are stored sorted by ascending distance value
|
||||
*/
|
||||
List<DistElem<BoundingBox>> dists = new ArrayList<DistElem<BoundingBox>>(zones.size() * zones.size() / 2);
|
||||
List<BoundingBoxDistanceTuple> dists = new ArrayList<>();
|
||||
for (int idx1 = 0; idx1 < zones.size(); ++idx1) {
|
||||
for (int idx2 = idx1 + 1; idx2 < zones.size(); ++idx2) {
|
||||
Zone zone1 = zones.get(idx1);
|
||||
Zone zone2 = zones.get(idx2);
|
||||
dists.add(new DistElem<BoundingBox>(false, distance(zone1, zone2), zone1, zone2));
|
||||
dists.add(new BoundingBoxDistanceTuple(false, distance(zone1, zone2), zone1, zone2));
|
||||
}
|
||||
}
|
||||
Collections.sort(dists);
|
||||
DocumentPlane plane = new DocumentPlane(zones, GRIDSIZE);
|
||||
while (!dists.isEmpty()) {
|
||||
DistElem<BoundingBox> distElem = dists.get(0);
|
||||
BoundingBoxDistanceTuple distElem = dists.get(0);
|
||||
dists.remove(0);
|
||||
if (!distElem.isC() && plane.anyObjectsBetween(distElem.getObj1(), distElem.getObj2())) {
|
||||
dists.add(new DistElem<BoundingBox>(true, distElem.getDist(), distElem.getObj1(), distElem.getObj2()));
|
||||
if (!distElem.isC() && plane.anyObjectsBetween(distElem.getZone1(), distElem.getZone2())) {
|
||||
dists.add(new BoundingBoxDistanceTuple(true, distElem.getDistance(), distElem.getZone1(), distElem.getZone2()));
|
||||
continue;
|
||||
}
|
||||
BoundingBoxZoneGroup newGroup = new BoundingBoxZoneGroup(distElem.getObj1(), distElem.getObj2());
|
||||
plane.remove(distElem.getObj1()).remove(distElem.getObj2());
|
||||
dists = removeDistElementsContainingObject(dists, distElem.getObj1());
|
||||
dists = removeDistElementsContainingObject(dists, distElem.getObj2());
|
||||
BoundingBoxZoneGroup newGroup = new BoundingBoxZoneGroup(distElem.getZone1(), distElem.getZone2());
|
||||
plane.remove(distElem.getZone1()).remove(distElem.getZone2());
|
||||
dists = removeDistElementsContainingObject(dists, distElem.getZone1());
|
||||
dists = removeDistElementsContainingObject(dists, distElem.getZone2());
|
||||
for (BoundingBox other : plane.getObjects()) {
|
||||
dists.add(new DistElem<BoundingBox>(false, distance(other, newGroup), newGroup, other));
|
||||
dists.add(new BoundingBoxDistanceTuple(false, distance(other, newGroup), newGroup, other));
|
||||
}
|
||||
Collections.sort(dists);
|
||||
plane.add(newGroup);
|
||||
@ -130,11 +161,11 @@ public class ReadingOrderService {
|
||||
/**
|
||||
* Removes all distance tuples containing obj
|
||||
*/
|
||||
private List<DistElem<BoundingBox>> removeDistElementsContainingObject(Collection<DistElem<BoundingBox>> list, BoundingBox obj) {
|
||||
private List<BoundingBoxDistanceTuple> removeDistElementsContainingObject(Collection<BoundingBoxDistanceTuple> list, BoundingBox obj) {
|
||||
|
||||
List<DistElem<BoundingBox>> ret = new ArrayList<DistElem<BoundingBox>>();
|
||||
for (DistElem<BoundingBox> distElem : list) {
|
||||
if (distElem.getObj1() != obj && distElem.getObj2() != obj) {
|
||||
List<BoundingBoxDistanceTuple> ret = new ArrayList<>();
|
||||
for (BoundingBoxDistanceTuple distElem : list) {
|
||||
if (distElem.getZone1() != obj && distElem.getZone2() != obj) {
|
||||
ret.add(distElem);
|
||||
}
|
||||
}
|
||||
|
||||
@ -41,6 +41,7 @@ public class ZoneBuilderService {
|
||||
|
||||
lines.forEach(outerLine -> //
|
||||
lines.forEach(innerLine -> {
|
||||
|
||||
double scale = Math.min(outerLine.getHeight(), innerLine.getHeight()) / meanHeight;
|
||||
scale = Math.max(MIN_LINE_SIZE_SCALE, Math.min(scale, MAX_LINE_SIZE_SCALE));
|
||||
|
||||
@ -49,13 +50,8 @@ public class ZoneBuilderService {
|
||||
double horizontalDistance = outerLine.horizontalDistance(innerLine) / scale;
|
||||
double verticalDistance = outerLine.verticalDistance(innerLine) / scale;
|
||||
|
||||
// Line over or above
|
||||
if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance) {
|
||||
sets.union(outerLine, innerLine);
|
||||
}
|
||||
|
||||
// Split line that needs later merging
|
||||
else if (minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
|
||||
if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance //
|
||||
|| minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
|
||||
sets.union(outerLine, innerLine);
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,9 +25,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
System.out.println("<<<<<<<<<<" + Math.sin(-0) + "aaa " + Math.cos(0) + Math.tan(0));
|
||||
|
||||
String fileName = "files/Plenarprotokoll 1 (keine Druchsache!) (1).pdf";
|
||||
String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user