Reading Order

This commit is contained in:
Dominique Eifländer 2024-02-20 11:56:43 +01:00
parent 72202f63dc
commit 0c8c727303
13 changed files with 294 additions and 293 deletions

View File

@ -1,12 +1,17 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.DisjointSets;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.LineBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.NearestNeighbourService;
@ -20,6 +25,8 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class DocstrumSegmentationService {
private static final double MAX_VERTICAL_MERGE_DIST = 0.5;
private final NearestNeighbourService nearestNeighbourService;
private final SpacingService spacingService;
private final LineBuilderService lineBuilderService;
@ -42,7 +49,105 @@ public class DocstrumSegmentationService {
var zones = zoneBuilderService.buildZones(lines, characterSpacing, lineSpacing);
return readingOrderService.resolve(zones);
zones = mergeLines(zones, characterSpacing, Double.NEGATIVE_INFINITY, 0.0, 0.0, lineSpacing * MAX_VERTICAL_MERGE_DIST);
return readingOrderService.resolve(zones, false);
}
// private List<Zone> mergeZones(List<Zone> zones, double tolerance) {
//
// List<BxBounds> bounds = new ArrayList<BxBounds>(zones.size());
// for (List<ComponentLine> zone : zones) {
// BxBoundsBuilder builder = new BxBoundsBuilder();
// for (ComponentLine line : zone) {
// for (Component component : line.getComponents()) {
// builder.expand(component.getChunk().getBounds());
// }
// }
// bounds.add(builder.getBounds());
// }
//
// List<List<ComponentLine>> outputZones = new ArrayList<List<ComponentLine>>();
// mainFor:
// for (int i = 0; i < zones.size(); i++) {
// for (int j = 0; j < zones.size(); j++) {
// if (i == j || bounds.get(j) == null || bounds.get(i) == null) {
// continue;
// }
// if (BxModelUtils.contains(bounds.get(j), bounds.get(i), tolerance)) {
// zones.get(j).addAll(zones.get(i));
// bounds.set(i, null);
// continue mainFor;
// }
// }
// outputZones.add(zones.get(i));
// }
// return outputZones;
// }
/**
 * Runs {@link #mergeLinesInZone} on every zone and returns the results in input order.
 *
 * @param zones                 zones whose lines should be merged
 * @param wordSpacing           forwarded unchanged to {@code mergeLinesInZone}
 * @param minHorizontalDistance lower bound of the accepted horizontal distance between two lines
 * @param maxHorizontalDistance upper bound of the accepted horizontal distance between two lines
 * @param minVerticalDistance   lower bound of the accepted vertical distance between two lines
 * @param maxVerticalDistance   upper bound of the accepted vertical distance between two lines
 * @return a new mutable list holding one merged zone per input zone
 */
private List<Zone> mergeLines(List<Zone> zones,
        double wordSpacing,
        double minHorizontalDistance,
        double maxHorizontalDistance,
        double minVerticalDistance,
        double maxVerticalDistance) {

    return zones.stream()
            .map(zone -> mergeLinesInZone(zone,
                    wordSpacing,
                    minHorizontalDistance,
                    maxHorizontalDistance,
                    minVerticalDistance,
                    maxVerticalDistance))
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Merges the lines of a single zone that belong together and returns a new zone built from the merged lines.
 *
 * <p>Two lines are put into the same group when their vertical distance lies within
 * {@code [minVerticalDistance, maxVerticalDistance]} and one of the following holds:
 * <ul>
 * <li>their horizontal distance lies within {@code [minHorizontalDistance, maxHorizontalDistance]}, or</li>
 * <li>their horizontal distance differs from the length of the shorter line by less than 0.1 and their
 * characters overlap only marginally (see {@link #hasOnlyMarginalCharacterOverlap}).</li>
 * </ul>
 * Groups are tracked in a {@link DisjointSets}. For every resulting group the characters of all member
 * lines are collected, sorted by their x coordinate and turned into one new {@link Line}.
 *
 * @param zone                  zone whose lines are examined; it is not modified
 * @param wordSpacing           passed to the {@link Line} constructor of every merged line
 * @param minHorizontalDistance lower bound of the accepted horizontal distance
 * @param maxHorizontalDistance upper bound of the accepted horizontal distance
 * @param minVerticalDistance   lower bound of the accepted vertical distance
 * @param maxVerticalDistance   upper bound of the accepted vertical distance
 * @return a new zone containing one line per group
 */
private Zone mergeLinesInZone(Zone zone,
        double wordSpacing,
        double minHorizontalDistance,
        double maxHorizontalDistance,
        double minVerticalDistance,
        double maxVerticalDistance) {

    List<Line> lines = zone.getLines();
    DisjointSets<Line> sets = new DisjointSets<>(lines);

    for (int i = 0; i < lines.size(); i++) {
        Line li = lines.get(i);
        for (int j = i + 1; j < lines.size(); j++) {
            Line lj = lines.get(j);
            double hDist = li.horizontalDistance(lj);
            double vDist = li.verticalDistance(lj);

            // Both merge rules require the vertical distance to be in range, so test it once.
            boolean verticallyInRange = minVerticalDistance <= vDist && vDist <= maxVerticalDistance;
            if (!verticallyInRange) {
                continue;
            }

            boolean horizontallyInRange = minHorizontalDistance <= hDist && hDist <= maxHorizontalDistance;
            if (horizontallyInRange) {
                sets.union(li, lj);
            } else if (Math.abs(hDist - Math.min(li.getLength(), lj.getLength())) < 0.1 && hasOnlyMarginalCharacterOverlap(li, lj)) {
                sets.union(li, lj);
            }
        }
    }

    List<Line> mergedLines = new ArrayList<>();
    for (Set<Line> group : sets) {
        List<Character> characters = new ArrayList<>();
        for (Line line : group) {
            characters.addAll(line.getCharacters());
        }
        // The merged line is rebuilt from scratch, so its characters must be in left-to-right order.
        characters.sort(Comparator.comparingDouble(Character::getX));
        mergedLines.add(new Line(characters, wordSpacing));
    }
    return new Zone(mergedLines);
}

/**
 * Checks whether the characters of two lines overlap only marginally: no character pair may have an
 * overlapping distance greater than 2, and at most two pairs may have a positive overlapping distance.
 * The scan stops as soon as one of the two limits is exceeded.
 *
 * @param first  one line
 * @param second the other line
 * @return {@code true} if neither limit is exceeded
 */
private boolean hasOnlyMarginalCharacterOverlap(Line first, Line second) {

    final double maxPairOverlap = 2;
    final int maxOverlappingPairs = 2;

    int overlappingPairs = 0;
    for (Character ci : first.getCharacters()) {
        for (Character cj : second.getCharacters()) {
            double dist = ci.overlappingDistance(cj);
            if (dist > maxPairOverlap) {
                return false;
            }
            if (dist > 0 && ++overlappingPairs > maxOverlappingPairs) {
                return false;
            }
        }
    }
    return true;
}
}

View File

@ -1,29 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
/**
* Filter class for neighbor objects that checks if the angle of the
* neighbor is within specified range.
*/
public abstract class AngleFilter {
private final double lowerAngle;
private final double upperAngle;
protected double lowerAngle;
protected double upperAngle;
private AngleFilter(double lowerAngle, double upperAngle) {
this.lowerAngle = lowerAngle;
this.upperAngle = upperAngle;
}
public abstract boolean matches(Neighbor neighbor);
/**
* Constructs new angle filter.
*
* @param lowerAngle minimum angle in range [-3*pi/2, pi/2)
* @param upperAngle maximum angle in range [-pi/2, 3*pi/2)
* @return newly constructed angle filter
*/
public static AngleFilter newInstance(double lowerAngle, double upperAngle) {
if (lowerAngle < -Math.PI / 2) {
@ -40,33 +25,19 @@ public abstract class AngleFilter {
}
public double getLowerAngle() {
return lowerAngle;
}
public double getUpperAngle() {
return upperAngle;
}
public abstract boolean matches(Neighbor neighbor);
public static final class AndFilter extends AngleFilter {
private AndFilter(double lowerAngle, double upperAngle) {
super(lowerAngle, upperAngle);
this.lowerAngle = lowerAngle;
this.upperAngle = upperAngle;
}
@Override
public boolean matches(Neighbor neighbor) {
return getLowerAngle() <= neighbor.getAngle() && neighbor.getAngle() < getUpperAngle();
return lowerAngle <= neighbor.getAngle() && neighbor.getAngle() < upperAngle;
}
}
@ -75,14 +46,15 @@ public abstract class AngleFilter {
private OrFilter(double lowerAngle, double upperAngle) {
super(lowerAngle, upperAngle);
this.lowerAngle = lowerAngle;
this.upperAngle = upperAngle;
}
@Override
public boolean matches(Neighbor neighbor) {
return getLowerAngle() <= neighbor.getAngle() || neighbor.getAngle() < getUpperAngle();
return lowerAngle <= neighbor.getAngle() || neighbor.getAngle() < upperAngle;
}
}

View File

@ -1,6 +1,7 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
@ -51,6 +52,20 @@ public class Character {
}
/**
 * Computes a signed measure of how far this character and {@code other} overlap along the x axis.
 *
 * <p>Each character contributes the interval from its {@code x} to {@code x + getWidthDirAdj()}.
 * The magnitude of the result is the distance between the two middle values of the four interval
 * endpoints; the sign is positive when the intervals overlap (or touch) and negative otherwise.
 * With non-negative widths this is the overlap length, or the negated gap between the characters.
 *
 * <p>Earlier revisions rotated the coordinates by {@code Math.sin(-0)} / {@code Math.cos(-0)}, which is a
 * rotation by zero and therefore a no-op; only the x extents take part in the computation.
 *
 * @param other the character to compare with
 * @return positive overlap length, or a non-positive value when the characters do not overlap
 */
public double overlappingDistance(Character other) {

    double[] xs = {
            x,
            x + textPosition.getWidthDirAdj(),
            other.x,
            other.x + other.textPosition.getWidthDirAdj()};

    // Must be evaluated before sorting, while the indices still identify start/end of each character.
    boolean overlapping = xs[1] >= xs[2] && xs[3] >= xs[0];

    Arrays.sort(xs);
    return Math.abs(xs[2] - xs[1]) * (overlapping ? 1 : -1);
}
public void setNeighbors(List<Neighbor> neighbors) {
this.neighbors = neighbors;

View File

@ -10,29 +10,17 @@ import java.util.Set;
public class DisjointSets<E> implements Iterable<Set<E>> {
private final Map<E, Entry<E>> map = new HashMap<E, Entry<E>>();
private final Map<E, Entry<E>> map = new HashMap<>();
/**
* Constructs a new set of singletons.
*
* @param c elements of singleton sets
*/
public DisjointSets(Collection<? extends E> c) {
public DisjointSets(Collection<? extends E> collection) {
for (E element : c) {
for (E element : collection) {
map.put(element, new Entry<E>(element));
}
}
/**
* Checks if elements are in the same subsets.
*
* @param e1 element from a subset
* @param e2 element from a subset
* @return true if elements are in the same subset; false otherwise
*/
public boolean areTogether(E e1, E e2) {
return map.get(e1).findRepresentative() == map.get(e2).findRepresentative();

View File

@ -33,11 +33,11 @@ public class Line extends BoundingBox {
if (characters.size() >= 2) {
// Simple linear regression
double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
for (Character component : characters) {
sx += component.getX();
sxx += component.getX() * component.getX();
sxy += component.getX() * component.getY();
sy += component.getY();
for (Character character : characters) {
sx += character.getX();
sxx += character.getX() * character.getX();
sxy += character.getX() * character.getY();
sy += character.getY();
}
double b = (characters.size() * sxy - sx * sy) / (characters.size() * sxx - sx * sx);
double a = (sy - b * sx) / characters.size();
@ -47,13 +47,13 @@ public class Line extends BoundingBox {
this.x1 = characters.get(characters.size() - 1).getX();
this.y1 = a + b * this.x1;
} else if (!characters.isEmpty()) {
Character component = characters.get(0);
double dx = component.getTextPosition().getWidthDirAdj() / 3;
Character character = characters.get(0);
double dx = character.getTextPosition().getWidthDirAdj() / 3;
double dy = dx * Math.tan(0);
this.x0 = component.getX() - dx;
this.x1 = component.getX() + dx;
this.y0 = component.getY() - dy;
this.y1 = component.getY() + dy;
this.x0 = character.getX() - dx;
this.x1 = character.getX() + dx;
this.y0 = character.getY() - dy;
this.y1 = character.getY() + dy;
} else {
throw new IllegalArgumentException("Component list must not be empty");
}
@ -155,5 +155,13 @@ public class Line extends BoundingBox {
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
}
/**
 * Builds a text representation of this line by concatenating the string form of every entry in
 * {@code words}, separated by single spaces, with leading and trailing whitespace trimmed.
 *
 * @return the space-separated words of this line
 */
@Override
public String toString() {

    StringBuilder sb = new StringBuilder();
    words.forEach(word -> sb.append(word.toString()).append(" "));
    return sb.toString().trim();
}
}

View File

@ -39,4 +39,12 @@ public class Zone extends BoundingBox {
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
}
/**
 * Builds a text representation of this zone by concatenating the string form of every entry in
 * {@code lines}, one per row, with leading and trailing whitespace trimmed.
 *
 * @return the newline-separated lines of this zone
 */
@Override
public String toString() {

    StringBuilder sb = new StringBuilder();
    lines.forEach(line -> sb.append(line.toString()).append("\n"));
    return sb.toString().trim();
}
}

View File

@ -0,0 +1,30 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * A pair of bounding boxes together with the distance between them, ordered for use in a sorted
 * list of candidate pairs.
 *
 * <p>Ordering: tuples whose {@code c} flag is {@code true} sort before tuples whose flag is
 * {@code false}; tuples with the same flag are ordered by {@code distance} using
 * {@link DoubleUtils#compareDouble} with a tolerance of {@code 1E-3}. Because of the tolerance this
 * ordering is not consistent with the {@code equals} generated by Lombok.
 */
@Data
@AllArgsConstructor
public class BoundingBoxDistanceTuple implements Comparable<BoundingBoxDistanceTuple> {

    // NOTE(review): presumably marks a pair that was already checked for objects lying between
    // the two boxes and re-queued - confirm against ReadingOrderService.
    private boolean c;
    private double distance;
    private BoundingBox zone1;
    private BoundingBox zone2;


    @Override
    public int compareTo(BoundingBoxDistanceTuple compareObject) {

        // Differing flags decide the order on their own; the flagged tuple comes first.
        if (c != compareObject.c) {
            return c ? -1 : 1;
        }

        final double tolerance = 1E-3;
        return DoubleUtils.compareDouble(distance, compareObject.distance, tolerance);
    }

}

View File

@ -4,6 +4,9 @@ import java.awt.geom.Rectangle2D;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
import lombok.Data;
@Data
public class BoundingBoxZoneGroup extends BoundingBox {
private BoundingBox leftChild;
@ -14,51 +17,13 @@ public class BoundingBoxZoneGroup extends BoundingBox {
this.leftChild = child1;
this.rightChild = child2;
setBounds(Math.min(child1.getX(), child2.getX()),
Math.min(child1.getY(), child2.getY()),
Math.max(child1.getX() + child1.getWidth(), child2.getX() + child2.getWidth()),
Math.max(child1.getY() + child1.getHeight(), child2.getY() + child2.getHeight()));
double minX = Math.min(leftChild.getX(), rightChild.getX());
double minY = Math.min(leftChild.getY(), rightChild.getY());
double maxX = Math.max(leftChild.getX() + leftChild.getWidth(), rightChild.getX() + rightChild.getWidth());
double maxY = Math.max(leftChild.getY() + leftChild.getHeight(), rightChild.getY() + rightChild.getHeight());
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
}
public void setbBox(Rectangle2D bBox) {
super.setBBox(bBox);
}
public BoundingBox getLeftChild() {
return leftChild;
}
public BoundingBox getRightChild() {
return rightChild;
}
public BoundingBoxZoneGroup setLeftChild(BoundingBox obj) {
this.leftChild = obj;
return this;
}
public BoundingBoxZoneGroup setRightChild(BoundingBox obj) {
this.rightChild = obj;
return this;
}
public BoundingBoxZoneGroup setBounds(double x0, double y0, double x1, double y1) {
assert x1 >= x0;
assert y1 >= y0;
this.setBBox(new Rectangle2D.Double(x0, y0, x1 - x0, y1 - y0));
return this;
}
}

View File

@ -1,115 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
/**
 * A pair of objects together with the distance between them and a boolean flag, ordered for use
 * in a sorted list of candidate pairs.
 *
 * <p>Ordering: elements whose {@code c} flag is {@code true} sort before elements whose flag is
 * {@code false}; elements with the same flag are ordered by {@code dist} using
 * {@link DoubleUtils#compareDouble} with a tolerance of {@code 1E-3}. {@link #equals(Object)} compares
 * the distance exactly, so the ordering is not consistent with equals.
 *
 * @param <E> type of the paired objects
 */
public class DistElem<E> implements Comparable<DistElem<E>> {

    boolean c;
    double dist;
    E obj1;
    E obj2;


    public DistElem(boolean c, double dist, E obj1, E obj2) {

        this.c = c;
        this.dist = dist;
        this.obj1 = obj1;
        this.obj2 = obj2;
    }


    public boolean isC() {

        return c;
    }


    public void setC(boolean c) {

        this.c = c;
    }


    public double getDist() {

        return dist;
    }


    public E getObj1() {

        return obj1;
    }


    public E getObj2() {

        return obj2;
    }


    @Override
    public int compareTo(DistElem<E> compareObject) {

        double eps = 1E-3;
        if (c == compareObject.c) {
            return DoubleUtils.compareDouble(dist, compareObject.dist, eps);
        } else {
            return c ? -1 : 1;
        }
    }


    /**
     * Combines flag, distance and both objects with the usual 31-multiplier scheme.
     * {@code Boolean.hashCode} and {@code Double.hashCode} yield the same values as the
     * hand-written constants and bit folding they replace.
     */
    @Override
    public int hashCode() {

        final int prime = 31;
        int result = 1;
        result = prime * result + Boolean.hashCode(c);
        result = prime * result + Double.hashCode(dist);
        result = prime * result + (obj1 == null ? 0 : obj1.hashCode());
        result = prime * result + (obj2 == null ? 0 : obj2.hashCode());
        return result;
    }


    /**
     * Two elements are equal when they are of the same class and have the same flag, the same
     * distance (compared bitwise, so {@code NaN} equals {@code NaN} and {@code 0.0} differs from
     * {@code -0.0}) and equal (or both {@code null}) objects.
     */
    @Override
    public boolean equals(Object obj) {

        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        // Wildcard instead of a raw type: the element type cannot be checked at runtime anyway.
        DistElem<?> other = (DistElem<?>) obj;
        return c == other.c
                && Double.compare(dist, other.dist) == 0
                && (obj1 == null ? other.obj1 == null : obj1.equals(other.obj1))
                && (obj2 == null ? other.obj2 == null : obj2.equals(other.obj2));
    }

}

View File

@ -5,13 +5,14 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.ListIterator;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.BoundingBoxDistanceTuple;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.BoundingBoxZoneGroup;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.DistElem;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.DocumentPlane;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.TreeToListConverter;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
@ -20,66 +21,96 @@ import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.ut
public class ReadingOrderService {
static final int GRIDSIZE = 50;
static final double EPS = 0.01;
static final int MAX_ZONES = 1000;
static final Comparator<BoundingBox> Y_ASCENDING_ORDER = new Comparator<BoundingBox>() {
static final double THRESHOLD = 1;
@Override
public int compare(BoundingBox o1, BoundingBox o2) {
return DoubleUtils.compareDouble(o1.getY(), o2.getY(), EPS);
public List<Zone> resolve(List<Zone> zones, boolean yxOrder) {
if (zones.isEmpty() || zones.size() == 1) {
return zones;
}
};
static final Comparator<BoundingBox> X_ASCENDING_ORDER = new Comparator<BoundingBox>() {
@Override
public int compare(BoundingBox o1, BoundingBox o2) {
return DoubleUtils.compareDouble(o1.getX(), o2.getX(), EPS);
if (yxOrder) {
zones.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
.thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
return zones;
}
};
static final Comparator<BoundingBox> YX_ASCENDING_ORDER = new Comparator<BoundingBox>() {
return simpleOrder(zones);
}
@Override
public int compare(BoundingBox o1, BoundingBox o2) {
int yCompare = Y_ASCENDING_ORDER.compare(o1, o2);
return yCompare == 0 ? X_ASCENDING_ORDER.compare(o1, o2) : yCompare;
private List<Zone> simpleOrder(List<Zone> zones) {
double minX = Double.POSITIVE_INFINITY;
double maxX = Double.NEGATIVE_INFINITY;
for (Zone zone : zones) {
if (zone.getX() < minX) {
minX = zone.getX();
}
if (zone.getX() + zone.getWidth() > maxX) {
maxX = zone.getX() + zone.getWidth();
}
}
};
double midLineXCoordinate = (minX + maxX) / 2;
public List<Zone> resolve(List<Zone> zones) {
List<Zone> orderedZones;
if (zones.size() > MAX_ZONES) {
orderedZones = new ArrayList<>(zones);
Collections.sort(orderedZones, YX_ASCENDING_ORDER);
} else {
orderedZones = reorderZones(zones);
List<Zone> leftOf = new ArrayList<>();
List<Zone> rightOf = new ArrayList<>();
List<Zone> middle = new ArrayList<>();
for (Zone zone : zones) {
if (zone.getX() < midLineXCoordinate && zone.getX() + zone.getWidth() < midLineXCoordinate) {
leftOf.add(zone);
} else if (zone.getX() > midLineXCoordinate && zone.getX() + zone.getWidth() > midLineXCoordinate) {
rightOf.add(zone);
} else {
middle.add(zone);
}
}
return orderedZones;
leftOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
.thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
rightOf.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
.thenComparing(BoundingBox::getX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
middle.sort(Comparator.comparing(BoundingBox::getY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
List<Zone> sortedZones = new ArrayList<>();
sortedZones.addAll(leftOf);
sortedZones.addAll(rightOf);
ListIterator<Zone> itty = middle.listIterator();
while (itty.hasNext()) {
Zone current = itty.next();
for (int i = 0; i < sortedZones.size(); i++) {
if (current.getY() < sortedZones.get(i).getY()) {
sortedZones.add(i, current);
itty.remove();
break;
}
}
}
sortedZones.addAll(middle);
return sortedZones;
}
private List<Zone> reorderZones(List<Zone> unorderedZones) {
if (unorderedZones.isEmpty()) {
return new ArrayList<>();
} else if (unorderedZones.size() == 1) {
List<Zone> ret = new ArrayList<>(1);
ret.add(unorderedZones.get(0));
return ret;
} else {
BoundingBoxZoneGroup bxZonesTree = groupZonesHierarchically(unorderedZones);
sortGroupedZones(bxZonesTree);
TreeToListConverter treeConverter = new TreeToListConverter();
List<Zone> orderedZones = treeConverter.convertToList(bxZonesTree);
assert unorderedZones.size() == orderedZones.size();
return orderedZones;
}
BoundingBoxZoneGroup bxZonesTree = groupZonesHierarchically(unorderedZones);
sortGroupedZones(bxZonesTree);
TreeToListConverter treeConverter = new TreeToListConverter();
List<Zone> orderedZones = treeConverter.convertToList(bxZonesTree);
assert unorderedZones.size() == orderedZones.size();
return orderedZones;
}
@ -94,29 +125,29 @@ public class ReadingOrderService {
/*
* Distance tuples are stored sorted by ascending distance value
*/
List<DistElem<BoundingBox>> dists = new ArrayList<DistElem<BoundingBox>>(zones.size() * zones.size() / 2);
List<BoundingBoxDistanceTuple> dists = new ArrayList<>();
for (int idx1 = 0; idx1 < zones.size(); ++idx1) {
for (int idx2 = idx1 + 1; idx2 < zones.size(); ++idx2) {
Zone zone1 = zones.get(idx1);
Zone zone2 = zones.get(idx2);
dists.add(new DistElem<BoundingBox>(false, distance(zone1, zone2), zone1, zone2));
dists.add(new BoundingBoxDistanceTuple(false, distance(zone1, zone2), zone1, zone2));
}
}
Collections.sort(dists);
DocumentPlane plane = new DocumentPlane(zones, GRIDSIZE);
while (!dists.isEmpty()) {
DistElem<BoundingBox> distElem = dists.get(0);
BoundingBoxDistanceTuple distElem = dists.get(0);
dists.remove(0);
if (!distElem.isC() && plane.anyObjectsBetween(distElem.getObj1(), distElem.getObj2())) {
dists.add(new DistElem<BoundingBox>(true, distElem.getDist(), distElem.getObj1(), distElem.getObj2()));
if (!distElem.isC() && plane.anyObjectsBetween(distElem.getZone1(), distElem.getZone2())) {
dists.add(new BoundingBoxDistanceTuple(true, distElem.getDistance(), distElem.getZone1(), distElem.getZone2()));
continue;
}
BoundingBoxZoneGroup newGroup = new BoundingBoxZoneGroup(distElem.getObj1(), distElem.getObj2());
plane.remove(distElem.getObj1()).remove(distElem.getObj2());
dists = removeDistElementsContainingObject(dists, distElem.getObj1());
dists = removeDistElementsContainingObject(dists, distElem.getObj2());
BoundingBoxZoneGroup newGroup = new BoundingBoxZoneGroup(distElem.getZone1(), distElem.getZone2());
plane.remove(distElem.getZone1()).remove(distElem.getZone2());
dists = removeDistElementsContainingObject(dists, distElem.getZone1());
dists = removeDistElementsContainingObject(dists, distElem.getZone2());
for (BoundingBox other : plane.getObjects()) {
dists.add(new DistElem<BoundingBox>(false, distance(other, newGroup), newGroup, other));
dists.add(new BoundingBoxDistanceTuple(false, distance(other, newGroup), newGroup, other));
}
Collections.sort(dists);
plane.add(newGroup);
@ -130,11 +161,11 @@ public class ReadingOrderService {
/**
* Removes all distance tuples containing obj
*/
private List<DistElem<BoundingBox>> removeDistElementsContainingObject(Collection<DistElem<BoundingBox>> list, BoundingBox obj) {
private List<BoundingBoxDistanceTuple> removeDistElementsContainingObject(Collection<BoundingBoxDistanceTuple> list, BoundingBox obj) {
List<DistElem<BoundingBox>> ret = new ArrayList<DistElem<BoundingBox>>();
for (DistElem<BoundingBox> distElem : list) {
if (distElem.getObj1() != obj && distElem.getObj2() != obj) {
List<BoundingBoxDistanceTuple> ret = new ArrayList<>();
for (BoundingBoxDistanceTuple distElem : list) {
if (distElem.getZone1() != obj && distElem.getZone2() != obj) {
ret.add(distElem);
}
}

View File

@ -41,6 +41,7 @@ public class ZoneBuilderService {
lines.forEach(outerLine -> //
lines.forEach(innerLine -> {
double scale = Math.min(outerLine.getHeight(), innerLine.getHeight()) / meanHeight;
scale = Math.max(MIN_LINE_SIZE_SCALE, Math.min(scale, MAX_LINE_SIZE_SCALE));
@ -49,13 +50,8 @@ public class ZoneBuilderService {
double horizontalDistance = outerLine.horizontalDistance(innerLine) / scale;
double verticalDistance = outerLine.verticalDistance(innerLine) / scale;
// Line over or above
if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance) {
sets.union(outerLine, innerLine);
}
// Split line that needs later merging
else if (minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
if (minHorizontalDistance <= horizontalDistance && verticalDistance <= maxVerticalDistance //
|| minHorizontalMergeDistance <= horizontalDistance && verticalDistance <= maxVerticalMergeDistance) {
sets.union(outerLine, innerLine);
}
}

View File

@ -25,9 +25,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
@SneakyThrows
public void testViewerDocument() {
System.out.println("<<<<<<<<<<" + Math.sin(-0) + "aaa " + Math.cos(0) + Math.tan(0));
String fileName = "files/Plenarprotokoll 1 (keine Druchsache!) (1).pdf";
String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
var documentFile = new ClassPathResource(fileName).getFile();