RED-8666
This commit is contained in:
parent
91401361e9
commit
7f56ed15c8
@ -45,12 +45,13 @@ public class RedTextPosition {
|
||||
@JsonIgnore
|
||||
private String fontName;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
TextPosition textPosition;
|
||||
private int textSequence;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static RedTextPosition fromTextPosition(TextPosition textPosition) {
|
||||
public static RedTextPosition fromTextPosition(TextPosition textPosition, int textSequence) {
|
||||
|
||||
var pos = new RedTextPosition();
|
||||
BeanUtils.copyProperties(textPosition, pos);
|
||||
@ -66,7 +67,7 @@ public class RedTextPosition {
|
||||
position[3] = textPosition.getHeightDir();
|
||||
|
||||
pos.setPosition(position);
|
||||
pos.textPosition = textPosition;
|
||||
pos.setTextSequence(textSequence);
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
@ -43,9 +43,9 @@ public class TextPositionSequence implements CharSequence {
|
||||
}
|
||||
|
||||
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int page, boolean isParagraphStart) {
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int page, boolean isParagraphStart, int textSequence) {
|
||||
|
||||
this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList());
|
||||
this.textPositions = textPositions.stream().map(textPosition -> RedTextPosition.fromTextPosition(textPosition, textSequence)).collect(Collectors.toList());
|
||||
this.page = page;
|
||||
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
|
||||
this.rotation = textPositions.get(0).getRotation();
|
||||
@ -133,9 +133,9 @@ public class TextPositionSequence implements CharSequence {
|
||||
}
|
||||
|
||||
|
||||
public void add(TextPosition textPosition) {
|
||||
public void add(TextPosition textPosition, int textSequence) {
|
||||
|
||||
this.textPositions.add(RedTextPosition.fromTextPosition(textPosition));
|
||||
this.textPositions.add(RedTextPosition.fromTextPosition(textPosition, textSequence));
|
||||
|
||||
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
|
||||
this.rotation = textPositions.get(0).getRotation();
|
||||
|
||||
@ -29,7 +29,7 @@ public class DocstrumSegmentationService {
|
||||
private final ReadingOrderService readingOrderService;
|
||||
|
||||
|
||||
public List<Zone> segmentPage(List<TextPositionSequence> textPositions, boolean xyOder) {
|
||||
public List<Zone> segmentPage(List<TextPositionSequence> textPositions, boolean columnWise) {
|
||||
|
||||
List<Zone> zones = new ArrayList<>();
|
||||
zones.addAll(computeZones(textPositions, TextDirection.ZERO));
|
||||
@ -37,7 +37,7 @@ public class DocstrumSegmentationService {
|
||||
zones.addAll(computeZones(textPositions, TextDirection.HALF_CIRCLE));
|
||||
zones.addAll(computeZones(textPositions, TextDirection.THREE_QUARTER_CIRCLE));
|
||||
|
||||
return readingOrderService.resolve(zones, xyOder);
|
||||
return readingOrderService.resolve(zones, columnWise);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -11,6 +11,7 @@ public class Zone extends BoundingBox {
|
||||
|
||||
private List<Line> lines;
|
||||
|
||||
private int readingOrder = -1;
|
||||
|
||||
public Zone(List<Line> lines) {
|
||||
|
||||
|
||||
@ -1,8 +1,13 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
public class UnsupervisedReadingOrderDetector {
|
||||
@ -13,17 +18,21 @@ public class UnsupervisedReadingOrderDetector {
|
||||
private double tolerance = 5;
|
||||
private ZoneComparator zoneComparator;
|
||||
|
||||
|
||||
public boolean useRenderingOrder() {
|
||||
|
||||
return useRenderingOrder;
|
||||
}
|
||||
|
||||
|
||||
public UnsupervisedReadingOrderDetector() {
|
||||
|
||||
configureComparator();
|
||||
}
|
||||
|
||||
|
||||
public UnsupervisedReadingOrderDetector(double tolerance, SpatialReasoningRules spatialReasoningRule, boolean useRenderingOrder) {
|
||||
|
||||
this.tolerance = tolerance;
|
||||
this.spatialReasoningRule = spatialReasoningRule;
|
||||
this.useRenderingOrder = useRenderingOrder;
|
||||
@ -33,12 +42,12 @@ public class UnsupervisedReadingOrderDetector {
|
||||
}
|
||||
|
||||
|
||||
public void configureComparator() { // Or a suitable method name
|
||||
public void configureComparator() {
|
||||
|
||||
switch (spatialReasoningRule) {
|
||||
case COLUMN_WISE:
|
||||
if (useRenderingOrder) {
|
||||
zoneComparator = (Zone z1, Zone z2, double t) ->
|
||||
getBeforeInReadingVertical(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
zoneComparator = (Zone z1, Zone z2, double t) -> getBeforeInReadingVertical(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
} else {
|
||||
zoneComparator = this::getBeforeInReadingVertical;
|
||||
}
|
||||
@ -46,8 +55,7 @@ public class UnsupervisedReadingOrderDetector {
|
||||
|
||||
case ROW_WISE:
|
||||
if (useRenderingOrder) {
|
||||
zoneComparator = (Zone z1, Zone z2, double t) ->
|
||||
getBeforeInReadingHorizontal(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
zoneComparator = (Zone z1, Zone z2, double t) -> getBeforeInReadingHorizontal(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
} else {
|
||||
zoneComparator = this::getBeforeInReadingHorizontal;
|
||||
}
|
||||
@ -56,8 +64,7 @@ public class UnsupervisedReadingOrderDetector {
|
||||
case BASIC:
|
||||
default:
|
||||
if (useRenderingOrder) {
|
||||
zoneComparator = (Zone z1, Zone z2, double t) ->
|
||||
getBeforeInReading(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
zoneComparator = (Zone z1, Zone z2, double t) -> getBeforeInReading(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
} else {
|
||||
zoneComparator = this::getBeforeInReading;
|
||||
}
|
||||
@ -65,171 +72,207 @@ public class UnsupervisedReadingOrderDetector {
|
||||
}
|
||||
}
|
||||
|
||||
private boolean getBeforeInRendering(Zone z1, Zone z2) {
|
||||
double avgTextSequenceZ1 = z1.getTextLines().stream()
|
||||
.flatMap(tl -> tl.getWords().stream())
|
||||
.flatMap(w -> w.getLetters().stream())
|
||||
.mapToDouble(l -> l.getTextSequence())
|
||||
.average().orElse(0);
|
||||
|
||||
double avgTextSequenceZ2 = z2.getTextLines().stream()
|
||||
.flatMap(tl -> tl.getWords().stream())
|
||||
.flatMap(w -> w.getLetters().stream())
|
||||
.mapToDouble(l -> l.getTextSequence())
|
||||
.average().orElse(0);
|
||||
public List<Zone> get(List<Zone> zones) {
|
||||
|
||||
int readingOrder = 0;
|
||||
Map<Integer, List<Integer>> graph = buildGraph(zones);
|
||||
|
||||
List<Zone> orderedZones = new ArrayList<>();
|
||||
|
||||
while (!graph.isEmpty()) {
|
||||
int maxCount = graph.values()
|
||||
.stream()
|
||||
.mapToInt(List::size)
|
||||
.max()
|
||||
.orElse(0);
|
||||
|
||||
Map.Entry<Integer, List<Integer>> current = graph.entrySet()
|
||||
.stream()
|
||||
.filter(entry -> entry.getValue().size() == maxCount)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
|
||||
if (current != null) {
|
||||
int index = current.getKey();
|
||||
graph.remove(index);
|
||||
|
||||
for (List<Integer> valueList : graph.values()) {
|
||||
valueList.remove(Integer.valueOf(index));
|
||||
}
|
||||
|
||||
Zone zone = zones.get(index);
|
||||
zone.setReadingOrder(readingOrder++);
|
||||
orderedZones.add(zone);
|
||||
}
|
||||
}
|
||||
|
||||
return orderedZones;
|
||||
}
|
||||
|
||||
|
||||
private Map<Integer, List<Integer>> buildGraph(List<Zone> zones) {
|
||||
|
||||
Map<Integer, List<Integer>> graph = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < zones.size(); i++) {
|
||||
graph.put(i, new ArrayList<>());
|
||||
}
|
||||
|
||||
for (int i = 0; i < zones.size(); i++) {
|
||||
Zone zone1 = zones.get(i);
|
||||
for (int j = 0; j < zones.size(); j++) {
|
||||
if (i == j) {
|
||||
continue;
|
||||
}
|
||||
Zone zone2 = zones.get(j);
|
||||
|
||||
if (zoneComparator.isBefore(zone1, zone2, tolerance)) {
|
||||
graph.get(i).add(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return graph;
|
||||
}
|
||||
|
||||
|
||||
private boolean getBeforeInRendering(Zone z1, Zone z2) {
|
||||
|
||||
double avgTextSequenceZ1 = z1.getLines()
|
||||
.stream()
|
||||
.flatMap(line -> line.getCharacters()
|
||||
.stream())
|
||||
.map(character -> character.getTextPosition().getTextSequence())
|
||||
.collect(Collectors.averagingDouble(Integer::intValue));
|
||||
|
||||
double avgTextSequenceZ2 = z2.getLines()
|
||||
.stream()
|
||||
.flatMap(line -> line.getCharacters()
|
||||
.stream())
|
||||
.map(character -> character.getTextPosition().getTextSequence())
|
||||
.collect(Collectors.averagingDouble(Integer::intValue));
|
||||
|
||||
return avgTextSequenceZ1 < avgTextSequenceZ2;
|
||||
}
|
||||
|
||||
private boolean getBeforeInReading(Zone z1, Zone z2, double T) {
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, T);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, T);
|
||||
|
||||
return xRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS ||
|
||||
yRelation == IntervalRelations.OVERLAPS;
|
||||
}
|
||||
private boolean getBeforeInReading(Zone z1, Zone z2, double tolerance) {
|
||||
|
||||
private boolean getBeforeInReadingVertical(Zone z1, Zone z2, double T) {
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, T);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, T);
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, tolerance);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, tolerance);
|
||||
|
||||
return xRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
(xRelation == IntervalRelations.OVERLAPS && (yRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.OVERLAPS)) ||
|
||||
((yRelation == IntervalRelations.PRECEDES || yRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.OVERLAPS) &&
|
||||
(xRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS ||
|
||||
xRelation == IntervalRelations.STARTS ||
|
||||
xRelation == IntervalRelations.FINISHES_INVERSE ||
|
||||
xRelation == IntervalRelations.EQUALS ||
|
||||
xRelation == IntervalRelations.DURING ||
|
||||
xRelation == IntervalRelations.DURING_INVERSE ||
|
||||
xRelation == IntervalRelations.FINISHES ||
|
||||
xRelation == IntervalRelations.STARTS_INVERSE ||
|
||||
xRelation == IntervalRelations.OVERLAPS_INVERSE));
|
||||
}
|
||||
|
||||
private boolean getBeforeInReadingHorizontal(Zone z1, Zone z2, double T) {
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, T);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, T);
|
||||
|
||||
return yRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
(yRelation == IntervalRelations.OVERLAPS && (xRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS)) ||
|
||||
((xRelation == IntervalRelations.PRECEDES || xRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS) &&
|
||||
(yRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.OVERLAPS ||
|
||||
yRelation == IntervalRelations.STARTS ||
|
||||
yRelation == IntervalRelations.FINISHES_INVERSE ||
|
||||
yRelation == IntervalRelations.EQUALS ||
|
||||
yRelation == IntervalRelations.DURING ||
|
||||
yRelation == IntervalRelations.DURING_INVERSE ||
|
||||
yRelation == IntervalRelations.FINISHES ||
|
||||
yRelation == IntervalRelations.STARTS_INVERSE ||
|
||||
yRelation == IntervalRelations.OVERLAPS_INVERSE));
|
||||
return xRelation == IntervalRelations.PRECEDES
|
||||
|| yRelation == IntervalRelations.PRECEDES
|
||||
|| xRelation == IntervalRelations.MEETS
|
||||
|| yRelation == IntervalRelations.MEETS
|
||||
|| xRelation == IntervalRelations.OVERLAPS
|
||||
|| yRelation == IntervalRelations.OVERLAPS;
|
||||
}
|
||||
|
||||
|
||||
public class ZoneComparator {
|
||||
private boolean getBeforeInReadingVertical(Zone z1, Zone z2, double tolerance) {
|
||||
|
||||
// Other methods and classes...
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, tolerance);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, tolerance);
|
||||
|
||||
private static IntervalRelations getIntervalRelationX(Zone z1, Zone z2, double T) {
|
||||
if (z1.getX() < z2.getX() + z2.getWidth() - T) {
|
||||
return IntervalRelations.PRECEDES;
|
||||
} else if (z1.getX() >= z2.getX() + z2.getWidth() - T) {
|
||||
return IntervalRelations.PRECEDESI;
|
||||
} else if (z2.getX() + z2.getWidth() - T <= z1.getX()
|
||||
&& z1.getX() <= z2.getX() + z2.getWidth() + T) {
|
||||
return IntervalRelations.MEETS;
|
||||
} else if (z2.getX() + z2.getWidth() - T > z1.getX()
|
||||
&& z1.getX() > z2.getX() + z2.getWidth() + T) {
|
||||
return IntervalRelations.MEETSI;
|
||||
} else if (z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - T
|
||||
&& (z2.getX() + z2.getWidth() + T < z1.getX()
|
||||
&& z1.getX() < z2.getX() - T)) {
|
||||
return IntervalRelations.OVERLAPSI;
|
||||
} else if (z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - T
|
||||
&& (z2.getX() + z2.getWidth() + T >= z1.getX()
|
||||
&& z1.getX() >= z2.getX() - T)) {
|
||||
return IntervalRelations.OVERLAPS;
|
||||
} else if (z2.getX() + z2.getWidth() - T <= z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() < z2.getX() - T) {
|
||||
return IntervalRelations.STARTSI;
|
||||
} else if (z2.getX() + z2.getWidth() - T > z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() >= z2.getX() - T) {
|
||||
return IntervalRelations.STARTS;
|
||||
} else if (z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() < z2.getX() - T) {
|
||||
return IntervalRelations.DURINGI;
|
||||
} else if (z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() >= z2.getX() - T) {
|
||||
return IntervalRelations.DURING;
|
||||
} else if (z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + T
|
||||
&& (z2.getX() - T <= z1.getX()
|
||||
&& z1.getX() <= z2.getX() + T)) {
|
||||
return IntervalRelations.FINISHESI;
|
||||
} else if (z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& (z2.getX() - T > z1.getX()
|
||||
&& z1.getX() > z2.getX() + T)) {
|
||||
return IntervalRelations.FINISHES;
|
||||
} else if (z2.getX() + z2.getWidth() - T <= z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& (z2.getX() - T <= z1.getX()
|
||||
&& z1.getX() <= z2.getX() + T)) {
|
||||
return IntervalRelations.EQUALS;
|
||||
}
|
||||
return getIntervalRelations(xRelation, yRelation);
|
||||
}
|
||||
|
||||
return IntervalRelations.UNKNOWN;
|
||||
|
||||
private static boolean getIntervalRelations(IntervalRelations relation1, IntervalRelations relation2) {
|
||||
|
||||
return relation1 == IntervalRelations.PRECEDES
|
||||
|| relation1 == IntervalRelations.MEETS
|
||||
|| (relation2 == IntervalRelations.PRECEDES || relation2 == IntervalRelations.MEETS || relation2 == IntervalRelations.OVERLAPS) && //
|
||||
(relation1 == IntervalRelations.OVERLAPS
|
||||
|| relation1 == IntervalRelations.STARTS
|
||||
|| relation1 == IntervalRelations.FINISHES_INVERSE
|
||||
|| relation1 == IntervalRelations.EQUALS
|
||||
|| relation1 == IntervalRelations.DURING
|
||||
|| relation1 == IntervalRelations.DURING_INVERSE
|
||||
|| relation1 == IntervalRelations.FINISHES
|
||||
|| relation1 == IntervalRelations.STARTS_INVERSE
|
||||
|| relation1 == IntervalRelations.OVERLAPS_INVERSE);
|
||||
}
|
||||
|
||||
|
||||
private boolean getBeforeInReadingHorizontal(Zone z1, Zone z2, double tolerance) {
|
||||
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, tolerance);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, tolerance);
|
||||
|
||||
return getIntervalRelations(yRelation, xRelation);
|
||||
}
|
||||
|
||||
|
||||
private static IntervalRelations getIntervalRelationX(Zone z1, Zone z2, double t) {
|
||||
|
||||
if (z1.getX() + z1.getWidth() < z2.getX() - t) {
|
||||
return IntervalRelations.PRECEDES;
|
||||
} else if (z1.getX() + z1.getWidth() >= z2.getX() - t) {
|
||||
return IntervalRelations.PRECEDES_INVERSE;
|
||||
} else if (z2.getX() - t <= z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() <= z2.getX() + t) {
|
||||
return IntervalRelations.MEETS;
|
||||
} else if (z2.getX() - t > z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() > z2.getX() + t) {
|
||||
return IntervalRelations.MEETS_INVERSE;
|
||||
} else if (z1.getX() < z2.getX() - t && (z2.getX() + t < z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - t)) {
|
||||
return IntervalRelations.OVERLAPS;
|
||||
} else if (z1.getX() >= z2.getX() - t && (z2.getX() + t >= z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - t)) {
|
||||
return IntervalRelations.OVERLAPS_INVERSE;
|
||||
} else if (z2.getX() - t <= z1.getX() && z1.getX() <= z2.getX() + t && z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - t) {
|
||||
return IntervalRelations.STARTS;
|
||||
} else if (z2.getX() - t > z1.getX() && z1.getX() > z2.getX() + t && z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - t) {
|
||||
return IntervalRelations.STARTS_INVERSE;
|
||||
} else if (z1.getX() > z2.getX() + t && z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - t) {
|
||||
return IntervalRelations.DURING;
|
||||
} else if (z1.getX() <= z2.getX() + t && z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - t) {
|
||||
return IntervalRelations.DURING_INVERSE;
|
||||
} else if (z1.getX() > z2.getX() + t && (z2.getX() + z2.getWidth() - t <= z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + t)) {
|
||||
return IntervalRelations.FINISHES;
|
||||
} else if (z1.getX() <= z2.getX() + t && (z2.getX() + z2.getWidth() - t > z1.getX() + z1.getWidth() && z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + t)) {
|
||||
return IntervalRelations.FINISHES_INVERSE;
|
||||
} else if (z2.getX() - t <= z1.getX() && z1.getX() <= z2.getX() + t && (z2.getX() + z2.getWidth() - t <= z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + t)) {
|
||||
return IntervalRelations.EQUALS;
|
||||
}
|
||||
|
||||
private static IntervalRelations getIntervalRelationY(Zone z1, Zone z2, double T) {
|
||||
if (z1.getBottom() < z2.getTop() - T) {
|
||||
return IntervalRelations.PRECEDESI;
|
||||
} else if (z1.getBottom() >= z2.getTop() - T) {
|
||||
return IntervalRelations.PRECEDES;
|
||||
} else if (z2.getTop() - T <= z1.getBottom()
|
||||
&& z1.getBottom() <= z2.getTop() + T) {
|
||||
return IntervalRelations.MEETSI;
|
||||
} else if (z2.getTop() - T > z1.getBottom()
|
||||
&& z1.getBottom() > z2.getTop() + T) {
|
||||
return IntervalRelations.MEETS;
|
||||
} else if (z1.getTop() < z2.getTop() - T
|
||||
&& (z2.getTop() + T < z1.getBottom()
|
||||
&& z1.getBottom() < z2.getBottom() - T)) {
|
||||
return IntervalRelations.OVERLAPSI;
|
||||
} else if (z1.getTop() >= z2.getTop() - T
|
||||
&& (z2.getTop() + T >= z1.getBottom()
|
||||
&& z1.getBottom() >= z2.getBottom() - T)) {
|
||||
return IntervalRelations.OVERLAPS;
|
||||
} else if (z2.getTop() - T <= z1.getTop()
|
||||
&& z1.getTop() <= z2.getTop() + T
|
||||
&& z1.getBottom() < z2.getBottom() - T) {
|
||||
return IntervalRelations.STARTSI;
|
||||
} else if (z2.getTop() - T > z1.getTop()
|
||||
&& z1.getTop() > z2.getTop() + T
|
||||
&& z1.getBottom() >= z2.getBottom() - T) {
|
||||
return IntervalRelations.STARTS;
|
||||
} else if (z1.getTop() > z2.getTop() + T
|
||||
&& z1.getBottom() < z2.getBottom() - T) {
|
||||
return IntervalRelations.DURINGI;
|
||||
}
|
||||
|
||||
return IntervalRelations.UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
private static IntervalRelations getIntervalRelationY(Zone z1, Zone z2, double t) {
|
||||
|
||||
if (z1.getY() + z1.getWidth() < z2.getY() - t) {
|
||||
return IntervalRelations.PRECEDES;
|
||||
} else if (z1.getY() + z1.getWidth() >= z2.getY() - t) {
|
||||
return IntervalRelations.PRECEDES_INVERSE;
|
||||
} else if (z2.getY() - t <= z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() <= z2.getY() + t) {
|
||||
return IntervalRelations.MEETS;
|
||||
} else if (z2.getY() - t > z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() > z2.getY() + t) {
|
||||
return IntervalRelations.MEETS_INVERSE;
|
||||
} else if (z1.getY() < z2.getY() - t && (z2.getY() + t < z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() < z2.getY() + z2.getWidth() - t)) {
|
||||
return IntervalRelations.OVERLAPS;
|
||||
} else if (z1.getY() >= z2.getY() - t && (z2.getY() + t >= z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() >= z2.getY() + z2.getWidth() - t)) {
|
||||
return IntervalRelations.OVERLAPS_INVERSE;
|
||||
} else if (z2.getY() - t <= z1.getY() && z1.getY() <= z2.getY() + t && z1.getY() + z1.getWidth() < z2.getY() + z2.getWidth() - t) {
|
||||
return IntervalRelations.STARTS;
|
||||
} else if (z2.getY() - t > z1.getY() && z1.getY() > z2.getY() + t && z1.getY() + z1.getWidth() >= z2.getY() + z2.getWidth() - t) {
|
||||
return IntervalRelations.STARTS_INVERSE;
|
||||
} else if (z1.getY() > z2.getY() + t && z1.getY() + z1.getWidth() < z2.getY() + z2.getWidth() - t) {
|
||||
return IntervalRelations.DURING;
|
||||
} else if (z1.getY() <= z2.getY() + t && z1.getY() + z1.getWidth() >= z2.getY() + z2.getWidth() - t) {
|
||||
return IntervalRelations.DURING_INVERSE;
|
||||
} else if (z1.getY() > z2.getY() + t && (z2.getY() + z2.getWidth() - t <= z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() <= z2.getY() + z2.getWidth() + t)) {
|
||||
return IntervalRelations.FINISHES;
|
||||
} else if (z1.getY() <= z2.getY() + t && (z2.getY() + z2.getWidth() - t > z1.getY() + z1.getWidth() && z1.getY() + z1.getWidth() > z2.getY() + z2.getWidth() + t)) {
|
||||
return IntervalRelations.FINISHES_INVERSE;
|
||||
} else if (z2.getY() - t <= z1.getY() && z1.getY() <= z2.getY() + t && (z2.getY() + z2.getWidth() - t <= z1.getY() + z1.getWidth()
|
||||
&& z1.getY() + z1.getWidth() <= z2.getY() + z2.getWidth() + t)) {
|
||||
return IntervalRelations.EQUALS;
|
||||
}
|
||||
|
||||
return IntervalRelations.UNKNOWN;
|
||||
}
|
||||
|
||||
}
|
||||
@ -9,6 +9,8 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.SpatialReasoningRules;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.UnsupervisedReadingOrderDetector;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils;
|
||||
|
||||
@Service
|
||||
@ -17,7 +19,20 @@ public class ReadingOrderService {
|
||||
private static final double THRESHOLD = 1;
|
||||
|
||||
|
||||
public List<Zone> resolve(List<Zone> zones, boolean xyOrder) {
|
||||
public List<Zone> resolve(List<Zone> zones, boolean columnWise) {
|
||||
|
||||
if (zones.isEmpty() || zones.size() == 1) {
|
||||
return zones;
|
||||
}
|
||||
|
||||
SpatialReasoningRules spatialReasoningRules = columnWise ? SpatialReasoningRules.COLUMN_WISE : SpatialReasoningRules.ROW_WISE;
|
||||
|
||||
var unsupervisedReadingOrderDetector = new UnsupervisedReadingOrderDetector(5, spatialReasoningRules, true);
|
||||
|
||||
return unsupervisedReadingOrderDetector.get(zones);
|
||||
}
|
||||
|
||||
public List<Zone> resolveOld(List<Zone> zones, boolean xyOrder) {
|
||||
|
||||
if (zones.isEmpty() || zones.size() == 1) {
|
||||
return zones;
|
||||
|
||||
@ -257,7 +257,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
|
||||
if (textPositions.get(i).getDir() != direction && startIndex != i) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart, textPositionSequences.size()));
|
||||
startIndex = i;
|
||||
direction = textPositions.get(i).getDir();
|
||||
}
|
||||
@ -266,7 +266,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
if (checkIfCurrentPositionIsToTheRightOfPreviousPosition(i, textPositions)) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart, textPositionSequences.size()));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
@ -274,7 +274,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart, textPositionSequences.size()));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
@ -288,10 +288,10 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
// Remove false sequence ends (whitespaces)
|
||||
if (checkIfGapSizeBetweenCharactersSmallerThanMaximum(previous, sublist, 0.01f)) {
|
||||
for (TextPosition t : sublist) {
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t);
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t, textPositionSequences.size());
|
||||
}
|
||||
} else {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart, textPositionSequences.size()));
|
||||
}
|
||||
}
|
||||
startIndex = i + 1;
|
||||
@ -311,10 +311,10 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
|
||||
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
|
||||
for (TextPosition t : sublist) {
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t);
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t, textPositionSequences.size());
|
||||
}
|
||||
} else {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, isParagraphStart));
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, isParagraphStart, textPositionSequences.size()));
|
||||
}
|
||||
}
|
||||
super.writeString(text);
|
||||
|
||||
@ -43,7 +43,7 @@ public class MarkedContentUtils {
|
||||
|
||||
return markedContentByYPosition.values().stream()
|
||||
.map(textPositions -> new TextPositionSequence(textPositions.stream()
|
||||
.toList(), 0, true)
|
||||
.toList(), 0, true, 0)
|
||||
.getRectangle())
|
||||
.map(t -> new Rectangle2D.Float(t.getTopLeft().getX(), t.getTopLeft().getY() - Math.abs(t.getHeight()), t.getWidth(), Math.abs(t.getHeight()))).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@ -26,14 +26,14 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/Plenarprotokoll 1 (keine Druchsache!) (1).pdf";
|
||||
String fileName = "files/new/A8240D TRESO dRR Part B Section 9 core - Germany Commenting - 07_05_2019.pdf";
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
|
||||
Document document = buildGraph(fileName, LayoutParsingType.DOCSTRUM);
|
||||
Document document = buildGraph(fileName, LayoutParsingType.DOCSTRUM_XY);
|
||||
long start = System.currentTimeMillis();
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
||||
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user