RED-8666
This commit is contained in:
parent
2ab60195e4
commit
91401361e9
@ -0,0 +1,59 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
/**
 * Qualitative relations that can hold between two one-dimensional intervals X and Y.
 *
 * <p>In the diagrams the upper row is X and the lower row is Y; dots are padding only.
 * Every relation other than {@link #UNKNOWN} and {@link #EQUALS} has an {@code _INVERSE}
 * counterpart.
 */
public enum IntervalRelations {

    /** Unknown interval relations. */
    UNKNOWN,

    /**
     * X takes place before Y.
     * <pre>
     * |____X____|......................
     * ......................|____Y____|
     * </pre>
     */
    PRECEDES,

    /**
     * X meets Y.
     * <pre>
     * |____X____|.................
     * ................|____Y____|
     * </pre>
     */
    MEETS,

    /**
     * X overlaps with Y.
     * <pre>
     * |______X______|.................
     * ................|______Y______|
     * </pre>
     */
    OVERLAPS,

    /**
     * X starts Y.
     * <pre>
     * |____X____|.................
     * |_____Y_____|..............
     * </pre>
     */
    STARTS,

    /**
     * X during Y.
     * <pre>
     * ........|____X____|.........
     * .....|______Y______|.....
     * </pre>
     */
    DURING,

    /**
     * X finishes Y.
     * <pre>
     * .................|____X____|
     * ..............|_____Y_____|
     * </pre>
     */
    FINISHES,

    /** Inverse precedes. */
    PRECEDES_INVERSE,

    /** Inverse meets. */
    MEETS_INVERSE,

    /** Inverse overlaps. */
    OVERLAPS_INVERSE,

    /** Inverse starts. */
    STARTS_INVERSE,

    /** Inverse during. */
    DURING_INVERSE,

    /** Inverse finishes. */
    FINISHES_INVERSE,

    /**
     * X is equal to Y.
     * <pre>
     * ..........|____X____|............
     * ..........|____Y____|............
     * </pre>
     */
    EQUALS
}
|
||||
@ -0,0 +1,11 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
|
||||
public interface ReadingOrderDetector {
|
||||
|
||||
Collection<Zone> get(Collection<Zone> zones);
|
||||
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
/**
 * Selects which set of spatial rules is used to decide whether one zone is read before
 * another.
 */
public enum SpatialReasoningRules {

    /** In western culture the reading order is from left to right and from top to bottom. */
    BASIC,

    /** The diagonal direction 'left-bottom to top-right' cannot be present among the Basic relations allowed. */
    ROW_WISE,

    /** The diagonal direction 'right-top to bottom-left' cannot be present among the Basic relations allowed. */
    COLUMN_WISE
}
|
||||
@ -0,0 +1,235 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
public class UnsupervisedReadingOrderDetector {
|
||||
|
||||
private boolean useRenderingOrder = true;
|
||||
@Getter
|
||||
private SpatialReasoningRules spatialReasoningRule = SpatialReasoningRules.COLUMN_WISE;
|
||||
private double tolerance = 5;
|
||||
private ZoneComparator zoneComparator;
|
||||
|
||||
public boolean useRenderingOrder() {
|
||||
|
||||
return useRenderingOrder;
|
||||
}
|
||||
|
||||
public UnsupervisedReadingOrderDetector() {
|
||||
|
||||
configureComparator();
|
||||
}
|
||||
|
||||
public UnsupervisedReadingOrderDetector(double tolerance, SpatialReasoningRules spatialReasoningRule, boolean useRenderingOrder) {
|
||||
this.tolerance = tolerance;
|
||||
this.spatialReasoningRule = spatialReasoningRule;
|
||||
this.useRenderingOrder = useRenderingOrder;
|
||||
|
||||
configureComparator();
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void configureComparator() { // Or a suitable method name
|
||||
switch (spatialReasoningRule) {
|
||||
case COLUMN_WISE:
|
||||
if (useRenderingOrder) {
|
||||
zoneComparator = (Zone z1, Zone z2, double t) ->
|
||||
getBeforeInReadingVertical(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
} else {
|
||||
zoneComparator = this::getBeforeInReadingVertical;
|
||||
}
|
||||
break;
|
||||
|
||||
case ROW_WISE:
|
||||
if (useRenderingOrder) {
|
||||
zoneComparator = (Zone z1, Zone z2, double t) ->
|
||||
getBeforeInReadingHorizontal(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
} else {
|
||||
zoneComparator = this::getBeforeInReadingHorizontal;
|
||||
}
|
||||
break;
|
||||
|
||||
case BASIC:
|
||||
default:
|
||||
if (useRenderingOrder) {
|
||||
zoneComparator = (Zone z1, Zone z2, double t) ->
|
||||
getBeforeInReading(z1, z2, t) || getBeforeInRendering(z1, z2);
|
||||
} else {
|
||||
zoneComparator = this::getBeforeInReading;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean getBeforeInRendering(Zone z1, Zone z2) {
|
||||
double avgTextSequenceZ1 = z1.getTextLines().stream()
|
||||
.flatMap(tl -> tl.getWords().stream())
|
||||
.flatMap(w -> w.getLetters().stream())
|
||||
.mapToDouble(l -> l.getTextSequence())
|
||||
.average().orElse(0);
|
||||
|
||||
double avgTextSequenceZ2 = z2.getTextLines().stream()
|
||||
.flatMap(tl -> tl.getWords().stream())
|
||||
.flatMap(w -> w.getLetters().stream())
|
||||
.mapToDouble(l -> l.getTextSequence())
|
||||
.average().orElse(0);
|
||||
|
||||
return avgTextSequenceZ1 < avgTextSequenceZ2;
|
||||
}
|
||||
|
||||
private boolean getBeforeInReading(Zone z1, Zone z2, double T) {
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, T);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, T);
|
||||
|
||||
return xRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS ||
|
||||
yRelation == IntervalRelations.OVERLAPS;
|
||||
}
|
||||
|
||||
private boolean getBeforeInReadingVertical(Zone z1, Zone z2, double T) {
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, T);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, T);
|
||||
|
||||
return xRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
(xRelation == IntervalRelations.OVERLAPS && (yRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.OVERLAPS)) ||
|
||||
((yRelation == IntervalRelations.PRECEDES || yRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.OVERLAPS) &&
|
||||
(xRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS ||
|
||||
xRelation == IntervalRelations.STARTS ||
|
||||
xRelation == IntervalRelations.FINISHES_INVERSE ||
|
||||
xRelation == IntervalRelations.EQUALS ||
|
||||
xRelation == IntervalRelations.DURING ||
|
||||
xRelation == IntervalRelations.DURING_INVERSE ||
|
||||
xRelation == IntervalRelations.FINISHES ||
|
||||
xRelation == IntervalRelations.STARTS_INVERSE ||
|
||||
xRelation == IntervalRelations.OVERLAPS_INVERSE));
|
||||
}
|
||||
|
||||
private boolean getBeforeInReadingHorizontal(Zone z1, Zone z2, double T) {
|
||||
IntervalRelations xRelation = getIntervalRelationX(z1, z2, T);
|
||||
IntervalRelations yRelation = getIntervalRelationY(z1, z2, T);
|
||||
|
||||
return yRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
(yRelation == IntervalRelations.OVERLAPS && (xRelation == IntervalRelations.PRECEDES ||
|
||||
xRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS)) ||
|
||||
((xRelation == IntervalRelations.PRECEDES || xRelation == IntervalRelations.MEETS ||
|
||||
xRelation == IntervalRelations.OVERLAPS) &&
|
||||
(yRelation == IntervalRelations.PRECEDES ||
|
||||
yRelation == IntervalRelations.MEETS ||
|
||||
yRelation == IntervalRelations.OVERLAPS ||
|
||||
yRelation == IntervalRelations.STARTS ||
|
||||
yRelation == IntervalRelations.FINISHES_INVERSE ||
|
||||
yRelation == IntervalRelations.EQUALS ||
|
||||
yRelation == IntervalRelations.DURING ||
|
||||
yRelation == IntervalRelations.DURING_INVERSE ||
|
||||
yRelation == IntervalRelations.FINISHES ||
|
||||
yRelation == IntervalRelations.STARTS_INVERSE ||
|
||||
yRelation == IntervalRelations.OVERLAPS_INVERSE));
|
||||
}
|
||||
|
||||
|
||||
public class ZoneComparator {
|
||||
|
||||
// Other methods and classes...
|
||||
|
||||
private static IntervalRelations getIntervalRelationX(Zone z1, Zone z2, double T) {
|
||||
if (z1.getX() < z2.getX() + z2.getWidth() - T) {
|
||||
return IntervalRelations.PRECEDES;
|
||||
} else if (z1.getX() >= z2.getX() + z2.getWidth() - T) {
|
||||
return IntervalRelations.PRECEDESI;
|
||||
} else if (z2.getX() + z2.getWidth() - T <= z1.getX()
|
||||
&& z1.getX() <= z2.getX() + z2.getWidth() + T) {
|
||||
return IntervalRelations.MEETS;
|
||||
} else if (z2.getX() + z2.getWidth() - T > z1.getX()
|
||||
&& z1.getX() > z2.getX() + z2.getWidth() + T) {
|
||||
return IntervalRelations.MEETSI;
|
||||
} else if (z1.getX() + z1.getWidth() < z2.getX() + z2.getWidth() - T
|
||||
&& (z2.getX() + z2.getWidth() + T < z1.getX()
|
||||
&& z1.getX() < z2.getX() - T)) {
|
||||
return IntervalRelations.OVERLAPSI;
|
||||
} else if (z1.getX() + z1.getWidth() >= z2.getX() + z2.getWidth() - T
|
||||
&& (z2.getX() + z2.getWidth() + T >= z1.getX()
|
||||
&& z1.getX() >= z2.getX() - T)) {
|
||||
return IntervalRelations.OVERLAPS;
|
||||
} else if (z2.getX() + z2.getWidth() - T <= z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() < z2.getX() - T) {
|
||||
return IntervalRelations.STARTSI;
|
||||
} else if (z2.getX() + z2.getWidth() - T > z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() >= z2.getX() - T) {
|
||||
return IntervalRelations.STARTS;
|
||||
} else if (z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() < z2.getX() - T) {
|
||||
return IntervalRelations.DURINGI;
|
||||
} else if (z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& z1.getX() >= z2.getX() - T) {
|
||||
return IntervalRelations.DURING;
|
||||
} else if (z1.getX() + z1.getWidth() > z2.getX() + z2.getWidth() + T
|
||||
&& (z2.getX() - T <= z1.getX()
|
||||
&& z1.getX() <= z2.getX() + T)) {
|
||||
return IntervalRelations.FINISHESI;
|
||||
} else if (z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& (z2.getX() - T > z1.getX()
|
||||
&& z1.getX() > z2.getX() + T)) {
|
||||
return IntervalRelations.FINISHES;
|
||||
} else if (z2.getX() + z2.getWidth() - T <= z1.getX() + z1.getWidth()
|
||||
&& z1.getX() + z1.getWidth() <= z2.getX() + z2.getWidth() + T
|
||||
&& (z2.getX() - T <= z1.getX()
|
||||
&& z1.getX() <= z2.getX() + T)) {
|
||||
return IntervalRelations.EQUALS;
|
||||
}
|
||||
|
||||
return IntervalRelations.UNKNOWN;
|
||||
}
|
||||
|
||||
private static IntervalRelations getIntervalRelationY(Zone z1, Zone z2, double T) {
|
||||
if (z1.getBottom() < z2.getTop() - T) {
|
||||
return IntervalRelations.PRECEDESI;
|
||||
} else if (z1.getBottom() >= z2.getTop() - T) {
|
||||
return IntervalRelations.PRECEDES;
|
||||
} else if (z2.getTop() - T <= z1.getBottom()
|
||||
&& z1.getBottom() <= z2.getTop() + T) {
|
||||
return IntervalRelations.MEETSI;
|
||||
} else if (z2.getTop() - T > z1.getBottom()
|
||||
&& z1.getBottom() > z2.getTop() + T) {
|
||||
return IntervalRelations.MEETS;
|
||||
} else if (z1.getTop() < z2.getTop() - T
|
||||
&& (z2.getTop() + T < z1.getBottom()
|
||||
&& z1.getBottom() < z2.getBottom() - T)) {
|
||||
return IntervalRelations.OVERLAPSI;
|
||||
} else if (z1.getTop() >= z2.getTop() - T
|
||||
&& (z2.getTop() + T >= z1.getBottom()
|
||||
&& z1.getBottom() >= z2.getBottom() - T)) {
|
||||
return IntervalRelations.OVERLAPS;
|
||||
} else if (z2.getTop() - T <= z1.getTop()
|
||||
&& z1.getTop() <= z2.getTop() + T
|
||||
&& z1.getBottom() < z2.getBottom() - T) {
|
||||
return IntervalRelations.STARTSI;
|
||||
} else if (z2.getTop() - T > z1.getTop()
|
||||
&& z1.getTop() > z2.getTop() + T
|
||||
&& z1.getBottom() >= z2.getBottom() - T) {
|
||||
return IntervalRelations.STARTS;
|
||||
} else if (z1.getTop() > z2.getTop() + T
|
||||
&& z1.getBottom() < z2.getBottom() - T) {
|
||||
return IntervalRelations.DURINGI;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,8 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone;
|
||||
|
||||
@FunctionalInterface
|
||||
public interface ZoneComparator {
|
||||
boolean isBefore(Zone zone1, Zone zone2, double tolerance);
|
||||
}
|
||||
@ -0,0 +1,39 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
|
||||
public class ReadingOrderHelper {
|
||||
|
||||
public static List<TextPositionSequence> orderByReadingOrder(List<TextPositionSequence> words) {
|
||||
if (words.size() <= 1) {
|
||||
return words;
|
||||
}
|
||||
|
||||
int textOrientation = words.get(0).getRotation();
|
||||
|
||||
switch (textOrientation) {
|
||||
case 0:
|
||||
return words.stream()
|
||||
.sorted(Comparator.comparingDouble(w -> w.getRectangle().getTopLeft().getX()))
|
||||
.collect(Collectors.toList());
|
||||
case 90:
|
||||
return words.stream()
|
||||
.sorted((w1, w2) -> -Double.compare(w1.getRectangle().getTopLeft().getY(), w2.getRectangle().getTopLeft().getY()))
|
||||
.collect(Collectors.toList());
|
||||
case 180:
|
||||
return words.stream()
|
||||
.sorted((w1, w2) -> -Double.compare(w1.getRectangle().getTopLeft().getX(), w2.getRectangle().getTopLeft().getX()))
|
||||
.collect(Collectors.toList());
|
||||
case 270:
|
||||
return words.stream()
|
||||
.sorted(Comparator.comparingDouble(w -> w.getRectangle().getTopLeft().getY()))
|
||||
.collect(Collectors.toList());
|
||||
default:
|
||||
throw new IllegalArgumentException("Not sure what to do with this text rotation...");
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user