RED-7141: Improved basic block combination logic
This commit is contained in:
parent
d06933ed17
commit
d0e1af3a44
@ -45,6 +45,12 @@ public abstract class AbstractPageBlock {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Checks whether the bounding box of {@code other} lies inside this block's
 * bounding box, allowing a tolerance of {@code threshold} on every side.
 *
 * <p>Each of the four edges is tested independently: this block's minimum
 * X/Y may exceed the other block's minimum by at most {@code threshold}, and
 * this block's maximum X/Y may fall short of the other block's maximum by at
 * most {@code threshold}.
 *
 * @param other     the text block whose bounds are tested against this block
 * @param threshold tolerance added to each edge comparison; with {@code 0}
 *                  the other block must lie fully within this block's bounds
 * @return {@code true} if all four edge comparisons hold, {@code false} otherwise
 */
public boolean containsBlock(TextPageBlock other, float threshold) {
|
||||||
|
|
||||||
|
// Comparisons are inclusive, so an edge that overshoots by exactly `threshold` still counts as contained.
return this.minX <= other.getMinX() + threshold && this.maxX >= other.getMaxX() - threshold && this.minY <= other.getMinY() + threshold && this.maxY >= other.getMaxY() - threshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean contains(AbstractPageBlock other) {
|
public boolean contains(AbstractPageBlock other) {
|
||||||
|
|
||||||
return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY;
|
return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY;
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import java.util.Comparator;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ListIterator;
|
import java.util.ListIterator;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
@ -68,24 +69,31 @@ public class DocstrumBlockificationService {
|
|||||||
|
|
||||||
if (previous != null) {
|
if (previous != null) {
|
||||||
|
|
||||||
if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 && current.getNumberOfLines() <= 5 || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1)) {
|
if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1 /* && current.getNumberOfLines() <= 10 */ || previous.getNumberOfLines() == 2 && current.getNumberOfLines() == 1)) {
|
||||||
previous.getSequences().addAll(current.getSequences());
|
previous.getSequences().addAll(current.getSequences());
|
||||||
previous = buildTextBlock(previous.getSequences(), 0);
|
previous = buildTextBlock(previous.getSequences(), 0);
|
||||||
itty.remove();
|
itty.remove();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 && current.getNumberOfLines() <= 5 && previous.getNumberOfLines() <= current.getNumberOfLines())) {
|
if ((Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD || Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) && (previous.getNumberOfLines() >= 2 && current.getNumberOfLines() >= 2 /* && current.getNumberOfLines() <= 10 */ && previous.getNumberOfLines() <= current.getNumberOfLines())) {
|
||||||
previous.getSequences().addAll(current.getSequences());
|
previous.getSequences().addAll(current.getSequences());
|
||||||
previous = buildTextBlock(previous.getSequences(), 0);
|
previous = buildTextBlock(previous.getSequences(), 0);
|
||||||
itty.remove();
|
itty.remove();
|
||||||
|
|
||||||
// Might be a left/right mapping; also add a sorted duplicate
|
// Might be a left/right mapping; also add a sorted duplicate
|
||||||
var sortedDublicate = buildTextBlock(previous.getSequences().stream().sorted(new TextPositionSequenceComparator()).toList(), 0);
|
var sortedDublicate = buildTextBlock(previous.getSequences().stream().sorted(new TextPositionSequenceComparator()).collect(Collectors.toList()), 0);
|
||||||
itty.add(sortedDublicate);
|
itty.add(sortedDublicate);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (previous.containsBlock(current, THRESHOLD)) {
|
||||||
|
previous.getSequences().addAll(current.getSequences());
|
||||||
|
previous = buildTextBlock(previous.getSequences().stream().sorted(new TextPositionSequenceComparator()).collect(Collectors.toList()), 0);
|
||||||
|
itty.remove();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
previous = current;
|
previous = current;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user