More
This commit is contained in:
parent
e394f2fa7c
commit
9e5778d4b2
@ -173,15 +173,15 @@ public class HierarchicalReadingOrderResolver {
|
||||
private boolean shouldBeSwapped(BBoxObject first, BBoxObject second) {
|
||||
|
||||
double cx, cy, cw, ch, ox, oy, ow, oh;
|
||||
cx = first.getbBox().getX();
|
||||
cy = first.getbBox().getY();
|
||||
cw = first.getbBox().getWidth();
|
||||
ch = first.getbBox().getHeight();
|
||||
cx = first.getBBox().getX();
|
||||
cy = first.getBBox().getY();
|
||||
cw = first.getBBox().getWidth();
|
||||
ch = first.getBBox().getHeight();
|
||||
|
||||
ox = second.getbBox().getX();
|
||||
oy = second.getbBox().getY();
|
||||
ow = second.getbBox().getWidth();
|
||||
oh = second.getbBox().getHeight();
|
||||
ox = second.getBBox().getX();
|
||||
oy = second.getBBox().getY();
|
||||
ow = second.getBBox().getWidth();
|
||||
oh = second.getBBox().getHeight();
|
||||
|
||||
// Determine Octant
|
||||
//
|
||||
|
||||
@ -1,26 +1,13 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public abstract class BBoxObject {
|
||||
|
||||
private BoundingBox bBox;
|
||||
|
||||
|
||||
public double getArea() {
|
||||
|
||||
return (bBox.getHeight() * bBox.getWidth());
|
||||
}
|
||||
|
||||
|
||||
public BoundingBox getbBox() {
|
||||
|
||||
return bBox;
|
||||
}
|
||||
|
||||
|
||||
public void setbBox(BoundingBox bBox) {
|
||||
|
||||
this.bBox = bBox;
|
||||
}
|
||||
private Rectangle2D bBox;
|
||||
|
||||
|
||||
public double getX() {
|
||||
@ -46,4 +33,16 @@ public abstract class BBoxObject {
|
||||
return bBox.getHeight();
|
||||
}
|
||||
|
||||
|
||||
public double getArea() {
|
||||
|
||||
return (bBox.getHeight() * bBox.getWidth());
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(Rectangle2D contained, double tolerance) {
|
||||
|
||||
return bBox.getX() <= contained.getX() + tolerance && bBox.getY() <= contained.getY() + tolerance && bBox.getX() + bBox.getWidth() >= contained.getX() + contained.getWidth() - tolerance && bBox.getY() + bBox.getHeight() >= contained.getY() + contained.getHeight() - tolerance;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -18,4 +18,10 @@ public final class BoundingBox {
|
||||
return x <= contained.getX() + tolerance && y <= contained.getY() + tolerance && x + width >= contained.getX() + contained.getWidth() - tolerance && y + height >= contained.getY() + contained.getHeight() - tolerance;
|
||||
}
|
||||
|
||||
|
||||
public double getArea() {
|
||||
|
||||
return (height * width);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
public class Line extends BBoxObject {
|
||||
|
||||
@Setter
|
||||
@Getter
|
||||
private List<Word> words = new ArrayList<>();
|
||||
|
||||
|
||||
public void addWord(Word word) {
|
||||
|
||||
this.words.add(word);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,23 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
public class Word extends BBoxObject {
|
||||
|
||||
@Setter
|
||||
@Getter
|
||||
private List<RedTextPosition> textPositions = new ArrayList<>();
|
||||
|
||||
|
||||
public void addChunk(RedTextPosition chunk) {
|
||||
|
||||
this.textPositions.add(chunk);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,19 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public final class Zone extends BBoxObject {
|
||||
|
||||
private List<Line> lines = new ArrayList<>();
|
||||
|
||||
|
||||
public void addLine(Line line) {
|
||||
|
||||
this.lines.add(line);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,15 +1,12 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Line;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Word;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils.BoundingBoxBuilder;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@ -121,7 +118,7 @@ public class CharacterLine extends BBoxObject {
|
||||
}
|
||||
|
||||
|
||||
public void computeWords(double wordSpacing) {
|
||||
private void computeWords(double wordSpacing) {
|
||||
|
||||
TextPositionSequence word = new TextPositionSequence();
|
||||
Character previous = null;
|
||||
@ -140,31 +137,7 @@ public class CharacterLine extends BBoxObject {
|
||||
}
|
||||
|
||||
|
||||
public Line convertToBxLine(double wordSpacing) {
|
||||
|
||||
Line line = new Line();
|
||||
Word word = new Word();
|
||||
Character previousComponent = null;
|
||||
for (Character component : characters) {
|
||||
if (previousComponent != null) {
|
||||
double dist = component.getTextPosition().getXDirAdj() - previousComponent.getTextPosition().getXDirAdj() - previousComponent.getTextPosition().getWidthDirAdj();
|
||||
if (dist > wordSpacing) {
|
||||
BoundingBoxBuilder.setBounds(word);
|
||||
line.addWord(word);
|
||||
word = new Word();
|
||||
}
|
||||
}
|
||||
word.addChunk(component.getTextPosition());
|
||||
previousComponent = component;
|
||||
}
|
||||
BoundingBoxBuilder.setBounds(word);
|
||||
line.addWord(word);
|
||||
BoundingBoxBuilder.setBounds(line);
|
||||
return line;
|
||||
}
|
||||
|
||||
|
||||
public void buildBox() {
|
||||
private void buildBox() {
|
||||
|
||||
double minX = Double.POSITIVE_INFINITY;
|
||||
double minY = Double.POSITIVE_INFINITY;
|
||||
@ -180,7 +153,7 @@ public class CharacterLine extends BBoxObject {
|
||||
|
||||
}
|
||||
|
||||
this.setbBox(new BoundingBox(minX, minY, maxX - minX, maxY - minY));
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@ -12,7 +12,7 @@ import lombok.Data;
|
||||
public class CharacterZone extends BBoxObject {
|
||||
|
||||
private List<CharacterLine> lines;
|
||||
|
||||
|
||||
|
||||
public CharacterZone(List<CharacterLine> lines) {
|
||||
|
||||
@ -38,7 +38,7 @@ public class CharacterZone extends BBoxObject {
|
||||
|
||||
}
|
||||
|
||||
this.setbBox(new BoundingBox(minX, minY, maxX - minX, maxY - minY));
|
||||
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,96 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Line;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Word;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Zone;
|
||||
|
||||
public class BoundingBoxBuilder {
|
||||
|
||||
private double minX = Double.POSITIVE_INFINITY;
|
||||
private double minY = Double.POSITIVE_INFINITY;
|
||||
private double maxX = Double.NEGATIVE_INFINITY;
|
||||
private double maxY = Double.NEGATIVE_INFINITY;
|
||||
|
||||
|
||||
public void expandByLines(Zone zone) {
|
||||
|
||||
for (Line line : zone.getLines()) {
|
||||
expand(line.getbBox());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void expandByWords(Line line) {
|
||||
|
||||
for (Word word : line.getWords()) {
|
||||
expand(word.getbBox());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void expandByChunks(Word word) {
|
||||
|
||||
for (RedTextPosition chunk : word.getTextPositions()) {
|
||||
expand(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void expand(BoundingBox bounds) {
|
||||
|
||||
if (bounds != null) {
|
||||
minX = Math.min(minX, bounds.getX());
|
||||
minY = Math.min(minY, bounds.getY());
|
||||
maxX = Math.max(maxX, bounds.getX() + bounds.getWidth());
|
||||
maxY = Math.max(maxY, bounds.getY() + bounds.getHeight());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void expand(RedTextPosition bounds) {
|
||||
|
||||
if (bounds != null) {
|
||||
minX = Math.min(minX, bounds.getXDirAdj());
|
||||
minY = Math.min(minY, bounds.getYDirAdj());
|
||||
maxX = Math.max(maxX, bounds.getXDirAdj() + bounds.getWidthDirAdj());
|
||||
maxY = Math.max(maxY, bounds.getYDirAdj() + bounds.getHeightDir());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public BoundingBox getBounds() {
|
||||
|
||||
if (minX <= maxX && minY <= maxY) {
|
||||
return new BoundingBox(minX, minY, maxX - minX, maxY - minY);
|
||||
} else {
|
||||
return new BoundingBox(0, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void setBounds(Zone zone) {
|
||||
|
||||
BoundingBoxBuilder builder = new BoundingBoxBuilder();
|
||||
builder.expandByLines(zone);
|
||||
zone.setbBox(builder.getBounds());
|
||||
}
|
||||
|
||||
|
||||
public static void setBounds(Line line) {
|
||||
|
||||
BoundingBoxBuilder builder = new BoundingBoxBuilder();
|
||||
builder.expandByWords(line);
|
||||
line.setbBox(builder.getBounds());
|
||||
}
|
||||
|
||||
|
||||
public static void setBounds(Word word) {
|
||||
|
||||
BoundingBoxBuilder builder = new BoundingBoxBuilder();
|
||||
builder.expandByChunks(word);
|
||||
word.setbBox(builder.getBounds());
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,36 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Zone;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class ZoneUtils {
|
||||
|
||||
public void sortZonesYX(List<Zone> zones) {
|
||||
|
||||
sortZonesYX(zones, 0);
|
||||
}
|
||||
|
||||
|
||||
public void sortZonesYX(List<Zone> zones, final double tolerance) {
|
||||
|
||||
Collections.sort(zones, new Comparator<Zone>() {
|
||||
|
||||
@Override
|
||||
public int compare(Zone o1, Zone o2) {
|
||||
|
||||
int cmp = DoubleUtils.compareDouble(o1.getbBox().getY(), o2.getbBox().getY(), tolerance);
|
||||
if (cmp == 0) {
|
||||
return DoubleUtils.compareDouble(o1.getbBox().getX(), o2.getbBox().getX(), tolerance);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,7 +1,8 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
|
||||
|
||||
public class BBoxZoneGroup extends BBoxObject {
|
||||
|
||||
@ -20,9 +21,9 @@ public class BBoxZoneGroup extends BBoxObject {
|
||||
}
|
||||
|
||||
|
||||
public void setbBox(BoundingBox bBox) {
|
||||
public void setbBox(Rectangle2D bBox) {
|
||||
|
||||
super.setbBox(bBox);
|
||||
super.setBBox(bBox);
|
||||
}
|
||||
|
||||
|
||||
@ -56,7 +57,7 @@ public class BBoxZoneGroup extends BBoxObject {
|
||||
|
||||
assert x1 >= x0;
|
||||
assert y1 >= y0;
|
||||
this.setbBox(new BoundingBox(x0, y0, x1 - x0, y1 - y0));
|
||||
this.setBBox(new Rectangle2D.Double(x0, y0, x1 - x0, y1 - y0));
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user