This commit is contained in:
Dominique Eifländer 2024-02-16 14:08:59 +01:00
parent e394f2fa7c
commit 9e5778d4b2
11 changed files with 44 additions and 260 deletions

View File

@ -173,15 +173,15 @@ public class HierarchicalReadingOrderResolver {
private boolean shouldBeSwapped(BBoxObject first, BBoxObject second) {
double cx, cy, cw, ch, ox, oy, ow, oh;
cx = first.getbBox().getX();
cy = first.getbBox().getY();
cw = first.getbBox().getWidth();
ch = first.getbBox().getHeight();
cx = first.getBBox().getX();
cy = first.getBBox().getY();
cw = first.getBBox().getWidth();
ch = first.getBBox().getHeight();
ox = second.getbBox().getX();
oy = second.getbBox().getY();
ow = second.getbBox().getWidth();
oh = second.getbBox().getHeight();
ox = second.getBBox().getX();
oy = second.getBBox().getY();
ow = second.getBBox().getWidth();
oh = second.getBBox().getHeight();
// Determine Octant
//

View File

@ -1,26 +1,13 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
import java.awt.geom.Rectangle2D;
import lombok.Data;
@Data
public abstract class BBoxObject {
private BoundingBox bBox;
/**
 * Computes the area covered by this object's bounding box.
 *
 * @return the product of the bounding box's width and height
 */
public double getArea() {

    return bBox.getWidth() * bBox.getHeight();
}
/**
 * Returns the bounding box currently assigned to this object.
 *
 * @return the value of the {@code bBox} field as-is (no copy is made)
 */
public BoundingBox getbBox() {
return bBox;
}
/**
 * Replaces the bounding box of this object.
 *
 * @param bBox the new bounding box; stored as-is, no validation or copy is performed here
 */
public void setbBox(BoundingBox bBox) {
this.bBox = bBox;
}
private Rectangle2D bBox;
public double getX() {
@ -46,4 +33,16 @@ public abstract class BBoxObject {
return bBox.getHeight();
}
/**
 * Computes the area covered by this object's bounding box.
 *
 * @return the product of the bounding box's width and height
 */
public double getArea() {

    return bBox.getWidth() * bBox.getHeight();
}
/**
 * Checks whether the given rectangle lies inside this object's bounding box, allowing each
 * of the four edges to overshoot by at most {@code tolerance}.
 *
 * @param contained the rectangle that is tested for being enclosed
 * @param tolerance slack added to every edge comparison
 * @return {@code true} only if all four edge comparisons hold
 */
public boolean contains(Rectangle2D contained, double tolerance) {

    // One named condition per edge; the comparisons are the same as a single chained expression.
    boolean minXCovered = bBox.getX() <= contained.getX() + tolerance;
    boolean minYCovered = bBox.getY() <= contained.getY() + tolerance;
    boolean maxXCovered = bBox.getX() + bBox.getWidth() >= contained.getX() + contained.getWidth() - tolerance;
    boolean maxYCovered = bBox.getY() + bBox.getHeight() >= contained.getY() + contained.getHeight() - tolerance;

    return minXCovered && minYCovered && maxXCovered && maxYCovered;
}
}

View File

@ -18,4 +18,10 @@ public final class BoundingBox {
return x <= contained.getX() + tolerance && y <= contained.getY() + tolerance && x + width >= contained.getX() + contained.getWidth() - tolerance && y + height >= contained.getY() + contained.getHeight() - tolerance;
}
/**
 * Computes the area of this bounding box.
 *
 * @return the product of {@code width} and {@code height}
 */
public double getArea() {

    return width * height;
}
}

View File

@ -1,21 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
 * A bounding-box object that holds a list of {@link Word}s.
 */
public class Line extends BBoxObject {

    /** Words collected for this line; the getter and setter are generated by Lombok. */
    @Getter
    @Setter
    private List<Word> words = new ArrayList<>();


    /**
     * Appends a word to the end of the word list.
     *
     * @param word the word to append
     */
    public void addWord(Word word) {

        words.add(word);
    }

}

View File

@ -1,23 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
import java.util.ArrayList;
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
import lombok.Getter;
import lombok.Setter;
/**
 * A bounding-box object that holds the list of {@link RedTextPosition}s it is made of.
 */
public class Word extends BBoxObject {

    /** Text positions collected for this word; the getter and setter are generated by Lombok. */
    @Getter
    @Setter
    private List<RedTextPosition> textPositions = new ArrayList<>();


    /**
     * Appends a text position to the end of the list.
     *
     * @param chunk the text position to append
     */
    public void addChunk(RedTextPosition chunk) {

        textPositions.add(chunk);
    }

}

View File

@ -1,19 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor;
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
/**
 * A bounding-box object that holds a list of {@link Line}s.
 *
 * <p>NOTE(review): {@code @Data} on a subclass generates {@code equals}/{@code hashCode}
 * without calling the superclass by default, so the inherited bounding box is presumably
 * not part of equality here. Confirm whether that is intended.
 */
@Data
public final class Zone extends BBoxObject {

    /** Lines collected for this zone; accessors are generated by Lombok. */
    private List<Line> lines = new ArrayList<>();


    /**
     * Appends a line to the end of the line list.
     *
     * @param line the line to append
     */
    public void addLine(Line line) {

        lines.add(line);
    }

}

View File

@ -1,15 +1,12 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Line;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Word;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils.BoundingBoxBuilder;
import lombok.Data;
@ -121,7 +118,7 @@ public class CharacterLine extends BBoxObject {
}
public void computeWords(double wordSpacing) {
private void computeWords(double wordSpacing) {
TextPositionSequence word = new TextPositionSequence();
Character previous = null;
@ -140,31 +137,7 @@ public class CharacterLine extends BBoxObject {
}
/**
 * Builds a {@link Line} from this object's characters, grouping consecutive characters into
 * {@link Word}s. A new word is started whenever the horizontal gap between the end of the
 * previous character and the start of the current one exceeds {@code wordSpacing}.
 *
 * <p>The word being built when iteration ends is always added, even if no character was
 * iterated at all.
 *
 * @param wordSpacing gap above which two neighbouring characters belong to different words
 * @return the assembled line, with bounds set on every word and on the line itself
 */
public Line convertToBxLine(double wordSpacing) {

    Line result = new Line();
    Word current = new Word();
    Character last = null;

    for (Character next : characters) {
        // Gap = start of this character minus (start + width) of the previous one,
        // evaluated left to right exactly as a chained subtraction.
        if (last != null
                && next.getTextPosition().getXDirAdj() - last.getTextPosition().getXDirAdj() - last.getTextPosition().getWidthDirAdj() > wordSpacing) {
            // Close the current word before opening a fresh one.
            BoundingBoxBuilder.setBounds(current);
            result.addWord(current);
            current = new Word();
        }
        current.addChunk(next.getTextPosition());
        last = next;
    }

    // Flush the trailing word, then derive the line bounds from its words.
    BoundingBoxBuilder.setBounds(current);
    result.addWord(current);
    BoundingBoxBuilder.setBounds(result);
    return result;
}
public void buildBox() {
private void buildBox() {
double minX = Double.POSITIVE_INFINITY;
double minY = Double.POSITIVE_INFINITY;
@ -180,7 +153,7 @@ public class CharacterLine extends BBoxObject {
}
this.setbBox(new BoundingBox(minX, minY, maxX - minX, maxY - minY));
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
}
}

View File

@ -1,10 +1,10 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum;
import java.awt.geom.Rectangle2D;
import java.util.Comparator;
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
import lombok.Data;
@ -12,7 +12,7 @@ import lombok.Data;
public class CharacterZone extends BBoxObject {
private List<CharacterLine> lines;
public CharacterZone(List<CharacterLine> lines) {
@ -38,7 +38,7 @@ public class CharacterZone extends BBoxObject {
}
this.setbBox(new BoundingBox(minX, minY, maxX - minX, maxY - minY));
this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY));
}
}

View File

@ -1,96 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils;
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Line;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Word;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Zone;
/**
 * Accumulates the smallest axis-aligned rectangle that encloses everything passed to the
 * {@code expand*} methods. Static {@code setBounds} helpers compute and assign the bounds
 * of a {@link Zone}, {@link Line} or {@link Word} in a single call.
 */
public class BoundingBoxBuilder {

    // The extent starts out "inverted" (min above max) so that any finite coordinate
    // replaces the initial values, and so that an untouched builder is detectable.
    private double minX = Double.POSITIVE_INFINITY;
    private double minY = Double.POSITIVE_INFINITY;
    private double maxX = Double.NEGATIVE_INFINITY;
    private double maxY = Double.NEGATIVE_INFINITY;


    /**
     * Expands the extent by the bounding box of every line in the zone.
     *
     * @param zone the zone whose lines are visited
     */
    public void expandByLines(Zone zone) {

        for (Line line : zone.getLines()) {
            expand(line.getbBox());
        }
    }


    /**
     * Expands the extent by the bounding box of every word in the line.
     *
     * @param line the line whose words are visited
     */
    public void expandByWords(Line line) {

        for (Word word : line.getWords()) {
            expand(word.getbBox());
        }
    }


    /**
     * Expands the extent by every text position of the word.
     *
     * @param word the word whose text positions are visited
     */
    public void expandByChunks(Word word) {

        for (RedTextPosition chunk : word.getTextPositions()) {
            expand(chunk);
        }
    }


    /**
     * Expands the extent so that it also covers the given box.
     *
     * @param bounds the box to include; {@code null} is ignored
     */
    public void expand(BoundingBox bounds) {

        if (bounds == null) {
            return;
        }
        include(bounds.getX(), bounds.getY(), bounds.getX() + bounds.getWidth(), bounds.getY() + bounds.getHeight());
    }


    /**
     * Expands the extent so that it also covers the given text position, using its
     * {@code XDirAdj}/{@code YDirAdj} origin, {@code WidthDirAdj} and {@code HeightDir}.
     *
     * @param bounds the text position to include; {@code null} is ignored
     */
    public void expand(RedTextPosition bounds) {

        if (bounds == null) {
            return;
        }
        // The far edges are summed here, at the call site, so the addition is carried out
        // in whatever numeric type the getters return.
        include(bounds.getXDirAdj(), bounds.getYDirAdj(), bounds.getXDirAdj() + bounds.getWidthDirAdj(), bounds.getYDirAdj() + bounds.getHeightDir());
    }


    /**
     * Returns the accumulated extent as a bounding box.
     *
     * @return the enclosing box, or a zero-sized box at the origin when the extent is still
     *         inverted (for example because nothing was added)
     */
    public BoundingBox getBounds() {

        boolean hasExtent = minX <= maxX && minY <= maxY;
        return hasExtent ? new BoundingBox(minX, minY, maxX - minX, maxY - minY) : new BoundingBox(0, 0, 0, 0);
    }


    /**
     * Computes the bounds of the zone from its lines and stores them on the zone.
     *
     * @param zone the zone to update
     */
    public static void setBounds(Zone zone) {

        BoundingBoxBuilder accumulator = new BoundingBoxBuilder();
        accumulator.expandByLines(zone);
        zone.setbBox(accumulator.getBounds());
    }


    /**
     * Computes the bounds of the line from its words and stores them on the line.
     *
     * @param line the line to update
     */
    public static void setBounds(Line line) {

        BoundingBoxBuilder accumulator = new BoundingBoxBuilder();
        accumulator.expandByWords(line);
        line.setbBox(accumulator.getBounds());
    }


    /**
     * Computes the bounds of the word from its text positions and stores them on the word.
     *
     * @param word the word to update
     */
    public static void setBounds(Word word) {

        BoundingBoxBuilder accumulator = new BoundingBoxBuilder();
        accumulator.expandByChunks(word);
        word.setbBox(accumulator.getBounds());
    }


    /** Folds one rectangle, given by its near and far corner, into the running extent. */
    private void include(double nearX, double nearY, double farX, double farY) {

        minX = Math.min(minX, nearX);
        minY = Math.min(minY, nearY);
        maxX = Math.max(maxX, farX);
        maxY = Math.max(maxY, farY);
    }

}

View File

@ -1,36 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Zone;
import lombok.experimental.UtilityClass;
/**
 * Sorting helpers for lists of {@link Zone}s.
 */
@UtilityClass
public class ZoneUtils {

    /**
     * Sorts the zones in place by Y and then X, using a tolerance of {@code 0}.
     *
     * @param zones the list to sort in place
     */
    public void sortZonesYX(List<Zone> zones) {

        sortZonesYX(zones, 0);
    }


    /**
     * Sorts the zones in place by the Y coordinate of their bounding box, falling back to
     * the X coordinate when the Y comparison reports equality.
     *
     * <p>NOTE(review): presumably {@code DoubleUtils.compareDouble} treats values within
     * {@code tolerance} as equal. If so, the ordering is not transitive for a non-zero
     * tolerance. Confirm against {@code DoubleUtils}.
     *
     * @param zones     the list to sort in place
     * @param tolerance passed through to every coordinate comparison
     */
    public void sortZonesYX(List<Zone> zones, final double tolerance) {

        Comparator<Zone> byYThenX = (left, right) -> {
            int yOrder = DoubleUtils.compareDouble(left.getbBox().getY(), right.getbBox().getY(), tolerance);
            if (yOrder != 0) {
                return yOrder;
            }
            return DoubleUtils.compareDouble(left.getbBox().getX(), right.getbBox().getX(), tolerance);
        };
        Collections.sort(zones, byYThenX);
    }

}

View File

@ -1,7 +1,8 @@
package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder;
import java.awt.geom.Rectangle2D;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject;
import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox;
public class BBoxZoneGroup extends BBoxObject {
@ -20,9 +21,9 @@ public class BBoxZoneGroup extends BBoxObject {
}
public void setbBox(BoundingBox bBox) {
public void setbBox(Rectangle2D bBox) {
super.setbBox(bBox);
super.setBBox(bBox);
}
@ -56,7 +57,7 @@ public class BBoxZoneGroup extends BBoxObject {
assert x1 >= x0;
assert y1 >= y0;
this.setbBox(new BoundingBox(x0, y0, x1 - x0, y1 - y0));
this.setBBox(new Rectangle2D.Double(x0, y0, x1 - x0, y1 - y0));
return this;
}