From 9e5778d4b29372790d77855207d63fcb779b0c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Fri, 16 Feb 2024 14:08:59 +0100 Subject: [PATCH] More --- .../HierarchicalReadingOrderResolver.java | 16 ++-- .../docstrum/model/refactor/BBoxObject.java | 37 ++++--- .../docstrum/model/refactor/BoundingBox.java | 6 ++ .../docstrum/model/refactor/Line.java | 21 ---- .../docstrum/model/refactor/Word.java | 23 ----- .../docstrum/model/refactor/Zone.java | 19 ---- .../refactor/docstrum/CharacterLine.java | 35 +------ .../refactor/docstrum/CharacterZone.java | 6 +- .../refactor/utils/BoundingBoxBuilder.java | 96 ------------------- .../model/refactor/utils/ZoneUtils.java | 36 ------- .../docstrum/readingorder/BBoxZoneGroup.java | 9 +- 11 files changed, 44 insertions(+), 260 deletions(-) delete mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Line.java delete mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Word.java delete mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Zone.java delete mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/BoundingBoxBuilder.java delete mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/ZoneUtils.java diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java index 0521dc5..8fd6d78 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java @@ -173,15 +173,15 @@ public class HierarchicalReadingOrderResolver { private boolean shouldBeSwapped(BBoxObject first, BBoxObject second) { double cx, cy, cw, ch, ox, oy, ow, oh; - cx = first.getbBox().getX(); - cy = first.getbBox().getY(); - cw = first.getbBox().getWidth(); - ch = first.getbBox().getHeight(); + cx = first.getBBox().getX(); + cy = first.getBBox().getY(); + cw = first.getBBox().getWidth(); + ch = first.getBBox().getHeight(); - ox = second.getbBox().getX(); - oy = second.getbBox().getY(); - ow = second.getbBox().getWidth(); - oh = second.getbBox().getHeight(); + ox = second.getBBox().getX(); + oy = second.getBBox().getY(); + ow = second.getBBox().getWidth(); + oh = second.getBBox().getHeight(); // Determine Octant // diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java index 958e4e4..a4bd1d0 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java @@ -1,26 +1,13 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor; +import java.awt.geom.Rectangle2D; + +import lombok.Data; + +@Data public abstract class BBoxObject { - private BoundingBox bBox; - - - public double getArea() { - - return (bBox.getHeight() * bBox.getWidth()); - } - - - public BoundingBox getbBox() { - - return bBox; - } - - - public void setbBox(BoundingBox bBox) { - - this.bBox = bBox; - } + private Rectangle2D bBox; public double getX() { @@ -46,4 +33,16 @@ public abstract class BBoxObject { return bBox.getHeight(); } + + public double getArea() { + + return (bBox.getHeight() * bBox.getWidth()); + } + + + public boolean contains(Rectangle2D contained, double tolerance) { + + return bBox.getX() <= contained.getX() + tolerance && bBox.getY() <= contained.getY() + tolerance && bBox.getX() + bBox.getWidth() >= contained.getX() + contained.getWidth() - tolerance && bBox.getY() + bBox.getHeight() >= contained.getY() + contained.getHeight() - tolerance; + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java index 81173b8..e878eb9 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java @@ -18,4 +18,10 @@ public final class BoundingBox { return x <= contained.getX() + tolerance && y <= contained.getY() + tolerance && x + width >= contained.getX() + contained.getWidth() - tolerance && y + height >= contained.getY() + contained.getHeight() - tolerance; } + + public double getArea() { + + return (height * width); + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Line.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Line.java deleted file mode 100644 index aa9dc9f..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Line.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor; - -import java.util.ArrayList; -import java.util.List; - -import lombok.Getter; -import lombok.Setter; - -public class Line extends BBoxObject { - - @Setter - @Getter - private List words = new ArrayList<>(); - - - public void addWord(Word word) { - - this.words.add(word); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Word.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Word.java deleted file mode 100644 index 4b1fbdc..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Word.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor; - -import java.util.ArrayList; -import java.util.List; - -import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition; - -import lombok.Getter; -import lombok.Setter; - -public class Word extends BBoxObject { - - @Setter - @Getter - private List textPositions = new ArrayList<>(); - - - public void addChunk(RedTextPosition chunk) { - - this.textPositions.add(chunk); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Zone.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Zone.java deleted file mode 100644 index 5f3eb1d..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/Zone.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor; - -import java.util.ArrayList; -import java.util.List; - -import lombok.Data; - -@Data -public final class Zone extends BBoxObject { - - private List lines = new ArrayList<>(); - - - public void addLine(Line line) { - - this.lines.add(line); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java index 1d4c719..1d3d803 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java @@ -1,15 +1,12 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum; +import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Line; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Word; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils.BoundingBoxBuilder; import lombok.Data; @@ -121,7 +118,7 @@ public class CharacterLine extends BBoxObject { } - public void computeWords(double wordSpacing) { + private void computeWords(double wordSpacing) { TextPositionSequence word = new TextPositionSequence(); Character previous = null; @@ -140,31 +137,7 @@ public class CharacterLine extends BBoxObject { } - public Line convertToBxLine(double wordSpacing) { - - Line line = new Line(); - Word word = new Word(); - Character previousComponent = null; - for (Character component : characters) { - if (previousComponent != null) { - double dist = component.getTextPosition().getXDirAdj() - previousComponent.getTextPosition().getXDirAdj() - previousComponent.getTextPosition().getWidthDirAdj(); - if (dist > wordSpacing) { - BoundingBoxBuilder.setBounds(word); - line.addWord(word); - word = new Word(); - } - } - word.addChunk(component.getTextPosition()); - previousComponent = component; - } - BoundingBoxBuilder.setBounds(word); - line.addWord(word); - BoundingBoxBuilder.setBounds(line); - return line; - } - - - public void buildBox() { + private void buildBox() { double minX = Double.POSITIVE_INFINITY; double minY = Double.POSITIVE_INFINITY; @@ -180,7 +153,7 @@ public class CharacterLine extends BBoxObject { } - this.setbBox(new BoundingBox(minX, minY, maxX - minX, maxY - minY)); + this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY)); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java index d47f89b..903c088 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java @@ -1,10 +1,10 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum; +import java.awt.geom.Rectangle2D; import java.util.Comparator; import java.util.List; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox; import lombok.Data; @@ -12,7 +12,7 @@ import lombok.Data; public class CharacterZone extends BBoxObject { private List lines; - + public CharacterZone(List lines) { @@ -38,7 +38,7 @@ public class CharacterZone extends BBoxObject { } - this.setbBox(new BoundingBox(minX, minY, maxX - minX, maxY - minY)); + this.setBBox(new Rectangle2D.Double(minX, minY, maxX - minX, maxY - minY)); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/BoundingBoxBuilder.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/BoundingBoxBuilder.java deleted file mode 100644 index d77ac03..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/BoundingBoxBuilder.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils; - -import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Line; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Word; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Zone; - -public class BoundingBoxBuilder { - - private double minX = Double.POSITIVE_INFINITY; - private double minY = Double.POSITIVE_INFINITY; - private double maxX = Double.NEGATIVE_INFINITY; - private double maxY = Double.NEGATIVE_INFINITY; - - - public void expandByLines(Zone zone) { - - for (Line line : zone.getLines()) { - expand(line.getbBox()); - } - } - - - public void expandByWords(Line line) { - - for (Word word : line.getWords()) { - expand(word.getbBox()); - } - } - - - public void expandByChunks(Word word) { - - for (RedTextPosition chunk : word.getTextPositions()) { - expand(chunk); - } - } - - - public void expand(BoundingBox bounds) { - - if (bounds != null) { - minX = Math.min(minX, bounds.getX()); - minY = Math.min(minY, bounds.getY()); - maxX = Math.max(maxX, bounds.getX() + bounds.getWidth()); - maxY = Math.max(maxY, bounds.getY() + bounds.getHeight()); - } - } - - - public void expand(RedTextPosition bounds) { - - if (bounds != null) { - minX = Math.min(minX, bounds.getXDirAdj()); - minY = Math.min(minY, bounds.getYDirAdj()); - maxX = Math.max(maxX, bounds.getXDirAdj() + bounds.getWidthDirAdj()); - maxY = Math.max(maxY, bounds.getYDirAdj() + bounds.getHeightDir()); - } - } - - - public BoundingBox getBounds() { - - if (minX <= maxX && minY <= maxY) { - return new BoundingBox(minX, minY, maxX - minX, maxY - minY); - } else { - return new BoundingBox(0, 0, 0, 0); - } - } - - - public static void setBounds(Zone zone) { - - BoundingBoxBuilder builder = new BoundingBoxBuilder(); - builder.expandByLines(zone); - zone.setbBox(builder.getBounds()); - } - - - public static void setBounds(Line line) { - - BoundingBoxBuilder builder = new BoundingBoxBuilder(); - builder.expandByWords(line); - line.setbBox(builder.getBounds()); - } - - - public static void setBounds(Word word) { - - BoundingBoxBuilder builder = new BoundingBoxBuilder(); - builder.expandByChunks(word); - word.setbBox(builder.getBounds()); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/ZoneUtils.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/ZoneUtils.java deleted file mode 100644 index 1a93cac..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/ZoneUtils.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils; - -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.Zone; - -import lombok.experimental.UtilityClass; - -@UtilityClass -public class ZoneUtils { - - public void sortZonesYX(List zones) { - - sortZonesYX(zones, 0); - } - - - public void sortZonesYX(List zones, final double tolerance) { - - Collections.sort(zones, new Comparator() { - - @Override - public int compare(Zone o1, Zone o2) { - - int cmp = DoubleUtils.compareDouble(o1.getbBox().getY(), o2.getbBox().getY(), tolerance); - if (cmp == 0) { - return DoubleUtils.compareDouble(o1.getbBox().getX(), o2.getbBox().getX(), tolerance); - } - return cmp; - } - }); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java index 061c85d..1035ecd 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java @@ -1,7 +1,8 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder; +import java.awt.geom.Rectangle2D; + import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox; public class BBoxZoneGroup extends BBoxObject { @@ -20,9 +21,9 @@ public class BBoxZoneGroup extends BBoxObject { } - public void setbBox(BoundingBox bBox) { + public void setbBox(Rectangle2D bBox) { - super.setbBox(bBox); + super.setBBox(bBox); } @@ -56,7 +57,7 @@ public class BBoxZoneGroup extends BBoxObject { assert x1 >= x0; assert y1 >= y0; - this.setbBox(new BoundingBox(x0, y0, x1 - x0, y1 - y0)); + this.setBBox(new Rectangle2D.Double(x0, y0, x1 - x0, y1 - y0)); return this; }