diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index f36444b..95c96b5 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -49,8 +49,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.blockificat import com.knecon.fforesight.service.layoutparser.processor.services.classification.DocuMineClassificationService; import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService; import com.knecon.fforesight.service.layoutparser.processor.services.classification.TaasClassificationService; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.DocstrumSegmenter; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.HierarchicalReadingOrderResolver; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.DocstrumSegmentationService; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper; @@ -90,8 +89,7 @@ public class LayoutParsingPipeline { RedactManagerBlockificationService redactManagerBlockificationService; LayoutGridService layoutGridService; ObservationRegistry observationRegistry; - DocstrumSegmenter docstrumSegmenter; - HierarchicalReadingOrderResolver hierarchicalReadingOrderResolver; + DocstrumSegmentationService docstrumSegmentationService; public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { @@ -251,8 +249,7 @@ public class LayoutParsingPipeline { // Docstrum AtomicInteger num = new AtomicInteger(pageNumber); - var zones = docstrumSegmenter.segmentPage(stripper.getTextPositionSequences()); - zones = hierarchicalReadingOrderResolver.resolve(zones); + var zones = docstrumSegmentationService.segmentPage(stripper.getTextPositionSequences()); List pageBlocks = new ArrayList<>(); AtomicInteger numOnPage = new AtomicInteger(1); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java new file mode 100644 index 0000000..955ca58 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmentationService.java @@ -0,0 +1,48 @@ +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; + +import java.util.List; +import java.util.stream.Collectors; + +import org.springframework.stereotype.Service; + +import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.LineBuilderService; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.NearestNeighbourService; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.ReadingOrderService; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.SpacingService; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service.ZoneBuilderService; + +import lombok.RequiredArgsConstructor; + +@Service +@RequiredArgsConstructor +public class DocstrumSegmentationService { + + private final NearestNeighbourService nearestNeighbourService; + private final SpacingService spacingService; + private final LineBuilderService lineBuilderService; + private final ZoneBuilderService zoneBuilderService; + private final ReadingOrderService readingOrderService; + + + public List segmentPage(List textPositions) { + + var positions = textPositions.stream().map(TextPositionSequence::getTextPositions).flatMap(List::stream).toList(); + + var components = positions.stream().map(Character::new).collect(Collectors.toList()); + + nearestNeighbourService.findNearestNeighbors(components); + + var characterSpacing = spacingService.computeCharacterSpacing(components); + var lineSpacing = spacingService.computeLineSpacing(components); + + var lines = lineBuilderService.buildLines(components, characterSpacing, lineSpacing); + + var zones = zoneBuilderService.buildZones(lines, characterSpacing, lineSpacing); + + return readingOrderService.resolve(zones); + } + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmenter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmenter.java deleted file mode 100644 index f660f6d..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/DocstrumSegmenter.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; - -import java.util.List; -import java.util.stream.Collectors; - -import org.springframework.stereotype.Service; - -import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Character; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterLine; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterZone; - -import lombok.RequiredArgsConstructor; - -@Service -@RequiredArgsConstructor -public class DocstrumSegmenter { - - private final NearestNeighbourService nearestNeighbourService; - private final SpacingService spacingService; - private final LineBuilderService lineBuilderService; - private final ZoneBuilderService zoneBuilderService; - - - public List segmentPage(List textPositions) { - - var positions = textPositions.stream().map(TextPositionSequence::getTextPositions).flatMap(List::stream).toList(); - - var components = positions.stream().map(Character::new).collect(Collectors.toList()); - - nearestNeighbourService.findNearestNeighbors(components); - - double characterSpacing = spacingService.computeCharacterSpacing(components); - double lineSpacing = spacingService.computeLineSpacing(components); - - List lines = lineBuilderService.buildLines(components, characterSpacing, lineSpacing); - - return zoneBuilderService.buildZones(lines, characterSpacing, lineSpacing); - - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/AngleFilter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java similarity index 94% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/AngleFilter.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java index e1a9fb7..5ecc891 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/AngleFilter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/AngleFilter.java @@ -1,6 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; - -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Neighbor; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model; /** * Filter class for neighbor objects that checks if the angle of the diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java similarity index 93% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java index a4bd1d0..5215d6f 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BBoxObject.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/BoundingBox.java @@ -1,11 +1,11 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model; import java.awt.geom.Rectangle2D; import lombok.Data; @Data -public abstract class BBoxObject { +public abstract class BoundingBox { private Rectangle2D bBox; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/Character.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java similarity index 97% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/Character.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java index 0c3a0e1..150a926 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/Character.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Character.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model; import java.util.ArrayList; import java.util.List; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java similarity index 91% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java index 1d3d803..892771d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterLine.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Line.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model; import java.awt.geom.Rectangle2D; import java.util.ArrayList; @@ -6,12 +6,11 @@ import java.util.Arrays; import java.util.List; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; import lombok.Data; @Data -public class CharacterLine extends BBoxObject { +public class Line extends BoundingBox { private static final double WORD_DISTANCE_MULTIPLIER = 0.2; @@ -27,7 +26,7 @@ public class CharacterLine extends BBoxObject { private final List words = new ArrayList<>(); - public CharacterLine(List characters, double wordSpacing) { + public Line(List characters, double wordSpacing) { this.characters = characters; @@ -86,7 +85,7 @@ public class CharacterLine extends BBoxObject { } - public double angularDifference(CharacterLine j) { + public double angularDifference(Line j) { double diff = Math.abs(getAngle() - j.getAngle()); if (diff <= Math.PI / 2) { @@ -97,7 +96,7 @@ public class CharacterLine extends BBoxObject { } - public double horizontalDistance(CharacterLine other) { + public double horizontalDistance(Line other) { double[] xs = new double[4]; xs[0] = x0; @@ -110,7 +109,7 @@ public class CharacterLine extends BBoxObject { } - public double verticalDistance(CharacterLine other) { + public double verticalDistance(Line other) { double ym = (y0 + y1) / 2; double yn = (other.y0 + other.y1) / 2; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/Neighbor.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java similarity index 95% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/Neighbor.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java index 856f514..b2b4174 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/Neighbor.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Neighbor.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model; import lombok.Getter; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java similarity index 66% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java index 903c088..7960f63 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/docstrum/CharacterZone.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/Zone.java @@ -1,22 +1,20 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model; import java.awt.geom.Rectangle2D; import java.util.Comparator; import java.util.List; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; - import lombok.Data; @Data -public class CharacterZone extends BBoxObject { +public class Zone extends BoundingBox { - private List lines; + private List lines; - public CharacterZone(List lines) { + public Zone(List lines) { - lines.sort(Comparator.comparingDouble(CharacterLine::getY)); + lines.sort(Comparator.comparingDouble(Line::getY)); this.lines = lines; buildBox(); } @@ -29,7 +27,7 @@ public class CharacterZone extends BBoxObject { double maxX = Double.NEGATIVE_INFINITY; double maxY = Double.NEGATIVE_INFINITY; - for (CharacterLine line : lines) { + for (Line line : lines) { minX = Math.min(minX, line.getX()); minY = Math.min(minY, line.getY()); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java deleted file mode 100644 index e878eb9..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/BoundingBox.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor; - -import lombok.AllArgsConstructor; -import lombok.Data; - -@Data -@AllArgsConstructor -public final class BoundingBox { - - private final double x; - private final double y; - private final double width; - private final double height; - - - public boolean contains(BoundingBox contained, double tolerance) { - - return x <= contained.getX() + tolerance && y <= contained.getY() + tolerance && x + width >= contained.getX() + contained.getWidth() - tolerance && y + height >= contained.getY() + contained.getHeight() - tolerance; - } - - - public double getArea() { - - return (height * width); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BoundingBoxZoneGroup.java similarity index 65% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BoundingBoxZoneGroup.java index 1035ecd..b03ce06 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BBoxZoneGroup.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/BoundingBoxZoneGroup.java @@ -2,15 +2,15 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.r import java.awt.geom.Rectangle2D; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox; -public class BBoxZoneGroup extends BBoxObject { +public class BoundingBoxZoneGroup extends BoundingBox { - private BBoxObject leftChild; - private BBoxObject rightChild; + private BoundingBox leftChild; + private BoundingBox rightChild; - public BBoxZoneGroup(BBoxObject child1, BBoxObject child2) { + public BoundingBoxZoneGroup(BoundingBox child1, BoundingBox child2) { this.leftChild = child1; this.rightChild = child2; @@ -27,33 +27,33 @@ public class BBoxZoneGroup extends BBoxObject { } - public BBoxObject getLeftChild() { + public BoundingBox getLeftChild() { return leftChild; } - public BBoxObject getRightChild() { + public BoundingBox getRightChild() { return rightChild; } - public BBoxZoneGroup setLeftChild(BBoxObject obj) { + public BoundingBoxZoneGroup setLeftChild(BoundingBox obj) { this.leftChild = obj; return this; } - public BBoxZoneGroup setRightChild(BBoxObject obj) { + public BoundingBoxZoneGroup setRightChild(BoundingBox obj) { this.rightChild = obj; return this; } - public BBoxZoneGroup setBounds(double x0, double y0, double x1, double y1) { + public BoundingBoxZoneGroup setBounds(double x0, double y0, double x1, double y1) { assert x1 >= x0; assert y1 >= y0; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DistElem.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DistElem.java index 7755d8d..29ec401 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DistElem.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DistElem.java @@ -1,6 +1,6 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils.DoubleUtils; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils; public class DistElem implements Comparable> { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DocumentPlane.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DocumentPlane.java index adcf323..7b33f21 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DocumentPlane.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/DocumentPlane.java @@ -1,13 +1,13 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder; +import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BoundingBox; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterZone; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone; /** * A set-like data structure for objects placed on a plane. Can efficiently find objects in a certain rectangular area. @@ -20,7 +20,7 @@ public class DocumentPlane { /** * List of objects on the plane. Stored in a random order */ - private final List objs; + private final List objs; /** * Size of a grid square. If gridSize=50, then the plane is divided into squares of size 50. Each square contains * objects placed in a 50x50 area @@ -31,7 +31,7 @@ public class DocumentPlane { * grid square. Single object can be stored under several keys (depending on its physical size). Grid squares are * lazy-initialized. */ - private final Map> grid; + private final Map> grid; /** * Representation of XY coordinates @@ -76,18 +76,18 @@ public class DocumentPlane { } - public List getObjects() { + public List getObjects() { return objs; } - public DocumentPlane(List objectList, int gridSize) { + public DocumentPlane(List objectList, int gridSize) { - this.grid = new HashMap>(); - this.objs = new ArrayList(); + this.grid = new HashMap>(); + this.objs = new ArrayList(); this.gridSize = gridSize; - for (CharacterZone obj : objectList) { + for (Zone obj : objectList) { add(obj); } } @@ -100,15 +100,15 @@ public class DocumentPlane { * @param obj2 object * @return object list */ - public List findObjectsBetween(BBoxObject obj1, BBoxObject obj2) { + public List findObjectsBetween(BoundingBox obj1, BoundingBox obj2) { double x0 = Math.min(obj1.getX(), obj2.getX()); double y0 = Math.min(obj1.getY(), obj2.getY()); double x1 = Math.max(obj1.getX() + obj1.getWidth(), obj2.getX() + obj2.getWidth()); double y1 = Math.max(obj1.getY() + obj1.getHeight(), obj2.getY() + obj2.getHeight()); assert x1 >= x0 && y1 >= y0; - BoundingBox searchBounds = new BoundingBox(x0, y0, x1 - x0, y1 - y0); - List objsBetween = find(searchBounds); + Rectangle2D searchBounds = new Rectangle2D.Double(x0, y0, x1 - x0, y1 - y0); + List objsBetween = find(searchBounds); /* * the rectangle area must contain at least obj1 and obj2 */ @@ -125,9 +125,9 @@ public class DocumentPlane { * @param obj2 object * @return true if anything is placed between, false otherwise */ - public boolean anyObjectsBetween(BBoxObject obj1, BBoxObject obj2) { + public boolean anyObjectsBetween(BoundingBox obj1, BoundingBox obj2) { - List lObjs = findObjectsBetween(obj1, obj2); + List lObjs = findObjectsBetween(obj1, obj2); return !(lObjs.isEmpty()); } @@ -138,7 +138,7 @@ public class DocumentPlane { * @param obj object * @return document plane */ - public DocumentPlane add(BBoxObject obj) { + public DocumentPlane add(BoundingBox obj) { int objsBefore = this.objs.size(); /* @@ -151,7 +151,7 @@ public class DocumentPlane { /* * add the non-existing key */ - grid.put(xy, new ArrayList()); + grid.put(xy, new ArrayList()); grid.get(xy).add(obj); assert grid.get(xy).size() == 1; } else { @@ -172,7 +172,7 @@ public class DocumentPlane { } - public DocumentPlane remove(BBoxObject obj) { + public DocumentPlane remove(BoundingBox obj) { /* * iterate over grid squares */ @@ -196,10 +196,10 @@ public class DocumentPlane { * @param searchBounds is a search rectangle * @return list of objects in!side search rectangle */ - public List find(BoundingBox searchBounds) { + public List find(Rectangle2D searchBounds) { - List done = new ArrayList(); //contains already considered objects (wrt. optimization) - List ret = new ArrayList(); + List done = new ArrayList(); //contains already considered objects (wrt. optimization) + List ret = new ArrayList(); double x0 = searchBounds.getX(); double y0 = searchBounds.getY(); double y1 = searchBounds.getY() + searchBounds.getHeight(); @@ -213,7 +213,7 @@ public class DocumentPlane { if (!grid.containsKey(xy)) { continue; } - for (BBoxObject obj : grid.get(xy)) { + for (BoundingBox obj : grid.get(xy)) { if (done.contains(obj)) /* * omit if already checked */ { @@ -244,9 +244,9 @@ public class DocumentPlane { */ protected int elementsInGrid() { - List objs_ = new ArrayList(); + List objs_ = new ArrayList(); for (GridXY coord : grid.keySet()) { - for (BBoxObject obj : grid.get(coord)) { + for (BoundingBox obj : grid.get(coord)) { if (!objs_.contains(obj)) { objs_.add(obj); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/TreeToListConverter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/TreeToListConverter.java index 81cdaf1..d58c90b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/TreeToListConverter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/readingorder/TreeToListConverter.java @@ -3,25 +3,25 @@ package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.r import java.util.ArrayList; import java.util.List; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterZone; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone; public class TreeToListConverter { - public List convertToList(BBoxZoneGroup obj) { + public List convertToList(BoundingBoxZoneGroup obj) { - List ret = new ArrayList<>(); - if (obj.getLeftChild() instanceof CharacterZone) { - CharacterZone zone = (CharacterZone) obj.getLeftChild(); + List ret = new ArrayList<>(); + if (obj.getLeftChild() instanceof Zone) { + Zone zone = (Zone) obj.getLeftChild(); ret.add(zone); } else { // obj.getLeftChild() instanceof BxZoneGroup - ret.addAll(convertToList((BBoxZoneGroup) obj.getLeftChild())); + ret.addAll(convertToList((BoundingBoxZoneGroup) obj.getLeftChild())); } - if (obj.getRightChild() instanceof CharacterZone) { - CharacterZone zone = (CharacterZone) obj.getRightChild(); + if (obj.getRightChild() instanceof Zone) { + Zone zone = (Zone) obj.getRightChild(); ret.add(zone); } else { // obj.getRightChild() instanceof BxZoneGroup - ret.addAll(convertToList((BBoxZoneGroup) obj.getRightChild())); + ret.addAll(convertToList((BoundingBoxZoneGroup) obj.getRightChild())); } return ret; } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/LineBuilderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java similarity index 81% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/LineBuilderService.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java index 8f32a9e..44da8da 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/LineBuilderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/LineBuilderService.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service; import java.util.ArrayList; import java.util.Comparator; @@ -6,9 +6,10 @@ import java.util.List; import org.springframework.stereotype.Service; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.AngleFilter; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.DisjointSets; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Character; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterLine; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line; @Service public class LineBuilderService { @@ -18,7 +19,7 @@ public class LineBuilderService { private static final double ANGLE_TOLERANCE = Math.PI / 6; - public List buildLines(List characters, double characterSpacing, double lineSpacing) { + public List buildLines(List characters, double characterSpacing, double lineSpacing) { double maxHorizontalDistance = characterSpacing * CHARACTER_SPACING_DISTANCE_MULTIPLIER; double maxVerticalDistance = lineSpacing * MAX_VERTICAL_CHARACTER_DISTANCE; @@ -36,13 +37,13 @@ public class LineBuilderService { }); }); - List lines = new ArrayList<>(); + List lines = new ArrayList<>(); sets.forEach(group -> { List lineComponents = new ArrayList<>(group); lineComponents.sort(Comparator.comparingDouble(Character::getX)); - lines.add(new CharacterLine(lineComponents, characterSpacing)); + lines.add(new Line(lineComponents, characterSpacing)); }); - + return lines; } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/NearestNeighbourService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java similarity index 95% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/NearestNeighbourService.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java index 5cbd406..1a3f6e2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/NearestNeighbourService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/NearestNeighbourService.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service; import java.util.ArrayList; import java.util.Comparator; @@ -6,8 +6,8 @@ import java.util.List; import org.springframework.stereotype.Service; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Character; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Neighbor; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Neighbor; @Service public class NearestNeighbourService { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java similarity index 70% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java index 8fd6d78..098b0ca 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/HierarchicalReadingOrderResolver.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ReadingOrderService.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service; import java.util.ArrayList; import java.util.Collection; @@ -8,42 +8,42 @@ import java.util.List; import org.springframework.stereotype.Service; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.BBoxObject; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterZone; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils.DoubleUtils; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.BBoxZoneGroup; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.BoundingBox; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.BoundingBoxZoneGroup; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.DistElem; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.DocumentPlane; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.readingorder.TreeToListConverter; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils.DoubleUtils; @Service -public class HierarchicalReadingOrderResolver { +public class ReadingOrderService { static final int GRIDSIZE = 50; static final double EPS = 0.01; static final int MAX_ZONES = 1000; - static final Comparator Y_ASCENDING_ORDER = new Comparator() { + static final Comparator Y_ASCENDING_ORDER = new Comparator() { @Override - public int compare(BBoxObject o1, BBoxObject o2) { + public int compare(BoundingBox o1, BoundingBox o2) { return DoubleUtils.compareDouble(o1.getY(), o2.getY(), EPS); } }; - static final Comparator X_ASCENDING_ORDER = new Comparator() { + static final Comparator X_ASCENDING_ORDER = new Comparator() { @Override - public int compare(BBoxObject o1, BBoxObject o2) { + public int compare(BoundingBox o1, BoundingBox o2) { return DoubleUtils.compareDouble(o1.getX(), o2.getX(), EPS); } }; - static final Comparator YX_ASCENDING_ORDER = new Comparator() { + static final Comparator YX_ASCENDING_ORDER = new Comparator() { @Override - public int compare(BBoxObject o1, BBoxObject o2) { + public int compare(BoundingBox o1, BoundingBox o2) { int yCompare = Y_ASCENDING_ORDER.compare(o1, o2); return yCompare == 0 ? X_ASCENDING_ORDER.compare(o1, o2) : yCompare; @@ -51,9 +51,9 @@ public class HierarchicalReadingOrderResolver { }; - public List resolve(List zones) { + public List resolve(List zones) { - List orderedZones; + List orderedZones; if (zones.size() > MAX_ZONES) { orderedZones = new ArrayList<>(zones); Collections.sort(orderedZones, YX_ASCENDING_ORDER); @@ -64,19 +64,19 @@ public class HierarchicalReadingOrderResolver { } - private List reorderZones(List unorderedZones) { + private List reorderZones(List unorderedZones) { if (unorderedZones.isEmpty()) { return new ArrayList<>(); } else if (unorderedZones.size() == 1) { - List ret = new ArrayList<>(1); + List ret = new ArrayList<>(1); ret.add(unorderedZones.get(0)); return ret; } else { - BBoxZoneGroup bxZonesTree = groupZonesHierarchically(unorderedZones); + BoundingBoxZoneGroup bxZonesTree = groupZonesHierarchically(unorderedZones); sortGroupedZones(bxZonesTree); TreeToListConverter treeConverter = new TreeToListConverter(); - List orderedZones = treeConverter.convertToList(bxZonesTree); + List orderedZones = treeConverter.convertToList(bxZonesTree); assert unorderedZones.size() == orderedZones.size(); return orderedZones; } @@ -90,50 +90,50 @@ public class HierarchicalReadingOrderResolver { * @param zones is a list of unordered zones * @return root of the zones clustered in a tree */ - private BBoxZoneGroup groupZonesHierarchically(List zones) { + private BoundingBoxZoneGroup groupZonesHierarchically(List zones) { /* * Distance tuples are stored sorted by ascending distance value */ - List> dists = new ArrayList>(zones.size() * zones.size() / 2); + List> dists = new ArrayList>(zones.size() * zones.size() / 2); for (int idx1 = 0; idx1 < zones.size(); ++idx1) { for (int idx2 = idx1 + 1; idx2 < zones.size(); ++idx2) { - CharacterZone zone1 = zones.get(idx1); - CharacterZone zone2 = zones.get(idx2); - dists.add(new DistElem(false, distance(zone1, zone2), zone1, zone2)); + Zone zone1 = zones.get(idx1); + Zone zone2 = zones.get(idx2); + dists.add(new DistElem(false, distance(zone1, zone2), zone1, zone2)); } } Collections.sort(dists); DocumentPlane plane = new DocumentPlane(zones, GRIDSIZE); while (!dists.isEmpty()) { - DistElem distElem = dists.get(0); + DistElem distElem = dists.get(0); dists.remove(0); if (!distElem.isC() && plane.anyObjectsBetween(distElem.getObj1(), distElem.getObj2())) { - dists.add(new DistElem(true, distElem.getDist(), distElem.getObj1(), distElem.getObj2())); + dists.add(new DistElem(true, distElem.getDist(), distElem.getObj1(), distElem.getObj2())); continue; } - BBoxZoneGroup newGroup = new BBoxZoneGroup(distElem.getObj1(), distElem.getObj2()); + BoundingBoxZoneGroup newGroup = new BoundingBoxZoneGroup(distElem.getObj1(), distElem.getObj2()); plane.remove(distElem.getObj1()).remove(distElem.getObj2()); dists = removeDistElementsContainingObject(dists, distElem.getObj1()); dists = removeDistElementsContainingObject(dists, distElem.getObj2()); - for (BBoxObject other : plane.getObjects()) { - dists.add(new DistElem(false, distance(other, newGroup), newGroup, other)); + for (BoundingBox other : plane.getObjects()) { + dists.add(new DistElem(false, distance(other, newGroup), newGroup, other)); } Collections.sort(dists); plane.add(newGroup); } assert plane.getObjects().size() == 1 : "There should be one object left at the plane after grouping"; - return (BBoxZoneGroup) plane.getObjects().get(0); + return (BoundingBoxZoneGroup) plane.getObjects().get(0); } /** * Removes all distance tuples containing obj */ - private List> removeDistElementsContainingObject(Collection> list, BBoxObject obj) { + private List> removeDistElementsContainingObject(Collection> list, BoundingBox obj) { - List> ret = new ArrayList>(); - for (DistElem distElem : list) { + List> ret = new ArrayList>(); + for (DistElem distElem : list) { if (distElem.getObj1() != obj && distElem.getObj2() != obj) { ret.add(distElem); } @@ -149,28 +149,28 @@ public class HierarchicalReadingOrderResolver { * * @param group */ - private void sortGroupedZones(BBoxZoneGroup group) { + private void sortGroupedZones(BoundingBoxZoneGroup group) { - BBoxObject leftChild = group.getLeftChild(); - BBoxObject rightChild = group.getRightChild(); + BoundingBox leftChild = group.getLeftChild(); + BoundingBox rightChild = group.getRightChild(); if (shouldBeSwapped(leftChild, rightChild)) { // swap group.setLeftChild(rightChild); group.setRightChild(leftChild); } - if (leftChild instanceof BBoxZoneGroup) // if the child is a tree node, then recurse + if (leftChild instanceof BoundingBoxZoneGroup) // if the child is a tree node, then recurse { - sortGroupedZones((BBoxZoneGroup) leftChild); + sortGroupedZones((BoundingBoxZoneGroup) leftChild); } - if (rightChild instanceof BBoxZoneGroup) // as above - recurse + if (rightChild instanceof BoundingBoxZoneGroup) // as above - recurse { - sortGroupedZones((BBoxZoneGroup) rightChild); + sortGroupedZones((BoundingBoxZoneGroup) rightChild); } } - private boolean shouldBeSwapped(BBoxObject first, BBoxObject second) { + private boolean shouldBeSwapped(BoundingBox first, BoundingBox second) { double cx, cy, cw, ch, ox, oy, ow, oh; cx = first.getBBox().getX(); @@ -216,7 +216,7 @@ public class HierarchicalReadingOrderResolver { * * @return distance value based on objects' coordinates and physical size on a plane */ - private double distance(BBoxObject obj1, BBoxObject obj2) { + private double distance(BoundingBox obj1, BoundingBox obj2) { double x0 = Math.min(obj1.getX(), obj2.getX()); double y0 = Math.min(obj1.getY(), obj2.getY()); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/SpacingService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java similarity index 91% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/SpacingService.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java index 0cb851a..55105fd 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/SpacingService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/SpacingService.java @@ -1,12 +1,13 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service; import java.util.List; import org.springframework.stereotype.Service; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.AngleFilter; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Character; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Histogram; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Character; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.Neighbor; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Neighbor; @Service public class SpacingService { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/ZoneBuilderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java similarity index 81% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/ZoneBuilderService.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java index ed30f34..ef2344c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/ZoneBuilderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/service/ZoneBuilderService.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.service; import java.util.ArrayList; import java.util.List; @@ -6,8 +6,8 @@ import java.util.List; import org.springframework.stereotype.Service; import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.DisjointSets; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterLine; -import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.docstrum.CharacterZone; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Line; +import com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.Zone; @Service public class ZoneBuilderService { @@ -28,14 +28,14 @@ public class ZoneBuilderService { public static final int MAX_ZONES = 300; - public List buildZones(List lines, double characterSpacing, double lineSpacing) { + public List buildZones(List lines, double characterSpacing, double lineSpacing) { double minHorizontalDistance = characterSpacing * MIN_HORIZONTAL_DISTANCE_MULTIPLIER; double maxVerticalDistance = lineSpacing * MAX_VERTICAL_DISTANCE_MULTIPLIER; double minHorizontalMergeDistance = characterSpacing * MIN_HORIZONTAL_MERGE_DISTANCE_MULTIPLIER; double maxVerticalMergeDistance = lineSpacing * MAX_VERTICAL_MERGE_DISTANCE_MULTIPLIER; - DisjointSets sets = new DisjointSets<>(lines); + DisjointSets sets = new DisjointSets<>(lines); double meanHeight = calculateMeanHeight(lines); @@ -61,28 +61,28 @@ public class ZoneBuilderService { } })); - List zones = new ArrayList<>(); + List zones = new ArrayList<>(); sets.forEach(group -> { - zones.add(new CharacterZone(new ArrayList<>(group))); + zones.add(new Zone(new ArrayList<>(group))); }); if (zones.size() > MAX_ZONES) { - List oneZoneLines = new ArrayList<>(); - for (CharacterZone zone : zones) { + List oneZoneLines = new ArrayList<>(); + for (Zone zone : zones) { oneZoneLines.addAll(zone.getLines()); } - return List.of(new CharacterZone(oneZoneLines)); + return List.of(new Zone(oneZoneLines)); } return zones; } - private double calculateMeanHeight(List lines) { + private double calculateMeanHeight(List lines) { double meanHeight = 0.0; double weights = 0.0; - for (CharacterLine line : lines) { + for (Line line : lines) { double weight = line.getLength(); meanHeight += line.getHeight() * weight; weights += weight; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/DoubleUtils.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java similarity index 93% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/DoubleUtils.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java index 2454536..f0e7be9 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/model/refactor/utils/DoubleUtils.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/docstrum/utils/DoubleUtils.java @@ -1,4 +1,4 @@ -package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.model.refactor.utils; +package com.knecon.fforesight.service.layoutparser.processor.services.docstrum.utils; public class DoubleUtils {