From 6287837aca499605e61fdc38fc908e4d571568ea Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Wed, 6 Sep 2023 11:38:40 +0200 Subject: [PATCH] TAAS-104: merge visually intersecting Paragraphs * fix static import --- .../services/blockification/TaasBlockificationService.java | 6 ++---- .../services/factory/SearchTextWithTextPositionFactory.java | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java index f16198d..3b99ec2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java @@ -1,8 +1,6 @@ package com.knecon.fforesight.service.layoutparser.processor.services.blockification; - -// TODO: figure out, why this fails the build -// import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING; +import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING; import java.util.ArrayList; import java.util.HashSet; @@ -30,7 +28,7 @@ public class TaasBlockificationService { private static final float THRESHOLD = 1f; private static final float Y_GAP_SPLIT_HEIGHT_MODIFIER = 1.25f; // multiplied with text height - private static final float INTERSECTS_Y_THRESHOLD = 4;// 2 * HEIGHT_PADDING // This is exactly 2 times our position height padding. This is required to find boxes that are visually intersecting. + private static final float INTERSECTS_Y_THRESHOLD = 2 * HEIGHT_PADDING; // This is exactly 2 times our position height padding. This is required to find boxes that are visually intersecting. private static final int X_GAP_SPLIT_CONSTANT = 50; public static final int X_ALIGNMENT_THRESHOLD = 1; public static final int SMALL_Y_GAP_THRESHOLD = 5; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java index e14b55e..b620f08 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java @@ -18,7 +18,7 @@ import lombok.experimental.UtilityClass; @UtilityClass public class SearchTextWithTextPositionFactory { - public final int HEIGHT_PADDING = 2; + public static final int HEIGHT_PADDING = 2; // when checking for a hyphen linebreak, we need to check after a linebreak if the last hyphen was less than three symbols away. // We detect a linebreak as either a "\n" character or if two adjacent symbol's position differ in y-coordinates by at least one character height. // If there is a hyphen linebreak, the hyphen will be 1 position in front of a "\n" or 2 positions in front of the character which has a lower y-coordinate