TAAS-104: merge visually intersecting Paragraphs

* fix static import
This commit is contained in:
Kilian Schuettler 2023-09-06 11:38:40 +02:00
parent 5792ff4a93
commit 6287837aca
2 changed files with 3 additions and 5 deletions

View File

@ -1,8 +1,6 @@
package com.knecon.fforesight.service.layoutparser.processor.services.blockification;
// TODO: figure out, why this fails the build
// import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING;
import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING;
import java.util.ArrayList;
import java.util.HashSet;
@ -30,7 +28,7 @@ public class TaasBlockificationService {
private static final float THRESHOLD = 1f;
private static final float Y_GAP_SPLIT_HEIGHT_MODIFIER = 1.25f; // multiplied with text height
private static final float INTERSECTS_Y_THRESHOLD = 4;// 2 * HEIGHT_PADDING // This is exactly 2 times our position height padding. This is required to find boxes that are visually intersecting.
private static final float INTERSECTS_Y_THRESHOLD = 2 * HEIGHT_PADDING; // This is exactly 2 times our position height padding. This is required to find boxes that are visually intersecting.
private static final int X_GAP_SPLIT_CONSTANT = 50;
public static final int X_ALIGNMENT_THRESHOLD = 1;
public static final int SMALL_Y_GAP_THRESHOLD = 5;

View File

@ -18,7 +18,7 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class SearchTextWithTextPositionFactory {
public final int HEIGHT_PADDING = 2;
public static final int HEIGHT_PADDING = 2;
// when checking for a hyphen linebreak, we need to check after a linebreak if the last hyphen was less than three symbols away.
// We detect a linebreak either as a "\n" character or when the positions of two adjacent symbols differ in y-coordinate by at least one character height.
// If there is a hyphen linebreak, the hyphen will be 1 position in front of a "\n" or 2 positions in front of the character which has a lower y-coordinate