diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/LineBuilderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/LineBuilderService.java index 6c9f987..4e5f8d8 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/LineBuilderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/service/LineBuilderService.java @@ -32,6 +32,9 @@ public class LineBuilderService { characters.forEach(character -> { character.getNeighbors() .forEach(neighbor -> { + if (!characters.contains(neighbor.getCharacter())) { + return; + } double normalizedHorizontalDistance = neighbor.getHorizontalDistance() / maxHorizontalDistance; double normalizedVerticalDistance = neighbor.getVerticalDistance() / maxVerticalDistance; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java index 3f203e6..8ef6f77 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/TextRulingsClassifier.java @@ -12,7 +12,7 @@ import lombok.experimental.UtilityClass; @UtilityClass public class TextRulingsClassifier { - private final static double STRIKETHROUGH_ZONE = 0.5; // multiplied with text height, determines height of intersection interval for strikethrough lines. + private final static double STRIKETHROUGH_ZONE = 0.65; // multiplied with text height, determines height of intersection interval for strikethrough lines. private final static double UNDERLINE_ZONE = 0.2; // multiplied with text height, determines height of intersection interval of underline lines. private final static double TEXT_BBOX_THRESHOLD_FACTOR = 0.15; // multiplied with text width then subtracted from word width. If ruling covers this width, it is considered as strikethrough/underline.