From b447788fcb4fb927f5c46e961fa62a3fcfc6cfd1 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Tue, 14 Jan 2025 12:59:01 +0100 Subject: [PATCH] RM-231: missing whitespace in name --- .../service/layoutparser/processor/docstrum/model/Line.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Line.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Line.java index eb999c7..7f09b0c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Line.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/docstrum/model/Line.java @@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode; @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) public class Line extends TextBoundingBox { - private static final double WORD_DISTANCE_MULTIPLIER = 0.18; + private static final double WORD_DISTANCE_MULTIPLIER = 0.17; @EqualsAndHashCode.Include private final double x0; @@ -159,6 +159,9 @@ public class Line extends TextBoundingBox { private void computeWords(double wordSpacing) { + // IMO, the width of a space should scale with the font size, but currently it depends only on the median distance between horizontal neighbours. + // If there are large differences in font size on a page, this might lead to missing spaces for the smaller fonts and too many spaces for the larger fonts. + // For now, I have only changed the scaling factor. If you come across this comment because of missing whitespace again, try scaling by the font size instead of simply changing the factor again. Word word = new Word(); Character previous = null; for (Character current : characters) {