Compare commits

...

3 Commits

Author SHA1 Message Date
Kilian Schüttler
e44fc3b536 Merge branch 'RM-231-bp' into 'release/0.192.x'
RM-231: missing whitespace in name

See merge request fforesight/layout-parser!266
2025-01-14 13:07:49 +01:00
Kilian Schuettler
237034b696 RM-231: missing whitespace in name 2025-01-14 13:01:09 +01:00
Kilian Schuettler
85432a2511 RED-10714: fix IndexOutOfBoundsException
(cherry picked from commit 0b6a292c7567b93c488e65729a85b50caf3e262d)
2025-01-13 13:22:59 +01:00
2 changed files with 7 additions and 1 deletion

View File

@@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode;
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
public class Line extends TextBoundingBox {
private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
private static final double WORD_DISTANCE_MULTIPLIER = 0.17;
@EqualsAndHashCode.Include
private final double x0;
@@ -157,6 +157,9 @@ public class Line extends TextBoundingBox {
private void computeWords(List<Character> characters, double wordSpacing) {
// Imo, the width of a space should be scaled with the font size, but currently it only depends on the median distance between horizontal neighbours.
// If there are large differences in font size on a page, this might lead to missing spaces for the smaller fonts and too many spaces for the larger fonts.
// For now I have only changed the scaling factor. If you come across this comment because of missing whitespace again, try scaling with the font size instead of simply changing the factor again.
Word word = new Word();
Character previous = null;
for (Character current : characters) {

View File

@@ -71,6 +71,9 @@ public class TableOfContentsClassificationService {
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
if (start >= textBlocks.size()) {
return start;
}
ClassificationPage startPage = textBlocks.get(start).page();
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();