Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e44fc3b536 | ||
|
|
237034b696 | ||
|
|
85432a2511 |
@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode;
|
|||||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||||
public class Line extends TextBoundingBox {
|
public class Line extends TextBoundingBox {
|
||||||
|
|
||||||
private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
|
private static final double WORD_DISTANCE_MULTIPLIER = 0.17;
|
||||||
|
|
||||||
@EqualsAndHashCode.Include
|
@EqualsAndHashCode.Include
|
||||||
private final double x0;
|
private final double x0;
|
||||||
@ -157,6 +157,9 @@ public class Line extends TextBoundingBox {
|
|||||||
|
|
||||||
private void computeWords(List<Character> characters, double wordSpacing) {
|
private void computeWords(List<Character> characters, double wordSpacing) {
|
||||||
|
|
||||||
|
// The width of a space should arguably scale with the font size, but it currently depends only on the median distance between horizontal neighbours.
|
||||||
|
// If font sizes differ widely on a page, this might lead to missing spaces for the smaller fonts and too many spaces for the larger fonts.
|
||||||
|
// For now, only the scaling factor has been adjusted. If you come across this comment because spaces are missing again, try scaling the space width with the font size instead of simply changing the factor again.
|
||||||
Word word = new Word();
|
Word word = new Word();
|
||||||
Character previous = null;
|
Character previous = null;
|
||||||
for (Character current : characters) {
|
for (Character current : characters) {
|
||||||
|
|||||||
@ -71,6 +71,9 @@ public class TableOfContentsClassificationService {
|
|||||||
|
|
||||||
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
|
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
|
||||||
|
|
||||||
|
if (start >= textBlocks.size()) {
|
||||||
|
return start;
|
||||||
|
}
|
||||||
ClassificationPage startPage = textBlocks.get(start).page();
|
ClassificationPage startPage = textBlocks.get(start).page();
|
||||||
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
|
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
|
||||||
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();
|
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user