From 366c12bab0cf6c0cb18f8215203303de5a59c0d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Tue, 30 Jul 2024 15:46:40 +0200 Subject: [PATCH] RED-9760: Fixed missing newLines --- .../processor/model/text/TextPageBlock.java | 6 ++++-- .../factory/SearchTextWithTextPositionFactory.java | 11 ++++++----- .../processor/utils/ParsingConstants.java | 7 +++++++ 3 files changed, 17 insertions(+), 7 deletions(-) create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/ParsingConstants.java diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java index c7c2ae6..3af9b40 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java @@ -1,5 +1,7 @@ package com.knecon.fforesight.service.layoutparser.processor.model.text; +import static com.knecon.fforesight.service.layoutparser.processor.utils.ParsingConstants.NEW_LINE_TEXT_HEIGHT_PERCENTAGE; + import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.List; @@ -208,7 +210,7 @@ public class TextPageBlock extends AbstractPageBlock { TextPositionSequence previous = null; for (TextPositionSequence word : sequences) { if (previous != null) { - if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) { + if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE) { sb.append('\n'); } else { sb.append(' '); @@ -228,7 +230,7 @@ public class TextPageBlock extends AbstractPageBlock { TextPositionSequence previous = null; for (TextPositionSequence word : sequences) { if (previous != null) { - if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight()) { + if (word.getMaxYDirAdj() - previous.getMaxYDirAdj() > word.getTextHeight() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE) { numberOfLines++; } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java index 0d9fd8f..fe861a7 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SearchTextWithTextPositionFactory.java @@ -1,5 +1,7 @@ package com.knecon.fforesight.service.layoutparser.processor.services.factory; +import static com.knecon.fforesight.service.layoutparser.processor.utils.ParsingConstants.NEW_LINE_TEXT_HEIGHT_PERCENTAGE; + import java.awt.geom.AffineTransform; import java.awt.geom.Rectangle2D; import java.util.Collection; @@ -67,7 +69,6 @@ public class SearchTextWithTextPositionFactory { ++context.stringIdx; } - List positions = sequences.stream() .map(TextPositionSequence::getTextPositions) .flatMap(Collection::stream) @@ -161,7 +162,7 @@ public class SearchTextWithTextPositionFactory { } double deltaY = Math.abs(currentPosition.getYDirAdj() - previousPosition.getYDirAdj()); - return deltaY >= currentPosition.getHeightDir(); + return deltaY >= currentPosition.getHeightDir() * NEW_LINE_TEXT_HEIGHT_PERCENTAGE; } @@ -191,9 +192,9 @@ public class SearchTextWithTextPositionFactory { float textHeight = sequence.getTextHeight() + HEIGHT_PADDING; Rectangle2D rectangle2D = new Rectangle2D.Double(textPosition.getXDirAdj(), - textPosition.getYDirAdj() - textHeight, - textPosition.getWidthDirAdj(), - textHeight + HEIGHT_PADDING); + textPosition.getYDirAdj() - textHeight, + textPosition.getWidthDirAdj(), + textHeight + HEIGHT_PADDING); AffineTransform transform = new AffineTransform(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/ParsingConstants.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/ParsingConstants.java new file mode 100644 index 0000000..8ca43c5 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/ParsingConstants.java @@ -0,0 +1,7 @@ +package com.knecon.fforesight.service.layoutparser.processor.utils; + +public class ParsingConstants { + + public final static float NEW_LINE_TEXT_HEIGHT_PERCENTAGE = 0.6f; + +}