From 99ed331a1e1ef220d6eefafe8f375140eff08544 Mon Sep 17 00:00:00 2001 From: Corina Olariu Date: Wed, 4 Oct 2023 14:13:38 +0300 Subject: [PATCH] RED-7607 - Rotating pages leads to lost annotations (RM & DM) - use getXDirAdj instead of getX - add fontSizeCounter for landscape pages also --- .../layoutparser/processor/LayoutParsingPipeline.java | 4 ++-- .../processor/services/parsing/PDFTextStripper.java | 2 +- .../layoutparser/processor/utils/PositionUtils.java | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index acb23fc..b14fb8a 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -249,9 +249,9 @@ public class LayoutParsingPipeline { private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) { - if (!classificationPage.isLandscape()) { +// if (!classificationPage.isLandscape()) { document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue()); - } +// } document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue()); document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue()); document.getFontStyleCounter().addAll(classificationPage.getFontStyleCounter().getCountPerValue()); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFTextStripper.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFTextStripper.java index 2f2d6ea..1ca5b43 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFTextStripper.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/PDFTextStripper.java @@ -1711,7 +1711,7 @@ public class PDFTextStripper extends LegacyPDFStreamEngine { int numberOfStrings = line.size(); for (int i = 0; i < numberOfStrings; i++) { WordWithTextPositions word = line.get(i); - word.getTextPositions().sort(Comparator.comparing(TextPosition::getX)); + word.getTextPositions().sort(Comparator.comparing(TextPosition::getXDirAdj)); writeString(word.getText(), word.getTextPositions(), isParagraphEnd && i == numberOfStrings - 1); if (i < numberOfStrings - 1) { writeWordSeparator(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PositionUtils.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PositionUtils.java index 3aecb92..48b720d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PositionUtils.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PositionUtils.java @@ -19,9 +19,10 @@ public final class PositionUtils { double threshold = textBlock.getMostPopularWordHeight() * 3; - if (textBlock.getPdfMinX() + threshold > btf.getTopLeft().getX() && textBlock.getPdfMaxX() - threshold < btf.getTopLeft() - .getX() + btf.getWidth() && textBlock.getPdfMinY() + threshold > btf.getTopLeft().getY() && textBlock.getPdfMaxY() - threshold < btf.getTopLeft() - .getY() + btf.getHeight()) { + if (textBlock.getPdfMinX() + threshold > btf.getTopLeft().getX() + && textBlock.getPdfMaxX() - threshold < btf.getTopLeft().getX() + btf.getWidth() + && textBlock.getPdfMinY() + threshold > btf.getTopLeft().getY() + && textBlock.getPdfMaxY() - threshold < btf.getTopLeft().getY() + btf.getHeight()) { return true; } else { return false;