From 4a0896e24ba5d7fcf429061a122c429e1326d4a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Tue, 2 Nov 2021 13:33:16 +0100 Subject: [PATCH 1/2] RED-2623: Fixed headline handline on strange ocr document --- .../classification/service/ClassificationService.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java index 1e72fd52..db673d7a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java @@ -73,8 +73,10 @@ public class ClassificationService { } } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document .getFontSizeCounter() - .getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && textBlock.getMostPopularWordStyle() - .equals("bold")) { + .getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle() + .equals("bold") || !document.getFontStyleCounter().getCountPerValue().containsKey("bold") && textBlock.getMostPopularWordFontSize() > document + .getFontSizeCounter() + .getMostPopular() + 1)) { for (int i = 1; i <= headlineFontSizes.size(); i++) { if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) { From c49f85916b1503fe08abcacb970a2efdaa165090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Wed, 3 Nov 2021 12:11:38 +0100 Subject: [PATCH 2/2] RED-2624: Fixed footnote is recognized as headline --- .../server/classification/service/ClassificationService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java index db673d7a..a364ccc7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java @@ -76,7 +76,7 @@ public class ClassificationService { .getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle() .equals("bold") || !document.getFontStyleCounter().getCountPerValue().containsKey("bold") && textBlock.getMostPopularWordFontSize() > document .getFontSizeCounter() - .getMostPopular() + 1)) { + .getMostPopular() + 1) && textBlock.getSequences().get(0).getTextPositions().get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) { for (int i = 1; i <= headlineFontSizes.size(); i++) { if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) { @@ -88,7 +88,7 @@ public class ClassificationService { .startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordStyle() .equals("bold") && !document.getFontStyleCounter() .getMostPopular() - .equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9) { + .equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences().get(0).getTextPositions().get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) { textBlock.setClassification("H " + (headlineFontSizes.size() + 1)); document.setHeadlines(true); } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document