From f256f9b30f688abe2c64e705c31ba034c4a480d4 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Wed, 17 Apr 2024 17:40:21 +0200 Subject: [PATCH] RED-8995: unclassified text might be missing from document data * treat PageBlockType.OTHER like PARAGRAPH (no special treatment) --- .../processor/services/SectionsBuilderService.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java index c98c688..fec1b29 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java @@ -44,7 +44,6 @@ public class SectionsBuilderService { for (ClassificationPage page : document.getPages()) { List header = new ArrayList<>(); List footer = new ArrayList<>(); - List unclassifiedText = new ArrayList<>(); for (AbstractPageBlock current : page.getTextBlocks()) { if (current.getClassification() == null) { @@ -63,11 +62,6 @@ public class SectionsBuilderService { continue; } - if (current.getClassification().equals(PageBlockType.OTHER)) { - unclassifiedText.add((TextPageBlock) current); - continue; - } - if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) { ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline); chunkBlock.setHeadline(lastHeadline); @@ -95,9 +89,6 @@ public class SectionsBuilderService { if (!footer.isEmpty()) { footers.add(new ClassificationFooter(footer)); } - if (!unclassifiedText.isEmpty()) { 
- unclassifiedTexts.add(new UnclassifiedText(unclassifiedText)); - } } ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);