From dc892d0fecc2e018b935e17596765b0f6c038412 Mon Sep 17 00:00:00 2001 From: Maverick Studer Date: Wed, 4 Sep 2024 13:27:06 +0200 Subject: [PATCH] RED-9524: File processing does not annotate images --- .../factory/DocumentGraphFactory.java | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java index 11aca92..e7380c5 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java @@ -65,7 +65,7 @@ public class DocumentGraphFactory { document.getPages() .forEach(context::buildAndAddPageWithCounter); - addSections(layoutParsingType, document, context, documentGraph); + addSectionsAndImagesForToC(layoutParsingType, document, context, documentGraph); addHeaderAndFooterToEachPage(document, context); documentGraph.setNumberOfPages(context.pages.size()); @@ -92,7 +92,22 @@ public class DocumentGraphFactory { } - private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) { + private void addSectionsAndImagesForToC(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) { + + // when no main sections are present, but we have images, i.e. in a document without any text + if (classificationDocument.getTableOfContents().getMainSections().isEmpty()) { + List images = classificationDocument.getPages() + .stream() + .flatMap(classificationPage -> classificationPage.getImages() + .stream()) + .toList(); + + if (!images.isEmpty()) { + images.stream() + .distinct() + .forEach(image -> DocumentGraphFactory.addImage(document, image, context)); + } + } for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) { var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection(); @@ -260,7 +275,6 @@ public class DocumentGraphFactory { DocumentTree documentTree; Map pages; List sections; - List images; TextBlockFactory textBlockFactory; @@ -269,7 +283,6 @@ public class DocumentGraphFactory { documentTree = new DocumentTree(document); pages = new HashMap<>(); sections = new LinkedList<>(); - images = new LinkedList<>(); textBlockFactory = new TextBlockFactory(); }