RED-9524: File processing does not annotate images
This commit is contained in:
parent
af45f2cd8c
commit
dc892d0fec
@ -65,7 +65,7 @@ public class DocumentGraphFactory {
|
|||||||
|
|
||||||
document.getPages()
|
document.getPages()
|
||||||
.forEach(context::buildAndAddPageWithCounter);
|
.forEach(context::buildAndAddPageWithCounter);
|
||||||
addSections(layoutParsingType, document, context, documentGraph);
|
addSectionsAndImagesForToC(layoutParsingType, document, context, documentGraph);
|
||||||
addHeaderAndFooterToEachPage(document, context);
|
addHeaderAndFooterToEachPage(document, context);
|
||||||
|
|
||||||
documentGraph.setNumberOfPages(context.pages.size());
|
documentGraph.setNumberOfPages(context.pages.size());
|
||||||
@ -92,7 +92,22 @@ public class DocumentGraphFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addSections(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
|
private void addSectionsAndImagesForToC(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
|
||||||
|
|
||||||
|
// when no main sections are present, but we have images, i.e. in a document without any text
|
||||||
|
if (classificationDocument.getTableOfContents().getMainSections().isEmpty()) {
|
||||||
|
List<ClassifiedImage> images = classificationDocument.getPages()
|
||||||
|
.stream()
|
||||||
|
.flatMap(classificationPage -> classificationPage.getImages()
|
||||||
|
.stream())
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
if (!images.isEmpty()) {
|
||||||
|
images.stream()
|
||||||
|
.distinct()
|
||||||
|
.forEach(image -> DocumentGraphFactory.addImage(document, image, context));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
|
for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
|
||||||
var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();
|
var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();
|
||||||
@ -260,7 +275,6 @@ public class DocumentGraphFactory {
|
|||||||
DocumentTree documentTree;
|
DocumentTree documentTree;
|
||||||
Map<Page, Integer> pages;
|
Map<Page, Integer> pages;
|
||||||
List<AbstractSemanticNode> sections;
|
List<AbstractSemanticNode> sections;
|
||||||
List<ClassifiedImage> images;
|
|
||||||
TextBlockFactory textBlockFactory;
|
TextBlockFactory textBlockFactory;
|
||||||
|
|
||||||
|
|
||||||
@ -269,7 +283,6 @@ public class DocumentGraphFactory {
|
|||||||
documentTree = new DocumentTree(document);
|
documentTree = new DocumentTree(document);
|
||||||
pages = new HashMap<>();
|
pages = new HashMap<>();
|
||||||
sections = new LinkedList<>();
|
sections = new LinkedList<>();
|
||||||
images = new LinkedList<>();
|
|
||||||
textBlockFactory = new TextBlockFactory();
|
textBlockFactory = new TextBlockFactory();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user