RED-9942: File with only images not recognised

This commit is contained in:
Maverick Studer 2024-09-05 10:49:12 +02:00
parent 9650195afd
commit 46ea7edc4c
2 changed files with 3 additions and 18 deletions

View File

@ -167,7 +167,7 @@ public class TOCEnrichmentService {
}
}
if (!startBlocks.isEmpty()) {
if (!startBlocks.isEmpty() || !startImages.isEmpty()) {
TableOfContentItem unassigned = new TableOfContentItem(null);
unassigned.setSectionBlocks(startBlocks);
unassigned.setImages(startImages);

View File

@ -65,7 +65,7 @@ public class DocumentGraphFactory {
document.getPages()
.forEach(context::buildAndAddPageWithCounter);
addSectionsAndImagesForToC(layoutParsingType, document, context, documentGraph);
addSectionsForToC(layoutParsingType, document, context, documentGraph);
addHeaderAndFooterToEachPage(document, context);
documentGraph.setNumberOfPages(context.pages.size());
@ -92,22 +92,7 @@ public class DocumentGraphFactory {
}
private void addSectionsAndImagesForToC(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
// when no main sections are present, but we have images, i.e. in a document without any text
if (classificationDocument.getTableOfContents().getMainSections().isEmpty()) {
List<ClassifiedImage> images = classificationDocument.getPages()
.stream()
.flatMap(classificationPage -> classificationPage.getImages()
.stream())
.toList();
if (!images.isEmpty()) {
images.stream()
.distinct()
.forEach(image -> DocumentGraphFactory.addImage(document, image, context));
}
}
private void addSectionsForToC(LayoutParsingType layoutParsingType, ClassificationDocument classificationDocument, Context context, Document document) {
for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) {
var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection();