RED-9129: Track pagestatistics also for landscape pages in documine
This commit is contained in:
parent
ae6bad830e
commit
71025f7f16
@ -83,13 +83,17 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
try (PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId())) {
|
try (PDDocument originDocument = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId())) {
|
||||||
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
||||||
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
|
if (layoutParsingRequest.imagesFileStorageId()
|
||||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
|
.isPresent()) {
|
||||||
|
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId()
|
||||||
|
.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
||||||
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
|
if (layoutParsingRequest.tablesFileStorageId()
|
||||||
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
|
.isPresent()) {
|
||||||
|
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId()
|
||||||
|
.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
|
ClassificationDocument classificationDocument = parseLayout(layoutParsingRequest.layoutParsingType(), originDocument, imageServiceResponse, tableServiceResponse);
|
||||||
@ -115,25 +119,25 @@ public class LayoutParsingPipeline {
|
|||||||
.numberOfPages(numberOfPages)
|
.numberOfPages(numberOfPages)
|
||||||
.duration(System.currentTimeMillis() - start)
|
.duration(System.currentTimeMillis() - start)
|
||||||
.message(format("""
|
.message(format("""
|
||||||
Layout parsing has finished in %.02f s.
|
Layout parsing has finished in %.02f s.
|
||||||
identifiers: %s
|
identifiers: %s
|
||||||
%s
|
%s
|
||||||
Files have been saved with Ids:
|
Files have been saved with Ids:
|
||||||
Structure: %s
|
Structure: %s
|
||||||
Text: %s
|
Text: %s
|
||||||
Positions: %s
|
Positions: %s
|
||||||
PageData: %s
|
PageData: %s
|
||||||
Simplified Text: %s
|
Simplified Text: %s
|
||||||
Viewer Doc: %s""",
|
Viewer Doc: %s""",
|
||||||
((float) (System.currentTimeMillis() - start)) / 1000,
|
((float) (System.currentTimeMillis() - start)) / 1000,
|
||||||
layoutParsingRequest.identifier(),
|
layoutParsingRequest.identifier(),
|
||||||
buildSemanticNodeCountMessage(numberOfPages, documentGraph.buildSemanticNodeCounts()),
|
buildSemanticNodeCountMessage(numberOfPages, documentGraph.buildSemanticNodeCounts()),
|
||||||
layoutParsingRequest.structureFileStorageId(),
|
layoutParsingRequest.structureFileStorageId(),
|
||||||
layoutParsingRequest.textBlockFileStorageId(),
|
layoutParsingRequest.textBlockFileStorageId(),
|
||||||
layoutParsingRequest.positionBlockFileStorageId(),
|
layoutParsingRequest.positionBlockFileStorageId(),
|
||||||
layoutParsingRequest.pageFileStorageId(),
|
layoutParsingRequest.pageFileStorageId(),
|
||||||
layoutParsingRequest.simplifiedTextStorageId(),
|
layoutParsingRequest.simplifiedTextStorageId(),
|
||||||
layoutParsingRequest.viewerDocumentStorageId()))
|
layoutParsingRequest.viewerDocumentStorageId()))
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -142,14 +146,14 @@ public class LayoutParsingPipeline {
|
|||||||
private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {
|
private String buildSemanticNodeCountMessage(int numberOfPages, Map<NodeType, Long> semanticNodeCounts) {
|
||||||
|
|
||||||
return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
|
return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
|
||||||
numberOfPages,
|
numberOfPages,
|
||||||
semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
|
semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
|
||||||
semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
|
semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
|
||||||
semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
|
semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
|
||||||
semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
|
semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
|
||||||
semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
|
semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
|
||||||
semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
|
semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
|
||||||
semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
|
semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -213,7 +217,7 @@ public class LayoutParsingPipeline {
|
|||||||
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
||||||
|
|
||||||
buildPageStatistics(classificationPage);
|
buildPageStatistics(classificationPage);
|
||||||
increaseDocumentStatistics(classificationPage, classificationDocument);
|
increaseDocumentStatistics(layoutParsingType, classificationPage, classificationDocument);
|
||||||
|
|
||||||
classificationPages.add(classificationPage);
|
classificationPages.add(classificationPage);
|
||||||
}
|
}
|
||||||
@ -242,11 +246,11 @@ public class LayoutParsingPipeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) {
|
private void increaseDocumentStatistics(LayoutParsingType layoutParsingType, ClassificationPage classificationPage, ClassificationDocument document) {
|
||||||
|
|
||||||
if (!classificationPage.isLandscape()) {
|
if (layoutParsingType.equals(LayoutParsingType.DOCUMINE) || !classificationPage.isLandscape()) {
|
||||||
document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
|
document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
|
||||||
}
|
}
|
||||||
document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue());
|
document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue());
|
||||||
document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue());
|
document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue());
|
||||||
document.getFontStyleCounter().addAll(classificationPage.getFontStyleCounter().getCountPerValue());
|
document.getFontStyleCounter().addAll(classificationPage.getFontStyleCounter().getCountPerValue());
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user