diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java index 20253bf..e8cfdb7 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingFinishedEvent.java @@ -8,13 +8,20 @@ import lombok.Builder; @Builder @Schema(description = "Object containing information about the layout parsing.") public record LayoutParsingFinishedEvent( - @Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") - Map identifier,// - @Schema(description = "The duration of a single layout parsing in ms.") - long duration,// - @Schema(description = "The number of pages of the parsed document.") - int numberOfPages,// - @Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") - String message) { + @Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") // + Map identifier, + + @Schema(description = "The duration of a single layout parsing in ms.") // + long duration, + + @Schema(description = "The number of pages of the parsed document.") // + int numberOfPages, + + @Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") // + String message, + + @Schema(description = "The app version of the layout parser.") // + String layoutParserVersion +) { } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutparserSettings.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParserSettings.java similarity index 89% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutparserSettings.java rename to layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParserSettings.java index 7eefa33..f9ff6e4 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutparserSettings.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParserSettings.java @@ -13,9 +13,8 @@ import lombok.experimental.FieldDefaults; @Configuration @ConfigurationProperties("layoutparser") @FieldDefaults(level = AccessLevel.PRIVATE) -public class LayoutparserSettings { +public class LayoutParserSettings { boolean debug; LayoutParsingType layoutParsingTypeOverride; - String pdftronLicense; } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 470fd9c..8d4f05d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -20,6 +20,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent; +import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import com.iqser.red.service.redaction.v1.server.mapper.DocumentDataMapper; @@ -87,29 +88,32 @@ import lombok.extern.slf4j.Slf4j; @Slf4j @Service @RequiredArgsConstructor -@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +@FieldDefaults(level = AccessLevel.PRIVATE) public class LayoutParsingPipeline { - ImageServiceResponseAdapter imageServiceResponseAdapter; - CvTableParsingAdapter cvTableParsingAdapter; - LayoutParsingStorageService layoutParsingStorageService; - SectionsBuilderService sectionsBuilderService; - SimplifiedSectionTextService simplifiedSectionTextService; - RulingCleaningService rulingCleaningService; - TableExtractionService tableExtractionService; - DocuMineBlockificationService docuMineBlockificationService; - RedactManagerBlockificationService redactManagerBlockificationService; - BlockificationPostprocessingService blockificationPostprocessingService; - DocstrumBlockificationService docstrumBlockificationService; - LayoutGridService layoutGridService; - ObservationRegistry observationRegistry; - VisualLayoutParsingAdapter visualLayoutParsingAdapter; - GraphicExtractorService graphicExtractorService; - OutlineExtractorService outlineExtractorService; - SectionTreeBuilderService sectionTreeBuilderService; - SectionTreeEnhancementService sectionTreeEnhancementService; - LayoutparserSettings settings; - ClassificationService classificationService; + final ImageServiceResponseAdapter imageServiceResponseAdapter; + final CvTableParsingAdapter cvTableParsingAdapter; + final LayoutParsingStorageService layoutParsingStorageService; + final SectionsBuilderService sectionsBuilderService; + final SimplifiedSectionTextService simplifiedSectionTextService; + final RulingCleaningService rulingCleaningService; + final TableExtractionService tableExtractionService; + final DocuMineBlockificationService docuMineBlockificationService; + final RedactManagerBlockificationService redactManagerBlockificationService; + final BlockificationPostprocessingService blockificationPostprocessingService; + final DocstrumBlockificationService docstrumBlockificationService; + final LayoutGridService layoutGridService; + final ObservationRegistry observationRegistry; + final VisualLayoutParsingAdapter visualLayoutParsingAdapter; + final GraphicExtractorService graphicExtractorService; + final OutlineExtractorService outlineExtractorService; + final SectionTreeBuilderService sectionTreeBuilderService; + final SectionTreeEnhancementService sectionTreeEnhancementService; + final LayoutParserSettings settings; + final ClassificationService classificationService; + + @Value("${LAYOUT_PARSER_VERSION:}") + private String layoutParserVersion; public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { @@ -118,17 +122,23 @@ public class LayoutParsingPipeline { log.info("Starting layout parsing for {}", layoutParsingRequest.identifier()); File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); - File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); + File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()) + .orElse(originFile); VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId() - .map(layoutParsingStorageService::getVisualLayoutParsingFile).orElse(new VisualLayoutParsingResponse()); + .map(layoutParsingStorageService::getVisualLayoutParsingFile) + .orElse(new VisualLayoutParsingResponse()); ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId() - .map(layoutParsingStorageService::getImagesFile).orElse(new ImageServiceResponse()); + .map(layoutParsingStorageService::getImagesFile) + .orElse(new ImageServiceResponse()); TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId() - .map(layoutParsingStorageService::getTablesFile).orElse(new TableServiceResponse()); + .map(layoutParsingStorageService::getTablesFile) + .orElse(new TableServiceResponse()); - ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null // - ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), + LayoutParsingType layoutParsingType = settings.getLayoutParsingTypeOverride() == null // + ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(); + + ClassificationDocument classificationDocument = parseLayout(layoutParsingType, originFile, imageServiceResponse, tableServiceResponse, @@ -137,18 +147,19 @@ public class LayoutParsingPipeline { log.info("Building document graph for {}", layoutParsingRequest.identifier()); - DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null // - ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument); + DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(layoutParsingType, classificationDocument); log.info("Creating viewer document for {}", layoutParsingRequest.identifier()); - layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, false); + layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, layoutParsingType, layoutParserVersion, false); log.info("Storing resulting files for {}", layoutParsingRequest.identifier()); layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentWithVisualization.document())); - if (layoutParsingRequest.documentMarkdownFileStorageId().isPresent()) { - layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId().get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document())); + if (layoutParsingRequest.documentMarkdownFileStorageId() + .isPresent()) { + layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId() + .get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document())); } layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentWithVisualization.document())); layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile); @@ -188,6 +199,7 @@ public class LayoutParsingPipeline { layoutParsingRequest.pageFileStorageId(), layoutParsingRequest.simplifiedTextStorageId(), layoutParsingRequest.viewerDocumentStorageId())) + .layoutParserVersion(layoutParserVersion) .build(); } @@ -385,7 +397,8 @@ public class LayoutParsingPipeline { .flatMap(Collection::stream) .map(Character::getTextPosition) .filter(pos -> pos.getDir().equals(dir)) - .mapToDouble(RedTextPosition::getExactDir).average().orElse(0); + .mapToDouble(RedTextPosition::getExactDir).average() + .orElse(0); if (averageRotation == 0) { continue; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java index 5f32816..d4f8f4e 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java @@ -10,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.processor.model.DocumentWithVisualization; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.OutlineMapper; import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutGrid; @@ -32,9 +33,15 @@ public class LayoutGridService { @SneakyThrows @Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document") - public void addLayoutGrid(File originFile, DocumentWithVisualization document, File destinationFile, boolean layerVisibilityDefaultValue) { + public void addLayoutGrid(File originFile, + DocumentWithVisualization document, + File destinationFile, + LayoutParsingType layoutParsingType, + String layoutParserVersion, + boolean layerVisibilityDefaultValue) { - LayoutGrid layoutGrid = createLayoutGrid(document.document()); + String layoutParsingTypeName = layoutParsingType.name(); + LayoutGrid layoutGrid = createLayoutGrid(document.document(), layoutParserVersion, layoutParsingTypeName); Outline outline = OutlineMapper.createOutline(document.document()); layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue); @@ -42,16 +49,23 @@ public class LayoutGridService { document.layoutDebugLayer().addOutlineHeadlines(document.document()); if (document.layoutDebugLayer().isActive()) { - viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid, document.layoutDebugLayer()), outline); + viewerDocumentService.addLayerGroups(originFile, + destinationFile, + List.of(layoutGrid, document.layoutDebugLayer()), + layoutParserVersion, + layoutParsingTypeName, + outline); } else { - viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), outline); + viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), layoutParserVersion, layoutParsingTypeName, outline); } } - private LayoutGrid createLayoutGrid(Document document) { + private LayoutGrid createLayoutGrid(Document document, String layoutParserVersion, String layoutParsingType) { LayoutGrid layoutGrid = new LayoutGrid(); + layoutGrid.addVersionAndLayoutParsingType(layoutParserVersion, layoutParsingType, document.getFirstPage()); + document.streamAllSubNodes() .peek(layoutGrid::addTreeId) .forEach(semanticNode -> { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutGrid.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutGrid.java index 77e7b84..fa6ea96 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutGrid.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/visualization/LayoutGrid.java @@ -1,6 +1,7 @@ package com.knecon.fforesight.service.layoutparser.processor.visualization; import java.awt.Color; +import java.awt.geom.AffineTransform; import java.awt.geom.Line2D; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; @@ -25,6 +26,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; +import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; +import com.knecon.fforesight.service.layoutparser.processor.utils.CoordinateTransforms; +import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation; import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig; import com.knecon.fforesight.service.viewerdoc.model.ColoredLine; @@ -89,10 +93,19 @@ public class LayoutGrid extends LayoutGridLayerConfig { public void addTreeId(SemanticNode semanticNode) { Page page = semanticNode.getFirstPage(); - if (semanticNode.getBBox().get(page) == null) { + if (semanticNode.getBBox() + .get(page) == null) { return; } - addPlacedText(page, semanticNode.getBBox().get(page), semanticNode.getBBox().get(page), buildTreeIdString(semanticNode), 1, treeIds, TREEID_COLOR); + addPlacedText(page, + semanticNode.getBBox() + .get(page), + semanticNode.getBBox() + .get(page), + buildTreeIdString(semanticNode), + 1, + treeIds, + TREEID_COLOR); } @@ -121,7 +134,8 @@ public class LayoutGrid extends LayoutGridLayerConfig { .toList(); Integer maxChildDepth = subSections.stream() .map(node -> node.getTreeId().size()) - .max(Integer::compareTo).orElse(section.getTreeId().size()); + .max(Integer::compareTo) + .orElse(section.getTreeId().size()); int ownDepth = section.getTreeId().size(); Page firstPage = section.getFirstPage(); @@ -307,7 +321,8 @@ public class LayoutGrid extends LayoutGridLayerConfig { Visualizations visualizations = semanticNode.getType().equals(NodeType.TABLE_OF_CONTENTS) ? toc : sections; List coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines(); int lineWidthModifier = maxChildDepth - ownDepth; - Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier)); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox() + .get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier)); SemanticNode highestParent = semanticNode.getHighestParent(); Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber())); @@ -356,7 +371,8 @@ public class LayoutGrid extends LayoutGridLayerConfig { List ys = yStream.collect(Collectors.toList()); ys.remove(0); - Rectangle2D tableBBox = table.getBBox().get(page); + Rectangle2D tableBBox = table.getBBox() + .get(page); List coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines(); xs.forEach(x -> { @@ -398,6 +414,21 @@ public class LayoutGrid extends LayoutGridLayerConfig { } + public void addVersionAndLayoutParsingType(String version, String layoutParsingType, Page page) { + + PageInformation pageInformation = PageInformation.fromPage(page); + double startHeight = pageInformation.heightRot() - 5; + Point2D point1 = new Point2D.Double(0, startHeight); + Point2D point2 = new Point2D.Double(0, startHeight - FONT_SIZE * 1.5); + AffineTransform affineTransform = CoordinateTransforms.calculatePageCoordsToInitialUserSpaceCoords(pageInformation); + affineTransform.transform(point1, point1); + affineTransform.transform(point2, point2); + getOrCreateVisualizationsOnPage(page.getNumber(), this.versionAndType).getPlacedTexts() + .addAll(List.of(PlacedText.textFacingUp(String.valueOf(version), point1, FONT_SIZE, Color.BLACK, FONT), + PlacedText.textFacingUp(String.valueOf(layoutParsingType), point2, FONT_SIZE, Color.BLACK, FONT))); + } + + private record RectangleAndLinesResult(List coloredLines, Rectangle2D rectangle, List pageLines) { } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java index a2cf986..34a68e9 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java @@ -17,7 +17,6 @@ import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.ClassPathResource; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection; @@ -80,7 +79,7 @@ public class OutlineDetectionTest extends AbstractTest { long start = System.currentTimeMillis(); ClassificationDocument classificationDocument = parseLayout(fileName, LayoutParsingType.DOCUMINE_OLD); var document = buildGraph(fileName, classificationDocument); - layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); + layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.DOCUMINE_OLD, "TEST_VERSION", true); OutlineObjectTree outlineObjectTree = classificationDocument.getOutlineObjectTree(); assertEquals(outlineObjectTree.getRootNodes().size(), 8); assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(2).size(), 1); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index d7fa247..3cecb90 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -11,7 +11,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.ClassPathResource; import com.iqser.red.commons.jackson.ObjectMapperFactory; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; @@ -31,6 +30,8 @@ public class ViewerDocumentTest extends BuildDocumentTest { PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null); LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); + static String TEST_VERSION = "TEST_VERSION"; + @BeforeEach public void init() { @@ -51,7 +52,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { long start = System.currentTimeMillis(); var document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH); - layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); + layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true); System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000); } @@ -79,7 +80,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); var document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE_OLD, classificationDocument); - layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true); + layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true); } } diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/LayerIdentifier.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/LayerIdentifier.java index 72a2755..13b0418 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/LayerIdentifier.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/LayerIdentifier.java @@ -10,6 +10,7 @@ If optionalContent is false, the layer will not be created as a OCG, and will no */ public record LayerIdentifier(String name, String markedContentName) { + public String markedContentName() { // The prefix KNECON_ is used to identify marked contents as knecon contents later on return KNECON_IDENTIFIER_PREFIX + markedContentName; @@ -40,6 +41,7 @@ public record LayerIdentifier(String name, String markedContentName) { public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES"); public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES"); public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs"); + public static final LayerIdentifier KNECON_LAYOUT_VERSION_AND_TYPE = new LayerIdentifier("Version and Type", "LAYOUT_PARSER_VERSION_AND_TYPE"); public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS"); //layout grid debug diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/layers/LayoutGridLayerConfig.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/layers/LayoutGridLayerConfig.java index 0cf76a4..eb56c67 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/layers/LayoutGridLayerConfig.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/layers/LayoutGridLayerConfig.java @@ -46,12 +46,13 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup { protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build(); protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build(); protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build(); + protected final Visualizations versionAndType = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_VERSION_AND_TYPE).build(); @Override public List getVisualizations() { - return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds); + return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds, versionAndType); } } diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/PDFTronViewerDocumentService.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/PDFTronViewerDocumentService.java index a9f7a12..0a9f6b9 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/PDFTronViewerDocumentService.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/PDFTronViewerDocumentService.java @@ -54,7 +54,7 @@ public class PDFTronViewerDocumentService { @SneakyThrows @Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations") - public void addLayerGroups(File originFile, File destinationFile, List layerGroups, Outline outline) { + public void addLayerGroups(File originFile, File destinationFile, List layerGroups, String layoutParserVersion, String layoutParsingType, Outline outline) { synchronized (PDFNet.class) { // synchronized with class, to ensure multiple instances are also synchronized @@ -116,7 +116,7 @@ public class PDFTronViewerDocumentService { // OutlineUtility.addOutline(pdfDoc, outline); - ViewerDocVersioningUtility.setVersionInDocument(pdfDoc); + ViewerDocVersioningUtility.setVersionInDocument(pdfDoc, layoutParserVersion, layoutParsingType); saveDocument(pdfDoc, destinationFile); } finally { @@ -128,9 +128,9 @@ public class PDFTronViewerDocumentService { @SneakyThrows @Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations") - public void addLayerGroups(File originFile, File destinationFile, List layerGroups) { + public void addLayerGroups(File originFile, File destinationFile, List layerGroups, String layoutParserVersion, String layoutParsingType) { - addLayerGroups(originFile, destinationFile, layerGroups, new Outline()); + addLayerGroups(originFile, destinationFile, layerGroups, layoutParserVersion, layoutParsingType, new Outline()); } diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtility.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtility.java index 9fa3e83..230aa7a 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtility.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtility.java @@ -8,6 +8,7 @@ import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import com.pdftron.pdf.PDFDoc; +import com.pdftron.sdf.Obj; import lombok.SneakyThrows; import lombok.experimental.UtilityClass; @@ -21,10 +22,15 @@ public class ViewerDocVersioningUtility { @SneakyThrows - public void setVersionInDocument(PDFDoc pdfDoc) { + public void setVersionInDocument(PDFDoc pdfDoc, String layoutParserVersion, String layoutParsingType) { pdfDoc.getDocInfo().setAuthor(AUTHOR); pdfDoc.getDocInfo().setKeywords(CUSTOM_DICT + ":" + currentVersion); + + Obj versionInfo = pdfDoc.getSDFDoc().createIndirectDict(); + versionInfo.putString("LayoutParserVersion", layoutParserVersion); + versionInfo.putString("LayoutParsingType", layoutParsingType); + pdfDoc.getRoot().put("KneconVersionInfo", versionInfo); } diff --git a/layoutparser-service/viewer-doc-processor/src/test/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtilityTest.java b/layoutparser-service/viewer-doc-processor/src/test/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtilityTest.java index 678a873..8d60e8a 100644 --- a/layoutparser-service/viewer-doc-processor/src/test/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtilityTest.java +++ b/layoutparser-service/viewer-doc-processor/src/test/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocVersioningUtilityTest.java @@ -1,26 +1,18 @@ package com.knecon.fforesight.service.viewerdoc.service; -import java.awt.geom.AffineTransform; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.nio.file.Files; import java.nio.file.Path; -import javax.swing.table.AbstractTableModel; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.core.io.ClassPathResource; -import com.pdftron.common.Matrix2D; -import com.pdftron.pdf.ElementBuilder; -import com.pdftron.pdf.ElementReader; -import com.pdftron.pdf.ElementWriter; import com.pdftron.pdf.PDFDoc; import com.pdftron.pdf.PDFNet; -import com.pdftron.pdf.Page; import com.pdftron.sdf.SDFDoc; import lombok.SneakyThrows; @@ -48,7 +40,7 @@ class ViewerDocVersioningUtilityTest { File file = new ClassPathResource("files/empty.pdf").getFile(); Path tmpFile = Files.createTempFile("markedDocument", ".pdf"); try (var in = new FileInputStream(file); var doc = new PDFDoc(in); var out = new FileOutputStream(tmpFile.toFile())) { - ViewerDocVersioningUtility.setVersionInDocument(doc); + ViewerDocVersioningUtility.setVersionInDocument(doc, "layoutParserVersion", "layoutParsingType"); doc.save(out, SDFDoc.SaveMode.LINEARIZED, null); } assert ViewerDocVersioningUtility.isCurrentVersion(tmpFile.toFile());