From 9ecf9ca19f851d3773773ed0cd7cce6ae508703c Mon Sep 17 00:00:00 2001 From: yhampe Date: Wed, 5 Jun 2024 14:20:33 +0200 Subject: [PATCH] RED-3813: Recategorize same image as experimental feature now writing hash into structure --- .../processor/LayoutParsingPipeline.java | 36 +++++++++++++------ .../adapter/VisualLayoutParsingAdapter.java | 2 +- .../factory/DocumentGraphFactory.java | 3 ++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index e095e62..f5c333c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -105,21 +105,28 @@ public class LayoutParsingPipeline { log.info("Starting layout parsing for {}", layoutParsingRequest.identifier()); File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); - File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); + File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()) + .orElse(originFile); VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse(); - if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { - visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get()); + if (layoutParsingRequest.visualLayoutParsingFileId() + .isPresent()) { + visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId() + .get()); } ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); - if (layoutParsingRequest.imagesFileStorageId().isPresent()) { - imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get()); + if (layoutParsingRequest.imagesFileStorageId() + .isPresent()) { + imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId() + .get()); } TableServiceResponse tableServiceResponse = new TableServiceResponse(); - if (layoutParsingRequest.tablesFileStorageId().isPresent()) { - tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get()); + if (layoutParsingRequest.tablesFileStorageId() + .isPresent()) { + tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId() + .get()); } ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null // @@ -130,14 +137,17 @@ public class LayoutParsingPipeline { visualLayoutParsingResponse, layoutParsingRequest.identifier()); - log.info("Building document graph for {}", layoutParsingRequest.identifier()); - Document documentGraph = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null // ? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument); log.info("Creating viewer document for {}", layoutParsingRequest.identifier()); - layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false, layoutParsingRequest.visualLayoutParsingFileId().isPresent()); + layoutGridService.addLayoutGrid(viewerDocumentFile, + documentGraph, + viewerDocumentFile, + false, + layoutParsingRequest.visualLayoutParsingFileId() + .isPresent()); log.info("Storing resulting files for {}", layoutParsingRequest.identifier()); @@ -281,7 +291,11 @@ public class LayoutParsingPipeline { pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>()) .addAll(graphics.stream() - .map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber(),"")) + .map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), + ImageType.GRAPHIC, + false, + stripper.getPageNumber(), + "")) .toList()); ClassificationPage classificationPage = switch (layoutParsingType) { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index 9972310..c5cb41b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -79,7 +79,7 @@ public class VisualLayoutParsingAdapter { ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(), t.getBox().getY1(), t.getBox().getX2() - t.getBox().getX1(), - t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber); + t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber,""); signatures.add(signature); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java index 36ee3eb..26ef65c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java @@ -43,8 +43,10 @@ import lombok.Builder; import lombok.Getter; import lombok.experimental.FieldDefaults; import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; @UtilityClass +@Slf4j public class DocumentGraphFactory { public Document buildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument document) { @@ -138,6 +140,7 @@ public class DocumentGraphFactory { .position(position) .transparent(image.isHasTransparency()) .page(page) + .representationHash(image.getRepresentation()) .documentTree(context.getDocumentTree()) .build(); page.getMainBody().add(imageNode);