From fbd01967195ea6eb77f5ae36d9ddb2dc07bd5cc7 Mon Sep 17 00:00:00 2001 From: yhampe Date: Wed, 14 Feb 2024 12:16:37 +0100 Subject: [PATCH 1/8] RED-8481: Use visual layout parsing to detect signatures implemented visuallayoutparsingresult --- .../api/queue/LayoutParsingRequest.java | 4 + .../processor/LayoutParsingPipeline.java | 16 ++++ .../LayoutParsingStorageService.java | 10 +++ .../adapter/VisualLayoutParsingAdapter.java | 83 +++++++++++++++++++ .../model/table/VisualLayoutParsingBox.java | 20 +++++ .../table/VisualLayoutParsingBoxValue.java | 19 +++++ .../model/table/VisualLayoutParsingData.java | 21 +++++ .../table/VisualLayoutParsingResponse.java | 23 +++++ .../table/VisualLayoutParsingResult.java | 22 +++++ .../layoutparser/server/BdrJsonBuildTest.java | 1 + .../HeadlinesGoldStandardIntegrationTest.java | 1 + .../graph/DocumentGraphJsonWritingTest.java | 1 + .../server/graph/ViewerDocumentTest.java | 2 +- .../PdfSegmentationServiceTest.java | 1 + .../services/RulingCleaningServiceTest.java | 2 + .../server/utils/BuildDocumentTest.java | 2 +- 16 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java create mode 100644 
layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java index c364bbc..50ae69b 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java @@ -18,11 +18,15 @@ public record LayoutParsingRequest( @Schema(description = "Path to the original PDF file.")// @NonNull String originFileStorageId,// + + @Schema(description = "Optional Path to the table extraction file.")// Optional tablesFileStorageId,// @Schema(description = "Optional Path to the image classification file.")// Optional imagesFileStorageId,// + @Schema(description = "Optional Path to the the visual layout parsing service file") Optional visualLayoutParsingFileId, + @Schema(description = "Path where the Document Structure File will be stored.")// @NonNull String structureFileStorageId,// @Schema(description = "Path where the Research Data File will be stored.")// diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java 
b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 01b4cdf..1f83105 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -33,9 +33,12 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageB import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter; +import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResult; import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService; import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; @@ -86,6 +89,7 @@ public class LayoutParsingPipeline { RedactManagerBlockificationService redactManagerBlockificationService; LayoutGridService layoutGridService; ObservationRegistry 
observationRegistry; + VisualLayoutParsingAdapter visualLayoutParsingAdapter; public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { @@ -96,6 +100,11 @@ public class LayoutParsingPipeline { File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); + VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse(); + if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { + visualLayoutParsingResponse = layoutParsingStorageService.getExtractedTablesFile(layoutParsingRequest.visualLayoutParsingFileId().get()); + } + ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); if (layoutParsingRequest.imagesFileStorageId().isPresent()) { imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get()); @@ -110,6 +119,7 @@ public class LayoutParsingPipeline { originFile, imageServiceResponse, tableServiceResponse, + visualLayoutParsingResponse, layoutParsingRequest.identifier().toString()); log.info("Building document graph for {}", layoutParsingRequest.identifier()); @@ -198,12 +208,14 @@ public class LayoutParsingPipeline { File originFile, ImageServiceResponse imageServiceResponse, TableServiceResponse tableServiceResponse, + VisualLayoutParsingResponse visualLayoutParsingResponse, String identifier) { PDDocument originDocument = openDocument(originFile); addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); + Map> signatures = 
visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); ClassificationDocument classificationDocument = new ClassificationDocument(); List classificationPages = new ArrayList<>(); @@ -264,6 +276,10 @@ public class LayoutParsingPipeline { imageServiceResponseAdapter.findOcr(classificationPage); } + if(signatures != null && signatures.containsKey(pageNumber)) { + classificationPage.setImages(signatures.get(pageNumber)); + } + tableExtractionService.extractTables(cleanRulings, classificationPage); buildPageStatistics(classificationPage); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java index e74fc4e..d6be645 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java @@ -22,6 +22,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.Researc import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.tenantcommons.TenantContext; import io.micrometer.observation.annotation.Observed; @@ -79,6 +80,15 @@ public class LayoutParsingStorageService { } } + public VisualLayoutParsingResponse getExtractedTablesFile(String 
storageId) throws IOException { + + try (InputStream inputStream = getObject(storageId)) { + VisualLayoutParsingResponse visualLayoutParsingResponse = objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class); + inputStream.close(); + return visualLayoutParsingResponse; + } + } + @Observed(name = "LayoutParsingStorageService", contextualName = "store-document-data") public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java new file mode 100644 index 0000000..3501b36 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -0,0 +1,83 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.adapter; + +import java.awt.geom.Rectangle2D; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.springframework.stereotype.Service; + +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType; +import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingBox; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResult; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Service +@RequiredArgsConstructor +@Slf4j 
+public class VisualLayoutParsingAdapter { + + public Map> buildExtractedTablesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { + + Map> tableCells = new HashMap<>(); + visualLayoutParsingResponse.getData() + .forEach(tableData -> tableCells.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertTableCells(tableData.getBoxes()))); + + return tableCells; + + } + + public Map> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { + + Map> tableCells = new HashMap<>(); + visualLayoutParsingResponse.getData() + .forEach(tableData -> tableCells.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(),tableData.getBoxes()))); + + return tableCells; + + } + + + public List convertTableCells(List tableObjects) { + + List parsedTableCells = new ArrayList<>(); + + tableObjects.stream().forEach(t -> { + VisualLayoutParsingResult result = new VisualLayoutParsingResult(); + result.setX0(t.getBox().getX1()); + result.setX1(t.getBox().getX2()); + result.setY0(t.getBox().getY1()); + result.setY1(t.getBox().getY2()); + result.setWidth(result.getX1() - result.getX0()); + result.setHeight(result.getY1() - result.getY0()); + result.setLabel(t.getLabel()); + parsedTableCells.add(result); + }); + + return parsedTableCells; + + } + + public List convertSignatures(int pageNumber, List tableObjects) { + + List signatures = new ArrayList<>(); + + tableObjects.stream().forEach(t -> { + if(t.getLabel().equals("signature")) { + ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()), + ImageType.SIGNATURE,false,pageNumber); + + signatures.add(signature); + } + }); + + return signatures; + + } + +} \ No newline at end of file diff --git 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java new file mode 100644 index 0000000..67e4fc7 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java @@ -0,0 +1,20 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import java.util.List; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingBox { + + private com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingBoxValue box; + private String label; + private float probability; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java new file mode 100644 index 0000000..a56f06e --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java @@ -0,0 +1,19 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor 
+@AllArgsConstructor +public class VisualLayoutParsingBoxValue { + + private float x1; + private float y1; + private float x2; + private float y2; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java new file mode 100644 index 0000000..0b4ddeb --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java @@ -0,0 +1,21 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import java.util.List; + + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingData { + + private int page_idx; + + private List boxes; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java new file mode 100644 index 0000000..db73c71 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java @@ -0,0 +1,23 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import java.util.List; + +import lombok.AllArgsConstructor; +import lombok.Builder; 
+import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingResponse { + + private String dossierId; + private String fileId; + private String targetFileExtension; + private String responseFileExtension; + private String X_TENANT_ID; + private List data; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java new file mode 100644 index 0000000..8362a2d --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java @@ -0,0 +1,22 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingResult { + + private float x0; + private float y0; + private float x1; + private float y1; + private float width; + private float height; + private String label; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java index 4b2358e..213678c 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java +++ 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java @@ -50,6 +50,7 @@ public class BdrJsonBuildTest extends AbstractTest { file, new ImageServiceResponse(), new TableServiceResponse(), + null, file.toString())); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java index c02c71d..d9e0555 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java @@ -98,6 +98,7 @@ public class HeadlinesGoldStandardIntegrationTest { pdfFileResource.getFile(), new ImageServiceResponse(), new TableServiceResponse(), + null, filePath)); var foundHeadlines = documentGraph.streamAllSubNodes() diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index ab829c0..ad15966 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -58,6 +58,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest { filename.toFile(), new 
ImageServiceResponse(), new TableServiceResponse(), + null, filename.toFile().toString())); DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 5c5eae9..11e41ac 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -51,7 +51,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class); var documentFile = new ClassPathResource(fileName).getFile(); - var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, Path.of(fileName).getFileName().toFile().toString()); + var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, null,Path.of(fileName).getFileName().toFile().toString()); ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index c3a7058..84d31e1 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -67,6 +67,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { originDocument, new ImageServiceResponse(), tableServiceResponse, + null, "document"); redactManagerClassificationService.classifyDocument(classificationDocument); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java index 03d8cae..a113521 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java @@ -80,11 +80,13 @@ public class RulingCleaningServiceTest extends BuildDocumentTest { filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), + null, filename.toFile().toString())); Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), + null, filename.toFile().toString())); DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore); DocumentData 
documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java index 69ed656..0791fa1 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java @@ -25,7 +25,7 @@ public abstract class BuildDocumentTest extends AbstractTest { File fileResource = new ClassPathResource(filename).getFile(); prepareStorage(filename); - return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), filename); + return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), null,filename); } From cfc5db45cd0c8d6403239bc4511af102adf5045e Mon Sep 17 00:00:00 2001 From: yhampe Date: Wed, 14 Feb 2024 12:24:32 +0100 Subject: [PATCH 2/8] RED-8481: Use visual layout parsing to detect signatures fixed failing tests because of null pointer --- .../adapter/VisualLayoutParsingAdapter.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index 3501b36..3eca613 100644 --- 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -33,13 +33,16 @@ public class VisualLayoutParsingAdapter { } public Map> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { + if(visualLayoutParsingResponse != null) { - Map> tableCells = new HashMap<>(); - visualLayoutParsingResponse.getData() - .forEach(tableData -> tableCells.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(),tableData.getBoxes()))); - - return tableCells; + Map> signatures = new HashMap<>(); + visualLayoutParsingResponse.getData() + .forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()) + .addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); + return signatures; + } + return null; } From c3e7582ee31d8c2580a135bcae1b90e3442b5aac Mon Sep 17 00:00:00 2001 From: yhampe Date: Wed, 14 Feb 2024 12:33:36 +0100 Subject: [PATCH 3/8] RED-8481: Use visual layout parsing to detect signatures fixed failing tests because of null pointer --- .../processor/LayoutParsingPipeline.java | 5 ++++- .../adapter/VisualLayoutParsingAdapter.java | 13 +++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 1f83105..6011c70 100644 --- 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -215,7 +215,10 @@ public class LayoutParsingPipeline { addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); - Map> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); + Map> signatures = null; + if(visualLayoutParsingResponse != null) { + signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); + } ClassificationDocument classificationDocument = new ClassificationDocument(); List classificationPages = new ArrayList<>(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index 3eca613..e81aa19 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -33,16 +33,13 @@ public class VisualLayoutParsingAdapter { } public Map> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { - if(visualLayoutParsingResponse != null) { 
- Map> signatures = new HashMap<>(); - visualLayoutParsingResponse.getData() - .forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()) - .addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); + Map> signatures = new HashMap<>(); + visualLayoutParsingResponse.getData() + .forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()) + .addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); - return signatures; - } - return null; + return signatures; } From 903b1c1fd4bf061d55235655124acf077294baa2 Mon Sep 17 00:00:00 2001 From: yhampe Date: Thu, 15 Feb 2024 09:27:07 +0100 Subject: [PATCH 4/8] RED-8481: Use visual layout parsing to detect signatures fixed failing tests because of null pointer --- .../layoutparser/processor/LayoutParsingPipeline.java | 3 ++- .../python_api/adapter/VisualLayoutParsingAdapter.java | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 6011c70..6a59a14 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -101,7 +101,7 @@ public class LayoutParsingPipeline { File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse(); - if 
(layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { + if (layoutParsingRequest.visualLayoutParsingFileId() != null && layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { visualLayoutParsingResponse = layoutParsingStorageService.getExtractedTablesFile(layoutParsingRequest.visualLayoutParsingFileId().get()); } @@ -217,6 +217,7 @@ public class LayoutParsingPipeline { Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); Map> signatures = null; if(visualLayoutParsingResponse != null) { + log.info("response: {}",visualLayoutParsingResponse); signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index e81aa19..7372fd0 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -35,11 +35,12 @@ public class VisualLayoutParsingAdapter { public Map> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { Map> signatures = new HashMap<>(); - visualLayoutParsingResponse.getData() - .forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()) - .addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); + if(visualLayoutParsingResponse.getData() != null ) { + visualLayoutParsingResponse.getData().forEach(tableData -> 
signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); - return signatures; + return signatures; + } + return null; } From b4a225144dc04c7efc46099608885358abdc8698 Mon Sep 17 00:00:00 2001 From: yhampe Date: Thu, 15 Feb 2024 10:16:07 +0100 Subject: [PATCH 5/8] RED-8481: Use visual layout parsing to detect signatures working on failing tests --- .../adapter/VisualLayoutParsingAdapter.java | 3 --- .../VisualLayoutParsingServiceTests.java | 5 ++++ .../server/utils/AbstractTest.java | 27 ++++++++++++++++++- .../visual_layout_parsing_response/empty.json | 8 ++++++ 4 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java create mode 100644 layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index 7372fd0..2e7b092 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -35,12 +35,9 @@ public class VisualLayoutParsingAdapter { public Map> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { Map> signatures = new HashMap<>(); - if(visualLayoutParsingResponse.getData() != 
null ) { visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); return signatures; - } - return null; } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java new file mode 100644 index 0000000..d291619 --- /dev/null +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java @@ -0,0 +1,5 @@ +package com.knecon.fforesight.service.layoutparser.server; + +public class VisualLayoutParsingServiceTests { + +} diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java index a4ebaca..759f0e8 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java @@ -20,6 +20,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit.jupiter.SpringExtension; +import org.xmlunit.builder.Input; import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.storage.commons.service.StorageService; @@ -51,6 +52,8 @@ public abstract class AbstractTest { 
private RabbitTemplate rabbitTemplate; protected final static String ORIGIN_FILE_ID = "origin"; + + protected final static String VISUAL_LAYOUT_FILE = "visual"; protected final static String TABLE_FILE_ID = "table"; protected final static String IMAGE_FILE_ID = "image"; protected final static String STRUCTURE_FILE_ID = "structure"; @@ -96,7 +99,7 @@ public abstract class AbstractTest { @SneakyThrows protected LayoutParsingRequest prepareStorage(String file) { - return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json"); + return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json","visual_layout_parsing_response/empty.json"); } @@ -116,6 +119,7 @@ public abstract class AbstractTest { .originFileStorageId(ORIGIN_FILE_ID) .tablesFileStorageId(Optional.of(TABLE_FILE_ID)) .imagesFileStorageId(Optional.of(IMAGE_FILE_ID)) + .visualLayoutParsingFileId(Optional.of(VISUAL_LAYOUT_FILE)) .structureFileStorageId(STRUCTURE_FILE_ID) .textBlockFileStorageId(TEXT_FILE_ID) .positionBlockFileStorageId(POSITION_FILE_ID) @@ -136,6 +140,16 @@ public abstract class AbstractTest { return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream()); } + @SneakyThrows + protected LayoutParsingRequest prepareStorage(String file, String cvServiceResponseFile, String imageInfoFile, String visualLayoutParsingResponseFile) { + + ClassPathResource pdfFileResource = new ClassPathResource(file); + ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile); + ClassPathResource imageInfoFileResource = new ClassPathResource(imageInfoFile); + ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource(visualLayoutParsingResponseFile); + + return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream(), 
visualLayoutParsingResponseResource.getInputStream()); + } @SneakyThrows protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream) { @@ -147,6 +161,17 @@ public abstract class AbstractTest { return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER); } + @SneakyThrows + protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream, InputStream visualLayoutParsingResponseFileStream) { + + storageService.storeObject(TenantContext.getTenantId(), IMAGE_FILE_ID, imageInfoStream); + storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream); + storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream); + storageService.storeObject(TenantContext.getTenantId(),VISUAL_LAYOUT_FILE,visualLayoutParsingResponseFileStream ); + + return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER); + } + @AfterEach public void cleanupStorage() { diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json b/layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json new file mode 100644 index 0000000..48a1a98 --- /dev/null +++ b/layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json @@ -0,0 +1,8 @@ +{ + "dossierId": "123", + "fileId": "123", + "targetFileExtension": "ORIGIN.pdf.gz", + "responseFileExtension": "EXTRACTED_TABLES.json.gz", + "data": [], + "X_TENANT_ID": "" +} From bdf1161c91b4ad1954eab89e27bd9b82114a4960 Mon Sep 17 00:00:00 2001 From: yhampe Date: Thu, 15 Feb 2024 12:12:23 +0100 Subject: [PATCH 6/8] RED-8481: Use visual layout parsing to detect signatures addressed review comments --- .../internal/api/queue/LayoutParsingRequest.java | 2 +- 
.../processor/LayoutParsingPipeline.java | 12 +++++------- .../processor/LayoutParsingStorageService.java | 5 +---- .../model/table/VisualLayoutParsingBox.java | 4 +--- .../layoutparser/server/BdrJsonBuildTest.java | 3 ++- .../server/HeadlinesGoldStandardIntegrationTest.java | 3 ++- .../server/VisualLayoutParsingServiceTests.java | 5 ----- .../server/graph/DocumentGraphJsonWritingTest.java | 3 ++- .../server/graph/ViewerDocumentTest.java | 3 ++- .../segmentation/PdfSegmentationServiceTest.java | 3 ++- .../layoutparser/server/utils/BuildDocumentTest.java | 3 ++- 11 files changed, 20 insertions(+), 26 deletions(-) delete mode 100644 layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java index 50ae69b..f39572f 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java @@ -25,7 +25,7 @@ public record LayoutParsingRequest( @Schema(description = "Optional Path to the image classification file.")// Optional imagesFileStorageId,// - @Schema(description = "Optional Path to the the visual layout parsing service file") Optional visualLayoutParsingFileId, + @Schema(description = "Optional Path to the visual layout parsing service file") Optional visualLayoutParsingFileId,// @Schema(description = "Path where the Document Structure File will be stored.")// @NonNull String
structureFileStorageId,// diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 6a59a14..5866256 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -38,7 +38,6 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.ima import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResult; import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService; import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; @@ -101,8 +100,8 @@ public class LayoutParsingPipeline { File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse(); - if (layoutParsingRequest.visualLayoutParsingFileId() != null && layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { - visualLayoutParsingResponse = 
layoutParsingStorageService.getExtractedTablesFile(layoutParsingRequest.visualLayoutParsingFileId().get()); + if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { + visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get()); } ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); @@ -215,10 +214,9 @@ public class LayoutParsingPipeline { addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); - Map> signatures = null; - if(visualLayoutParsingResponse != null) { - log.info("response: {}",visualLayoutParsingResponse); - signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); + Map> signatures = new HashMap<>(); + if(visualLayoutParsingResponse.getData() != null) { /* getData() may be null, e.g. on a default-constructed response; the adapter iterates it unguarded */ + signatures.putAll(visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse)); /* keep the adapter result; the map stays empty when there is no data */ } ClassificationDocument classificationDocument = new ClassificationDocument(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java index d6be645..471db6a 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java @@ -10,8 +10,6 @@ import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.Optional; -import
org.apache.pdfbox.Loader; -import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.stereotype.Service; import com.fasterxml.jackson.databind.ObjectMapper; @@ -80,11 +78,10 @@ public class LayoutParsingStorageService { } } - public VisualLayoutParsingResponse getExtractedTablesFile(String storageId) throws IOException { + public VisualLayoutParsingResponse getVisualLayoutParsingFile(String storageId) throws IOException { try (InputStream inputStream = getObject(storageId)) { VisualLayoutParsingResponse visualLayoutParsingResponse = objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class); - inputStream.close(); return visualLayoutParsingResponse; } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java index 67e4fc7..aba6168 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java @@ -1,7 +1,5 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; -import java.util.List; - import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -13,7 +11,7 @@ import lombok.NoArgsConstructor; @AllArgsConstructor public class VisualLayoutParsingBox { - private com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingBoxValue box; + private VisualLayoutParsingBoxValue box; private String label; private float probability; diff --git 
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java index 213678c..eaddda7 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java @@ -27,6 +27,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper; import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest; @@ -50,7 +51,7 @@ public class BdrJsonBuildTest extends AbstractTest { file, new ImageServiceResponse(), new TableServiceResponse(), - null, + new VisualLayoutParsingResponse(), file.toString())); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java index d9e0555..4ea6204 100644 --- 
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java @@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.tenantcommons.TenantsClient; @@ -98,7 +99,7 @@ public class HeadlinesGoldStandardIntegrationTest { pdfFileResource.getFile(), new ImageServiceResponse(), new TableServiceResponse(), - null, + new VisualLayoutParsingResponse(), filePath)); var foundHeadlines = documentGraph.streamAllSubNodes() diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java deleted file mode 100644 index d291619..0000000 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/VisualLayoutParsingServiceTests.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.server; - -public class VisualLayoutParsingServiceTests { - -} diff --git 
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index ad15966..f5bf3a2 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -16,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest; @@ -58,7 +59,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest { filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), - null, + new VisualLayoutParsingResponse(), filename.toFile().toString())); DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 11e41ac..06c053d 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -12,6 +12,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService; import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest; @@ -51,7 +52,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class); var documentFile = new ClassPathResource(fileName).getFile(); - var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, null,Path.of(fileName).getFileName().toFile().toString()); + var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, new VisualLayoutParsingResponse(),Path.of(fileName).getFileName().toFile().toString()); ViewerDocumentService 
viewerDocumentService = new ViewerDocumentService(null); LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index 84d31e1..1f9e8a0 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.C import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService; import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest; @@ -67,7 +68,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { originDocument, new ImageServiceResponse(), tableServiceResponse, - null, + new VisualLayoutParsingResponse(), "document"); 
redactManagerClassificationService.classifyDocument(classificationDocument); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java index 0791fa1..79db6bf 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java @@ -10,6 +10,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import lombok.SneakyThrows; @@ -25,7 +26,7 @@ public abstract class BuildDocumentTest extends AbstractTest { File fileResource = new ClassPathResource(filename).getFile(); prepareStorage(filename); - return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), null,filename); + return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new VisualLayoutParsingResponse(),filename); } From fa048b2fe0567348444a7a5f05fdbf169bae30fe Mon Sep 17 00:00:00 2001 From: yhampe Date: Thu, 15 Feb 2024 12:19:26 
+0100 Subject: [PATCH 7/8] RED-8481: Use visual layout parsing to detect signatures addressed review comments --- .../python_api/adapter/VisualLayoutParsingAdapter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index 2e7b092..f91364d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -22,6 +22,8 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class VisualLayoutParsingAdapter { + private static final String SIGNATURES = "signature"; /* box label that marks a detected signature */ + public Map> buildExtractedTablesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { Map> tableCells = new HashMap<>(); @@ -66,7 +68,7 @@ public class VisualLayoutParsingAdapter { List signatures = new ArrayList<>(); tableObjects.stream().forEach(t -> { - if(t.getLabel().equals("signature")) { + if(SIGNATURES.equals(t.getLabel())) { /* constant first: a box without a label is skipped instead of throwing */ ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE,false,pageNumber); From cc77d195009a6972ac04fa02ac98fbb7ef53aec3 Mon Sep 17 00:00:00 2001 From: yhampe Date: Thu, 15 Feb 2024 13:01:30 +0100 Subject: [PATCH 8/8] RED-8481: Use visual layout parsing to detect signatures addressed review comments --- .../layoutparser/processor/LayoutParsingPipeline.java | 2 +-
.../server/services/RulingCleaningServiceTest.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 5866256..5c8c6fa 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -278,7 +278,7 @@ public class LayoutParsingPipeline { imageServiceResponseAdapter.findOcr(classificationPage); } - if(signatures != null && signatures.containsKey(pageNumber)) { + if(signatures.containsKey(pageNumber)) { classificationPage.setImages(signatures.get(pageNumber)); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java index a113521..8025534 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java @@ -21,6 +21,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Ta import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; 
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor; import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; @@ -80,13 +81,13 @@ public class RulingCleaningServiceTest extends BuildDocumentTest { filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), - null, + new VisualLayoutParsingResponse(), filename.toFile().toString())); Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), - null, + new VisualLayoutParsingResponse(), filename.toFile().toString())); DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore); DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);