diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java index c364bbc..f39572f 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java @@ -18,11 +18,15 @@ public record LayoutParsingRequest( @Schema(description = "Path to the original PDF file.")// @NonNull String originFileStorageId,// + + @Schema(description = "Optional Path to the table extraction file.")// Optional tablesFileStorageId,// @Schema(description = "Optional Path to the image classification file.")// Optional imagesFileStorageId,// + @Schema(description = "Optional Path to the visual layout parsing service file.") Optional<String> visualLayoutParsingFileId,// + @Schema(description = "Path where the Document Structure File will be stored.")// @NonNull String structureFileStorageId,// @Schema(description = "Path where the Research Data File will be stored.")// diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 01b4cdf..5c8c6fa 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ 
b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -33,9 +33,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageB import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter; +import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService; import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; @@ -86,6 +88,7 @@ public class LayoutParsingPipeline { RedactManagerBlockificationService redactManagerBlockificationService; LayoutGridService layoutGridService; ObservationRegistry observationRegistry; + VisualLayoutParsingAdapter visualLayoutParsingAdapter; public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { @@ -96,6 +99,11 @@ public class LayoutParsingPipeline { File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); File viewerDocumentFile = 
layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile); + VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse(); + if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) { + visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get()); + } + ImageServiceResponse imageServiceResponse = new ImageServiceResponse(); if (layoutParsingRequest.imagesFileStorageId().isPresent()) { imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get()); @@ -110,6 +118,7 @@ public class LayoutParsingPipeline { originFile, imageServiceResponse, tableServiceResponse, + visualLayoutParsingResponse, layoutParsingRequest.identifier().toString()); log.info("Building document graph for {}", layoutParsingRequest.identifier()); @@ -198,12 +207,18 @@ public class LayoutParsingPipeline { File originFile, ImageServiceResponse imageServiceResponse, TableServiceResponse tableServiceResponse, + VisualLayoutParsingResponse visualLayoutParsingResponse, String identifier) { PDDocument originDocument = openDocument(originFile); addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); + /* Signatures per page; stays empty when the response is absent or carries no data (e.g. a default-constructed response). */ + Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>(); + if (visualLayoutParsingResponse != null && visualLayoutParsingResponse.getData() != null) { + signatures.putAll(visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse)); + } ClassificationDocument classificationDocument = new ClassificationDocument(); List classificationPages = new ArrayList<>(); @@ -264,6 +279,10 @@ public class LayoutParsingPipeline { imageServiceResponseAdapter.findOcr(classificationPage); } + if(signatures.containsKey(pageNumber)) { + 
classificationPage.setImages(signatures.get(pageNumber)); + } + tableExtractionService.extractTables(cleanRulings, classificationPage); buildPageStatistics(classificationPage); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java index e74fc4e..471db6a 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java @@ -10,8 +10,6 @@ import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.Optional; -import org.apache.pdfbox.Loader; -import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.stereotype.Service; import com.fasterxml.jackson.databind.ObjectMapper; @@ -22,6 +20,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.Researc import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.tenantcommons.TenantContext; import io.micrometer.observation.annotation.Observed; @@ -79,6 +78,14 @@ public class LayoutParsingStorageService { } } + public VisualLayoutParsingResponse getVisualLayoutParsingFile(String storageId) throws IOException { + + try (InputStream inputStream = getObject(storageId)) { + 
VisualLayoutParsingResponse visualLayoutParsingResponse = objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class); + return visualLayoutParsingResponse; + } + } + @Observed(name = "LayoutParsingStorageService", contextualName = "store-document-data") public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java new file mode 100644 index 0000000..f91364d --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -0,0 +1,94 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.adapter; + +import java.awt.geom.Rectangle2D; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.springframework.stereotype.Service; + +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType; +import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingBox; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResult; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** Adapts a {@link VisualLayoutParsingResponse} into per-page structures: converted layout boxes and signature images. */ +@Service +@RequiredArgsConstructor +@Slf4j +public class VisualLayoutParsingAdapter { + + private static final String SIGNATURES = "signature"; + + /** Groups all boxes of the response by page index, converted via {@link #convertTableCells}; returns an empty map when the response or its data is null. */ + public Map<Integer, List<VisualLayoutParsingResult>>
buildExtractedTablesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { + + Map<Integer, List<VisualLayoutParsingResult>> tableCells = new HashMap<>(); + if (visualLayoutParsingResponse == null || visualLayoutParsingResponse.getData() == null) { + return tableCells; + } + visualLayoutParsingResponse.getData() + .forEach(tableData -> tableCells.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertTableCells(tableData.getBoxes()))); + + return tableCells; + + } + + /** Groups the boxes labelled as signatures by page index as {@link ClassifiedImage}s; returns an empty map when the response or its data is null. */ + public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) { + + Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>(); + if (visualLayoutParsingResponse == null || visualLayoutParsingResponse.getData() == null) { + return signatures; + } + visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); + + return signatures; + } + + + /** Copies each box's corner coordinates and label into a {@link VisualLayoutParsingResult}, deriving width and height from the corners. */ + public List<VisualLayoutParsingResult> convertTableCells(List<VisualLayoutParsingBox> tableObjects) { + + List<VisualLayoutParsingResult> parsedTableCells = new ArrayList<>(); + + tableObjects.forEach(t -> { + VisualLayoutParsingResult result = new VisualLayoutParsingResult(); + result.setX0(t.getBox().getX1()); + result.setX1(t.getBox().getX2()); + result.setY0(t.getBox().getY1()); + result.setY1(t.getBox().getY2()); + result.setWidth(result.getX1() - result.getX0()); + result.setHeight(result.getY1() - result.getY0()); + result.setLabel(t.getLabel()); + parsedTableCells.add(result); + }); + + return parsedTableCells; + + } + + /** Creates a {@link ClassifiedImage} of type SIGNATURE for every box whose label equals "signature"; boxes with a null or different label are skipped. */ + public List<ClassifiedImage> convertSignatures(int pageNumber, List<VisualLayoutParsingBox> tableObjects) { + + List<ClassifiedImage> signatures = new ArrayList<>(); + + tableObjects.forEach(t -> { + if(SIGNATURES.equals(t.getLabel())) { + ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()), + ImageType.SIGNATURE,false,pageNumber); + + signatures.add(signature); + } + }); + + return signatures; + + } + +} \ No newline at end of file diff --git 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java new file mode 100644 index 0000000..aba6168 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBox.java @@ -0,0 +1,18 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingBox { + + private VisualLayoutParsingBoxValue box; + private String label; + private float probability; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java new file mode 100644 index 0000000..a56f06e --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingBoxValue.java @@ -0,0 +1,19 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingBoxValue { + + private float x1; + private float y1; + 
private float x2; + private float y2; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java new file mode 100644 index 0000000..0b4ddeb --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingData.java @@ -0,0 +1,21 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import java.util.List; + + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingData { + + private int page_idx; + + private List boxes; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java new file mode 100644 index 0000000..db73c71 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResponse.java @@ -0,0 +1,23 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import java.util.List; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor 
+@AllArgsConstructor +public class VisualLayoutParsingResponse { + + private String dossierId; + private String fileId; + private String targetFileExtension; + private String responseFileExtension; + private String X_TENANT_ID; + private List data; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java new file mode 100644 index 0000000..8362a2d --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/VisualLayoutParsingResult.java @@ -0,0 +1,22 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class VisualLayoutParsingResult { + + private float x0; + private float y0; + private float x1; + private float y1; + private float width; + private float height; + private String label; + +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java index 4b2358e..eaddda7 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java @@ -27,6 +27,7 
@@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper; import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest; @@ -50,6 +51,7 @@ public class BdrJsonBuildTest extends AbstractTest { file, new ImageServiceResponse(), new TableServiceResponse(), + new VisualLayoutParsingResponse(), file.toString())); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java index c02c71d..4ea6204 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java @@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import 
com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.tenantcommons.TenantsClient; @@ -98,6 +99,7 @@ public class HeadlinesGoldStandardIntegrationTest { pdfFileResource.getFile(), new ImageServiceResponse(), new TableServiceResponse(), + new VisualLayoutParsingResponse(), filePath)); var foundHeadlines = documentGraph.streamAllSubNodes() diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index ab829c0..f5bf3a2 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -16,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; 
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest; @@ -58,6 +59,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest { filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), + new VisualLayoutParsingResponse(), filename.toFile().toString())); DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 5c5eae9..06c053d 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -12,6 +12,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService; import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest; @@ -51,7 +52,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), 
TableServiceResponse.class); var documentFile = new ClassPathResource(fileName).getFile(); - var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, Path.of(fileName).getFileName().toFile().toString()); + var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, new VisualLayoutParsingResponse(),Path.of(fileName).getFileName().toFile().toString()); ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index c3a7058..1f9e8a0 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.C import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; 
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService; import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest; @@ -67,6 +68,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { originDocument, new ImageServiceResponse(), tableServiceResponse, + new VisualLayoutParsingResponse(), "document"); redactManagerClassificationService.classifyDocument(classificationDocument); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java index 03d8cae..8025534 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java @@ -21,6 +21,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Ta import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor; import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService; import 
com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; @@ -80,11 +81,13 @@ public class RulingCleaningServiceTest extends BuildDocumentTest { filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), + new VisualLayoutParsingResponse(), filename.toFile().toString())); Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, filename.toFile(), new ImageServiceResponse(), new TableServiceResponse(), + new VisualLayoutParsingResponse(), filename.toFile().toString())); DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore); DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java index a4ebaca..759f0e8 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java @@ -20,6 +20,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit.jupiter.SpringExtension; +import org.xmlunit.builder.Input; import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.storage.commons.service.StorageService; @@ -51,6 +52,8 @@ public abstract class AbstractTest { private RabbitTemplate rabbitTemplate; protected final static String ORIGIN_FILE_ID = "origin"; + + protected final static String VISUAL_LAYOUT_FILE = 
"visual"; protected final static String TABLE_FILE_ID = "table"; protected final static String IMAGE_FILE_ID = "image"; protected final static String STRUCTURE_FILE_ID = "structure"; @@ -96,7 +99,7 @@ public abstract class AbstractTest { @SneakyThrows protected LayoutParsingRequest prepareStorage(String file) { - return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json"); + return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json","visual_layout_parsing_response/empty.json"); } @@ -116,6 +119,7 @@ public abstract class AbstractTest { .originFileStorageId(ORIGIN_FILE_ID) .tablesFileStorageId(Optional.of(TABLE_FILE_ID)) .imagesFileStorageId(Optional.of(IMAGE_FILE_ID)) + .visualLayoutParsingFileId(Optional.of(VISUAL_LAYOUT_FILE)) .structureFileStorageId(STRUCTURE_FILE_ID) .textBlockFileStorageId(TEXT_FILE_ID) .positionBlockFileStorageId(POSITION_FILE_ID) @@ -136,6 +140,16 @@ public abstract class AbstractTest { return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream()); } + @SneakyThrows + protected LayoutParsingRequest prepareStorage(String file, String cvServiceResponseFile, String imageInfoFile, String visualLayoutParsingResponseFile) { + + ClassPathResource pdfFileResource = new ClassPathResource(file); + ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile); + ClassPathResource imageInfoFileResource = new ClassPathResource(imageInfoFile); + ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource(visualLayoutParsingResponseFile); + + return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream(), visualLayoutParsingResponseResource.getInputStream()); + } @SneakyThrows protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream 
cvServiceResponseFileStream, InputStream imageInfoStream) { @@ -147,6 +161,17 @@ public abstract class AbstractTest { return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER); } + @SneakyThrows + protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream, InputStream visualLayoutParsingResponseFileStream) { + + storageService.storeObject(TenantContext.getTenantId(), IMAGE_FILE_ID, imageInfoStream); + storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream); + storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream); + storageService.storeObject(TenantContext.getTenantId(),VISUAL_LAYOUT_FILE,visualLayoutParsingResponseFileStream ); + + return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER); + } + @AfterEach public void cleanupStorage() { diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java index 69ed656..79db6bf 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java @@ -10,6 +10,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; +import 
com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import lombok.SneakyThrows; @@ -25,7 +26,7 @@ public abstract class BuildDocumentTest extends AbstractTest { File fileResource = new ClassPathResource(filename).getFile(); prepareStorage(filename); - return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), filename); + return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new VisualLayoutParsingResponse(),filename); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json b/layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json new file mode 100644 index 0000000..48a1a98 --- /dev/null +++ b/layoutparser-service/layoutparser-service-server/src/test/resources/visual_layout_parsing_response/empty.json @@ -0,0 +1,8 @@ +{ + "dossierId": "123", + "fileId": "123", + "targetFileExtension": "ORIGIN.pdf.gz", + "responseFileExtension": "EXTRACTED_TABLES.json.gz", + "data": [], + "X_TENANT_ID": "" +}