From 31a1161889575bdb904ab2677f570ae71a59a47e Mon Sep 17 00:00:00 2001 From: yhampe Date: Thu, 18 Jan 2024 08:57:12 +0100 Subject: [PATCH] RED-7375: table extractor prototype --- .../build.gradle.kts | 2 +- .../configuration/MessagingConfiguration.java | 28 ++++++++++ .../model/TableExtractorRequest.java | 25 +++++++++ .../model/TableExtractorResponse.java | 17 ++++++ .../processor/service/FileStatusService.java | 25 +++++++++ .../LayoutParsingRequestFactory.java | 5 ++ .../TableExtractorMessageReceiver.java | 55 +++++++++++++++++++ .../FileManagementServiceSettings.java | 1 + .../src/main/resources/application-dev.yaml | 1 + .../dossier/file/FileType.java | 1 + .../dossier/file/ProcessingStatus.java | 3 +- publish-custom-image.sh | 4 +- 12 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/TableExtractorMessageReceiver.java diff --git a/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts b/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts index 71a35cfe9..a9cc55e81 100644 --- a/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts +++ b/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts @@ -21,7 +21,7 @@ dependencies { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1") } - 
api("com.knecon.fforesight:layoutparser-service-internal-api:0.74.0") {
+    api("com.knecon.fforesight:layoutparser-service-internal-api:extractor-0") { // NOTE(review): looks like a branch build version - confirm before merge
         exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
         exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
     }
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java
index 2ddf009f2..6becbb7ce 100644
--- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java
@@ -60,6 +60,11 @@ public class MessagingConfiguration {
     public static final String PDFTRON_DLQ = "pdftron_dlq";
     public static final String PDFTRON_RESULT_QUEUE = "pdftron_result_queue";
 
+    public static final String TABLE_EXTRACTOR_QUEUE = "table_extractor_request_queue"; // requests published by FileStatusService
+    public static final String TABLE_EXTRACTOR_RESPONSE_QUEUE = "table_extractor_response_queue"; // consumed by TableExtractorMessageReceiver
+    public static final String TABLE_EXTRACTOR_DLQ = "table_extractor_dead_letter_queue"; // dead-letter target of both queues above
+
+
     public static final String CV_ANALYSIS_QUEUE = "cv_analysis_request_queue";
     public static final String CV_ANALYSIS_RESPONSE_QUEUE = "cv_analysis_response_queue";
     public static final String CV_ANALYSIS_DLQ = "cv_analysis_dead_letter_queue";
@@ -372,6 +377,29 @@ public class MessagingConfiguration {
                 .maxPriority(2)
                 .build();
     }
+    @Bean
+    public Queue tableExtractorRequestQueue() { // NOTE(review): no max priority is declared although the publisher sets a message priority - confirm
+
+        return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ).build();
+    }
+
+
+    @Bean
+    public Queue tableExtractorResponseQueue() { // durable queue; rejected messages are routed to TABLE_EXTRACTOR_DLQ via the default exchange
+
+        return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE)
+                .withArgument("x-dead-letter-exchange", "")
+                .withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ)
+                .build();
+    }
+
+
+    @Bean
+    public Queue tableExtractorDLQ() { // shared by the request and the response queue
+
+        return QueueBuilder.durable(TABLE_EXTRACTOR_DLQ).build();
+    }
+
 
     @Bean
     public Queue layoutparsingRequestQueue() {
 
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java
new file mode 100644
index 000000000..9edf56175
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java
@@ -0,0 +1,25 @@
+package com.iqser.red.service.persistence.management.v1.processor.model;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class TableExtractorRequest { // message body sent to TABLE_EXTRACTOR_QUEUE by FileStatusService
+
+    public static final String TABLE_EXTRACTOR_FILE_EXTENSION = FileType.EXTRACTED_TABLES.name() + FileType.EXTRACTED_TABLES.getExtension() + ".gz";
+
+    public static final String TARGET_FILE_EXTENSION = "ORIGIN.pdf.gz"; // NOTE(review): hard-coded, unlike the constant above - confirm it matches FileType.ORIGIN
+
+    private String dossierId;
+    private String fileId;
+    private String targetFileExtension;
+    private String responseFileExtension;
+
+}
diff --git
a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java
new file mode 100644
index 000000000..76eee9a9f
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java
@@ -0,0 +1,17 @@
+package com.iqser.red.service.persistence.management.v1.processor.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class TableExtractorResponse { // message body read from TABLE_EXTRACTOR_RESPONSE_QUEUE (and from the DLQ) by TableExtractorMessageReceiver
+
+    private String dossierId;
+    private String fileId;
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java
index 385238d2b..a168ca48c 100644
--- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java
@@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Inter
 import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
 import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest;
 import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
+import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorRequest;
 import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
 import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory;
 import com.iqser.red.service.persistence.management.v1.processor.service.manualredactions.ManualRedactionProviderService;
@@ -175,6 +176,12 @@ public class FileStatusService {
             return;
         }
 
+        if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES)) { // queue only while no extractor result is stored
+            log.info("Add file: {} from dossier {} to Table Extractor queue", fileId, dossierId);
+            addToTableExtractorQueue(dossierId, fileId);
+            return;
+        }
+
         if (settings.isImageServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO)) {
             log.info("Add file: {} from dossier {} to Image queue", fileId, dossierId);
             addToImageQueue(dossierId, fileId);
@@ -234,6 +241,24 @@ public class FileStatusService {
         }
     }
 
+    private void addToTableExtractorQueue(String dossierId, String fileId) { // sets TABLE_EXTRACTOR_ANALYZING, then publishes the request
+
+        fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.TABLE_EXTRACTOR_ANALYZING);
+
+        rabbitTemplate.convertAndSend(MessagingConfiguration.TABLE_EXTRACTOR_QUEUE,
+                TableExtractorRequest.builder()
+                        .dossierId(dossierId)
+                        .fileId(fileId)
+                        .targetFileExtension(TableExtractorRequest.TARGET_FILE_EXTENSION)
+                        .responseFileExtension(TableExtractorRequest.TABLE_EXTRACTOR_FILE_EXTENSION)
+                        .build(),
+                message -> {
+                    message.getMessageProperties().setPriority(1); // NOTE(review): tableExtractorRequestQueue declares no max priority - confirm this has an effect
+                    return message;
+                });
+    }
+
+
     @SneakyThrows
     public void addToPreprocessingQueue(String dossierId, String fileId, String filename) {
 
diff --git
a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java
index 796db6f24..e41186ed5 100644
--- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java
@@ -39,12 +39,17 @@ public class LayoutParsingRequestFactory {
         Optional<String> optionalTableFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) //
                 ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES)) : Optional.empty();
 
+        Optional<String> optionalTableExtractorFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES) //
+                ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.EXTRACTED_TABLES)) : Optional.empty();
+
+
         return LayoutParsingRequest.builder()
                 .layoutParsingType(type)
                 .identifier(layoutParsingRequestIdentifierService.buildIdentifier(dossierId, fileId, priority))
                 .originFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
                 .imagesFileStorageId(optionalImageFileId)
                 .tablesFileStorageId(optionalTableFileId)
+                .tableExtractorFileId(optionalTableExtractorFileId) // empty when no EXTRACTED_TABLES object is stored for the file
                 .pageFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES))
                 .structureFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE))
                 .textBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT))
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/TableExtractorMessageReceiver.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/TableExtractorMessageReceiver.java
new file mode 100644
index 000000000..a47d49497
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/TableExtractorMessageReceiver.java
@@ -0,0 +1,55 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing;
+
+import java.time.OffsetDateTime;
+import java.time.temporal.ChronoUnit;
+
+import org.springframework.amqp.core.Message;
+import org.springframework.amqp.rabbit.annotation.RabbitListener;
+import org.springframework.stereotype.Service;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
+import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceResponse; // NOTE(review): not referenced in this class - safe to drop
+import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorResponse;
+import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
+import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
+
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class TableExtractorMessageReceiver { // listens on the table extractor response queue and on its dead-letter queue
+
+    private final ObjectMapper objectMapper;
+    private final FileStatusService fileStatusService;
+    private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
+
+
+    @SneakyThrows
+    @RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE)
+    public void receive(TableExtractorResponse response) { // hands the file back to FileStatusService.setStatusAnalyse
+
+        fileStatusService.setStatusAnalyse(response.getDossierId(), response.getFileId(), false);
+
+        log.info("Received message in {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE, response.getDossierId(), response.getFileId());
+    }
+
+
+    @SneakyThrows
+    @RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_DLQ) // the request and the response queue both dead-letter into this queue
+    public void handleDLQMessage(Message failedMessage) {
+
+        var response = objectMapper.readValue(failedMessage.getBody(), TableExtractorResponse.class); // NOTE(review): dead-lettered requests carry extra fields - confirm the ObjectMapper ignores unknown properties
+
+        log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, response.getDossierId(), response.getFileId());
+        fileStatusProcessingUpdateService.analysisFailed(response.getDossierId(),
+                response.getFileId(),
+                new FileErrorInfo("table extractor failed", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, "table-extractor", OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
+
+    }
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java
index 93e2ba057..8d2cfaede 100644
--- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java
@@ -24,6 +24,7 @@ public class FileManagementServiceSettings {
 
     private boolean imageServiceEnabled = true;
     private boolean nerServiceEnabled = true;
+    private boolean tableExtractorEnabled = true; // NOTE(review): on by default - FileStatusService then queues every file without EXTRACTED_TABLES; confirm the extractor is deployed
 
     private boolean storeImageFile = true;
 
diff --git a/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml b/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml
index 451cedd61..05c368ec5 100644
--- a/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml
+++ b/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml
@@ -32,6 +32,7 @@ persistence-service:
   imageServiceEnabled: false
   nerServiceEnabled: false
   storeImageFile: false
+  tableExtractorEnabled: false
   applicationName: RedactManager
 fforesight:
   springdoc:
diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java
b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java
index 7e67f33ac..1075a77d5 100644
--- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java
+++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java
@@ -16,6 +16,7 @@ public enum FileType {
     TEXT_HIGHLIGHTS(".json"),
     FIGURE(".json"),
     TABLES(".json"),
+    EXTRACTED_TABLES(".json"), // table extractor result; FileStatusService queues the file while this object is missing
     COMPONENTS(".json"),
     // document is split into 4 files, all should be overridden/deleted at the same time
     DOCUMENT_TEXT(".json"),
diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java
index f65ceabe1..78c708907 100644
--- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java
+++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java
@@ -18,5 +18,6 @@ public enum ProcessingStatus {
     PRE_PROCESSING,
     PRE_PROCESSED,
     FIGURE_DETECTION_ANALYZING,
-    TABLE_PARSING_ANALYZING
+    TABLE_PARSING_ANALYZING,
+    TABLE_EXTRACTOR_ANALYZING // set by FileStatusService when a file is sent to the table extractor queue
 }
diff --git a/publish-custom-image.sh b/publish-custom-image.sh
index b680eb16a..6aff1e193 100755
--- a/publish-custom-image.sh
+++ b/publish-custom-image.sh
@@ -11,5 +11,5 @@
commit_hash=$(git rev-parse --short=5 HEAD)
 # Combine branch and commit hash
 buildName="${USER}-${branch}-${commit_hash}"
-gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName
-echo "nexus.knecon.com:5001/red/${dir}-server-v1:$buildName"
+gradle bootBuildImage --cleanCache --publishImage -Pversion=persistence-extractor-$buildName --stacktrace # image tag is derived from buildName assigned above
+echo "nexus.knecon.com:5001/red/${dir}-server-v1:persistence-extractor-$buildName"
-- 
2.47.2