From 8a6dd4c5e89282b2dad2ad6f64395bbed73ff41d Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 21 Aug 2023 12:48:46 +0200 Subject: [PATCH 01/10] RED-7375: integrate table extractor --- .../configuration/MessagingConfiguration.java | 26 +++++++++ .../model/TableExtractorRequest.java | 23 ++++++++ .../model/TableExtractorResponse.java | 17 ++++++ .../processor/service/FileStatusService.java | 25 +++++++++ .../queue/TableExtractorMessageReceiver.java | 55 +++++++++++++++++++ .../FileManagementServiceSettings.java | 1 + .../src/main/resources/application-dev.yaml | 1 + .../dossier/file/FileType.java | 1 + .../dossier/file/ProcessingStatus.java | 3 +- 9 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java index 2d6bc486e..190cc4ba7 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java @@ -59,6 +59,10 @@ public class MessagingConfiguration { public static final String CV_ANALYSIS_RESPONSE_QUEUE = "cv_analysis_response_queue"; public static final String CV_ANALYSIS_DLQ = "cv_analysis_dead_letter_queue"; + public static final String TABLE_EXTRACTOR_QUEUE = "table_extractor_request_queue"; + public static final String TABLE_EXTRACTOR_RESPONSE_QUEUE = "table_extractor_response_queue"; + public static final String TABLE_EXTRACTOR_DLQ = "table_extractor_dead_letter_queue"; + public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue"; public static final String OCR_STATUS_UPDATE_RESPONSE_DQL = "ocr_status_update_response_dql"; @@ -338,4 +342,26 @@ public class MessagingConfiguration { return QueueBuilder.durable(LAYOUT_PARSING_DLQ).build(); } + + @Bean + public Queue tableExtractorRequestQueue() { + + return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE)// + .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + } + + + @Bean + public Queue tableExtractorResponseQueue() { + + return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE)// + .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + } + + + @Bean + public Queue tableExtractorDLQ() { + + return QueueBuilder.durable(TABLE_EXTRACTOR_DLQ).build(); + } } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java new file mode 100644 index 000000000..e5c14617e --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java @@ -0,0 +1,23 @@ +package com.iqser.red.service.persistence.management.v1.processor.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class TableExtractorRequest { + + public static final String TABLE_EXTRACTOR_FILE_EXTENSION = "EXTRACTED_TABLES.json.gz"; + + public static final String TARGET_FILE_EXTENSION = "ORIGIN.pdf.gz"; + + private String dossierId; + private String fileId; + private String targetFileExtension; + private String responseFileExtension; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java new file mode 100644 index 000000000..ffec256d7 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java @@ -0,0 +1,17 @@ +package com.iqser.red.service.persistence.management.v1.processor.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class TableExtractorResponse { + + private String dossierId; + private String fileId; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java index 24912b848..76ffbfacb 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java @@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Inter import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse; +import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorRequest; import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory; import com.iqser.red.service.persistence.management.v1.processor.service.manualredactions.ManualRedactionProviderService; @@ -175,6 +176,12 @@ public class FileStatusService { return; } + if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.INVISIBLE_TABLES)) { + log.info("Add file: {} from dossier {} to Table Extractor queue", fileId, dossierId); + addToTableExtractorQueue(dossierId, fileId); + return; + } + var fileModel = MagicConverter.convert(fileEntity, FileModel.class, new FileModelMapper()); fileModel = reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModel, true); @@ -230,6 +237,24 @@ public class FileStatusService { } + private void addToTableExtractorQueue(String dossierId, String fileId) { + + fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.TABLE_EXTRACTOR_ANALYZING); + + rabbitTemplate.convertAndSend(MessagingConfiguration.TABLE_EXTRACTOR_QUEUE, + TableExtractorRequest.builder() + .dossierId(dossierId) + .fileId(fileId) + .targetFileExtension(TableExtractorRequest.TARGET_FILE_EXTENSION) + .responseFileExtension(TableExtractorRequest.TABLE_EXTRACTOR_FILE_EXTENSION) + .build(), + message -> { + message.getMessageProperties().setPriority(1); + return message; + }); + } + + @SneakyThrows public void addToPreprocessingQueue(String dossierId, String fileId, String filename) { diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java new file mode 100644 index 000000000..b53ee3a19 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java @@ -0,0 +1,55 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.queue; + +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; + +import org.springframework.amqp.core.Message; +import org.springframework.amqp.rabbit.annotation.RabbitListener; +import org.springframework.stereotype.Service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration; +import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceResponse; +import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorResponse; +import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService; +import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class TableExtractorMessageReceiver { + + private final ObjectMapper objectMapper; + private final FileStatusService fileStatusService; + private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService; + + + @SneakyThrows + @RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE) + public void receive(TableExtractorResponse response) { + + fileStatusService.setStatusAnalyse(response.getDossierId(), response.getFileId(), false); + + log.info("Received message in {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE, response.getDossierId(), response.getFileId()); + } + + + @SneakyThrows + @RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_DLQ) + public void handleDLQMessage(Message failedMessage) { + + var response = objectMapper.readValue(failedMessage.getBody(), TableExtractorResponse.class); + + log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, response.getDossierId(), response.getFileId()); + fileStatusProcessingUpdateService.analysisFailed(response.getDossierId(), + response.getFileId(), + new FileErrorInfo("table extractor failed", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, "table-extractor", OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS))); + + } + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java index 452013c4b..8d942f617 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java @@ -24,6 +24,7 @@ public class FileManagementServiceSettings { private boolean imageServiceEnabled = true; private boolean nerServiceEnabled = true; + private boolean tableExtractorEnabled = true; private boolean storeImageFile = true; diff --git a/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml b/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml index c49a6fac7..bf4fa88f8 100644 --- a/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml +++ b/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml @@ -30,6 +30,7 @@ cors.enabled: true persistence-service: imageServiceEnabled: false nerServiceEnabled: false + tableExtractorEnabled: false storeImageFile: false applicationName: RedactManager fforesight: diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java index 8008340ee..afcf7fde9 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java @@ -17,6 +17,7 @@ public enum FileType { TEXT_HIGHLIGHTS(".json"), FIGURE(".json"), TABLES(".json"), + INVISIBLE_TABLES(".json"), COMPONENTS(".json"), // document is split into 4 files, all should be overridden/deleted at the same time DOCUMENT_TEXT(".json"), diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java index f65ceabe1..1a158927a 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java @@ -18,5 +18,6 @@ public enum ProcessingStatus { PRE_PROCESSING, PRE_PROCESSED, FIGURE_DETECTION_ANALYZING, - TABLE_PARSING_ANALYZING + TABLE_PARSING_ANALYZING, + TABLE_EXTRACTOR_ANALYZING, } -- 2.47.2 From a76704e0b56890e1a58f17c4f8ba7d810c59be30 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 21 Aug 2023 13:24:24 +0200 Subject: [PATCH 02/10] RED-7375: fix tests --- .../src/test/resources/application.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml b/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml index c85f2c996..06807fcb7 100644 --- a/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml +++ b/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml @@ -58,6 +58,7 @@ server: persistence-service: imageServiceEnabled: false + tableExtractorEnabled: false metrics: -- 2.47.2 From 49bcc43716240ec38788fac01c0fa93af5813e12 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 31 Aug 2023 17:25:49 +0200 Subject: [PATCH 03/10] RED-7375: integrate table extractor * bind the correct DLQ --- .../configuration/MessagingConfiguration.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java index 190cc4ba7..65880198a 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java @@ -69,6 +69,7 @@ public class MessagingConfiguration { public static final String X_ERROR_INFO_HEADER = "x-error-message"; public static final String X_ERROR_INFO_TIMESTAMP_HEADER = "x-error-message-timestamp"; + @Bean public Queue nerRequestQueue() { @@ -321,19 +322,21 @@ public class MessagingConfiguration { .build(); } + @Bean public Queue layoutparsingRequestQueue() { - return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); } @Bean public Queue layoutparsingResponseQueue() { - return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE) + .withArgument("x-dead-letter-exchange", "") + .withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ) + .build(); } @@ -343,19 +346,21 @@ public class MessagingConfiguration { return QueueBuilder.durable(LAYOUT_PARSING_DLQ).build(); } + @Bean public Queue tableExtractorRequestQueue() { - return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ).build(); } @Bean public Queue tableExtractorResponseQueue() { - return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE) + .withArgument("x-dead-letter-exchange", "") + .withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ) + .build(); } @@ -364,4 +369,5 @@ public class MessagingConfiguration { return QueueBuilder.durable(TABLE_EXTRACTOR_DLQ).build(); } + } -- 2.47.2 From 74ac3c0c34d9aef417770b5a5e57f6b519dda0ed Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Wed, 6 Sep 2023 14:20:38 +0200 Subject: [PATCH 04/10] RED-7375: integrate Table Extractor * fix filenames --- .../management/v1/processor/model/TableExtractorRequest.java | 4 +++- .../management/v1/processor/service/FileStatusService.java | 2 +- .../shared/model/dossiertemplate/dossier/file/FileType.java | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java index e5c14617e..6ec502b84 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java @@ -1,5 +1,7 @@ package com.iqser.red.service.persistence.management.v1.processor.model; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; + import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -11,7 +13,7 @@ import lombok.NoArgsConstructor; @NoArgsConstructor public class TableExtractorRequest { - public static final String TABLE_EXTRACTOR_FILE_EXTENSION = "EXTRACTED_TABLES.json.gz"; + public static final String TABLE_EXTRACTOR_FILE_EXTENSION = FileType.EXTRACTED_TABLES.name() + FileType.EXTRACTED_TABLES.getExtension() + ".gz"; public static final String TARGET_FILE_EXTENSION = "ORIGIN.pdf.gz"; diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java index 76ffbfacb..ef9854c23 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java @@ -176,7 +176,7 @@ public class FileStatusService { return; } - if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.INVISIBLE_TABLES)) { + if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES)) { log.info("Add file: {} from dossier {} to Table Extractor queue", fileId, dossierId); addToTableExtractorQueue(dossierId, fileId); return; diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java index afcf7fde9..cea316b90 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java @@ -17,7 +17,7 @@ public enum FileType { TEXT_HIGHLIGHTS(".json"), FIGURE(".json"), TABLES(".json"), - INVISIBLE_TABLES(".json"), + EXTRACTED_TABLES(".json"), COMPONENTS(".json"), // document is split into 4 files, all should be overridden/deleted at the same time DOCUMENT_TEXT(".json"), -- 2.47.2 From dcc401ce984c399da80365a467eec5c6cee68977 Mon Sep 17 00:00:00 2001 From: yhampe Date: Fri, 1 Dec 2023 10:58:56 +0100 Subject: [PATCH 05/10] RED-7375: integrate table parsing service: added extracted table file id to layoutparsingrequest --- .../persistence-service-processor-v1/pom.xml | 2 +- .../service/layoutparsing/LayoutParsingRequestFactory.java | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/persistence-service-v1/persistence-service-processor-v1/pom.xml b/persistence-service-v1/persistence-service-processor-v1/pom.xml index f635cef86..4b3ba01f9 100644 --- a/persistence-service-v1/persistence-service-processor-v1/pom.xml +++ b/persistence-service-v1/persistence-service-processor-v1/pom.xml @@ -91,7 +91,7 @@ com.knecon.fforesight layoutparser-service-internal-api - 0.57.0 + yannik-0 diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java index 5936ed66d..451a56bd8 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java @@ -39,18 +39,22 @@ public class LayoutParsingRequestFactory { Optional optionalTableFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) // ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES)) : Optional.empty(); + Optional optionalTableExtractorFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES) // + ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.EXTRACTED_TABLES)) : Optional.empty(); + + return LayoutParsingRequest.builder() .layoutParsingType(type) .identifier(layoutParsingRequestIdentifierService.buildIdentifier(dossierId, fileId, priority)) .originFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN)) .imagesFileStorageId(optionalImageFileId) .tablesFileStorageId(optionalTableFileId) + .tableExtractorFileId(optionalTableExtractorFileId) .pageFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES)) .structureFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE)) .textBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT)) .positionBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION)) .simplifiedTextStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SIMPLIFIED_TEXT)) - .sectionGridStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID)) .viewerDocumentStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT)) .build(); } -- 2.47.2 From 192e8a7e74e5dad12a960e169c3cac3e1335cd54 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 21 Aug 2023 12:48:46 +0200 Subject: [PATCH 06/10] RED-7375: integrate table extractor --- .../configuration/MessagingConfiguration.java | 26 +++++++++ .../model/TableExtractorRequest.java | 23 ++++++++ .../model/TableExtractorResponse.java | 17 ++++++ .../processor/service/FileStatusService.java | 25 +++++++++ .../queue/TableExtractorMessageReceiver.java | 55 +++++++++++++++++++ .../FileManagementServiceSettings.java | 1 + .../src/main/resources/application-dev.yaml | 1 + .../dossier/file/FileType.java | 1 + .../dossier/file/ProcessingStatus.java | 3 +- 9 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java index 2d6d113c8..4f85004f6 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java @@ -59,6 +59,10 @@ public class MessagingConfiguration { public static final String CV_ANALYSIS_RESPONSE_QUEUE = "cv_analysis_response_queue"; public static final String CV_ANALYSIS_DLQ = "cv_analysis_dead_letter_queue"; + public static final String TABLE_EXTRACTOR_QUEUE = "table_extractor_request_queue"; + public static final String TABLE_EXTRACTOR_RESPONSE_QUEUE = "table_extractor_response_queue"; + public static final String TABLE_EXTRACTOR_DLQ = "table_extractor_dead_letter_queue"; + public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue"; public static final String OCR_STATUS_UPDATE_RESPONSE_DQL = "ocr_status_update_response_dql"; @@ -367,4 +371,26 @@ public class MessagingConfiguration { return QueueBuilder.durable(LAYOUT_PARSING_DLQ).build(); } + + @Bean + public Queue tableExtractorRequestQueue() { + + return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE)// + .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + } + + + @Bean + public Queue tableExtractorResponseQueue() { + + return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE)// + .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + } + + + @Bean + public Queue tableExtractorDLQ() { + + return QueueBuilder.durable(TABLE_EXTRACTOR_DLQ).build(); + } } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java new file mode 100644 index 000000000..e5c14617e --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java @@ -0,0 +1,23 @@ +package com.iqser.red.service.persistence.management.v1.processor.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class TableExtractorRequest { + + public static final String TABLE_EXTRACTOR_FILE_EXTENSION = "EXTRACTED_TABLES.json.gz"; + + public static final String TARGET_FILE_EXTENSION = "ORIGIN.pdf.gz"; + + private String dossierId; + private String fileId; + private String targetFileExtension; + private String responseFileExtension; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java new file mode 100644 index 000000000..ffec256d7 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorResponse.java @@ -0,0 +1,17 @@ +package com.iqser.red.service.persistence.management.v1.processor.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class TableExtractorResponse { + + private String dossierId; + private String fileId; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java index 74cd03b42..3c4a0b1b5 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java @@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Inter import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse; +import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorRequest; import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory; import com.iqser.red.service.persistence.management.v1.processor.service.manualredactions.ManualRedactionProviderService; @@ -175,6 +176,12 @@ public class FileStatusService { return; } + if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.INVISIBLE_TABLES)) { + log.info("Add file: {} from dossier {} to Table Extractor queue", fileId, dossierId); + addToTableExtractorQueue(dossierId, fileId); + return; + } + var fileModel = MagicConverter.convert(fileEntity, FileModel.class, new FileModelMapper()); fileModel = reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModel, true); @@ -230,6 +237,24 @@ public class FileStatusService { } + private void addToTableExtractorQueue(String dossierId, String fileId) { + + fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.TABLE_EXTRACTOR_ANALYZING); + + rabbitTemplate.convertAndSend(MessagingConfiguration.TABLE_EXTRACTOR_QUEUE, + TableExtractorRequest.builder() + .dossierId(dossierId) + .fileId(fileId) + .targetFileExtension(TableExtractorRequest.TARGET_FILE_EXTENSION) + .responseFileExtension(TableExtractorRequest.TABLE_EXTRACTOR_FILE_EXTENSION) + .build(), + message -> { + message.getMessageProperties().setPriority(1); + return message; + }); + } + + @SneakyThrows public void addToPreprocessingQueue(String dossierId, String fileId, String filename) { diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java new file mode 100644 index 000000000..b53ee3a19 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/TableExtractorMessageReceiver.java @@ -0,0 +1,55 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.queue; + +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; + +import org.springframework.amqp.core.Message; +import org.springframework.amqp.rabbit.annotation.RabbitListener; +import org.springframework.stereotype.Service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration; +import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceResponse; +import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorResponse; +import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService; +import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class TableExtractorMessageReceiver { + + private final ObjectMapper objectMapper; + private final FileStatusService fileStatusService; + private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService; + + + @SneakyThrows + @RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE) + public void receive(TableExtractorResponse response) { + + fileStatusService.setStatusAnalyse(response.getDossierId(), response.getFileId(), false); + + log.info("Received message in {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE, response.getDossierId(), response.getFileId()); + } + + + @SneakyThrows + @RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_DLQ) + public void handleDLQMessage(Message failedMessage) { + + var response = objectMapper.readValue(failedMessage.getBody(), TableExtractorResponse.class); + + log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, response.getDossierId(), response.getFileId()); + fileStatusProcessingUpdateService.analysisFailed(response.getDossierId(), + response.getFileId(), + new FileErrorInfo("table extractor failed", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, "table-extractor", OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS))); + + } + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java index 93e2ba057..8d2cfaede 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/settings/FileManagementServiceSettings.java @@ -24,6 +24,7 @@ public class FileManagementServiceSettings { private boolean imageServiceEnabled = true; private boolean nerServiceEnabled = true; + private boolean tableExtractorEnabled = true; private boolean storeImageFile = true; diff --git a/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml b/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml index d4007e382..5ce0aaa10 100644 --- a/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml +++ b/persistence-service-v1/persistence-service-server-v1/src/main/resources/application-dev.yaml @@ -31,6 +31,7 @@ cors.enabled: true persistence-service: imageServiceEnabled: false nerServiceEnabled: false + tableExtractorEnabled: false storeImageFile: false applicationName: RedactManager fforesight: diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java index 7e67f33ac..328f49f3e 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java @@ -16,6 +16,7 @@ public enum FileType { TEXT_HIGHLIGHTS(".json"), FIGURE(".json"), TABLES(".json"), + INVISIBLE_TABLES(".json"), COMPONENTS(".json"), // document is split into 4 files, all should be overridden/deleted at the same time DOCUMENT_TEXT(".json"), diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java index f65ceabe1..1a158927a 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/ProcessingStatus.java @@ -18,5 +18,6 @@ public enum ProcessingStatus { PRE_PROCESSING, PRE_PROCESSED, FIGURE_DETECTION_ANALYZING, - TABLE_PARSING_ANALYZING + TABLE_PARSING_ANALYZING, + TABLE_EXTRACTOR_ANALYZING, } -- 2.47.2 From e4184e442b452062fd90c742f35ca0680d68a8ce Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 21 Aug 2023 13:24:24 +0200 Subject: [PATCH 07/10] RED-7375: fix tests --- .../src/test/resources/application.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml b/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml index c85f2c996..06807fcb7 100644 --- a/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml +++ b/persistence-service-v1/persistence-service-server-v1/src/test/resources/application.yaml @@ -58,6 +58,7 @@ server: persistence-service: imageServiceEnabled: false + tableExtractorEnabled: false metrics: -- 2.47.2 From 836eea78298fef4ce943ffcf963f4d5e9ea8995b Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 31 Aug 2023 17:25:49 +0200 Subject: [PATCH 08/10] RED-7375: integrate table extractor * bind the correct DLQ --- .../configuration/MessagingConfiguration.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java index 4f85004f6..aaa379f93 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/configuration/MessagingConfiguration.java @@ -350,19 +350,21 @@ public class MessagingConfiguration { .build(); } + @Bean public Queue layoutparsingRequestQueue() { - return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); } @Bean public Queue layoutparsingResponseQueue() { - return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE) + .withArgument("x-dead-letter-exchange", "") + .withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ) + .build(); } @@ -375,16 +377,17 @@ public class MessagingConfiguration { @Bean public Queue tableExtractorRequestQueue() { - return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ).build(); } @Bean public Queue tableExtractorResponseQueue() { - return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE)// - .withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build(); + return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE) + .withArgument("x-dead-letter-exchange", "") + .withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ) + .build(); } -- 2.47.2 From 6b582839f776c26ea8574421ecbe7abe87dbaf7c Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Wed, 6 Sep 2023 14:20:38 +0200 Subject: [PATCH 09/10] RED-7375: integrate Table Extractor * fix filenames --- .../management/v1/processor/model/TableExtractorRequest.java | 4 +++- .../management/v1/processor/service/FileStatusService.java | 2 +- .../shared/model/dossiertemplate/dossier/file/FileType.java | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java index e5c14617e..6ec502b84 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/model/TableExtractorRequest.java @@ -1,5 +1,7 @@ package com.iqser.red.service.persistence.management.v1.processor.model; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; + import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -11,7 +13,7 @@ import lombok.NoArgsConstructor; @NoArgsConstructor public class TableExtractorRequest { - public static final String TABLE_EXTRACTOR_FILE_EXTENSION = "EXTRACTED_TABLES.json.gz"; + public static final String TABLE_EXTRACTOR_FILE_EXTENSION = FileType.EXTRACTED_TABLES.name() + FileType.EXTRACTED_TABLES.getExtension() + ".gz"; public static final String TARGET_FILE_EXTENSION = "ORIGIN.pdf.gz"; diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java index 3c4a0b1b5..72d9a02ea 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java @@ -176,7 +176,7 @@ public class FileStatusService { return; } - if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.INVISIBLE_TABLES)) { + if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES)) { log.info("Add file: {} from dossier {} to Table Extractor queue", fileId, dossierId); addToTableExtractorQueue(dossierId, fileId); return; diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java index 328f49f3e..1075a77d5 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java @@ -16,7 +16,7 @@ public enum FileType { TEXT_HIGHLIGHTS(".json"), FIGURE(".json"), TABLES(".json"), - INVISIBLE_TABLES(".json"), + EXTRACTED_TABLES(".json"), COMPONENTS(".json"), // document is split into 4 files, all should be overridden/deleted at the same time DOCUMENT_TEXT(".json"), -- 2.47.2 From 6f61f1bc354f9390581d0479d9a16bd3ba259f3c Mon Sep 17 00:00:00 2001 From: yhampe Date: Fri, 1 Dec 2023 10:58:56 +0100 Subject: [PATCH 10/10] RED-7375: integrate table parsing service: added extracted table file id to layoutparsingrequest --- .../persistence-service-processor-v1/pom.xml | 2 +- .../service/layoutparsing/LayoutParsingRequestFactory.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/persistence-service-v1/persistence-service-processor-v1/pom.xml b/persistence-service-v1/persistence-service-processor-v1/pom.xml index e6c1ef70d..b926943e1 100644 --- a/persistence-service-v1/persistence-service-processor-v1/pom.xml +++ b/persistence-service-v1/persistence-service-processor-v1/pom.xml @@ -91,7 +91,7 @@ com.knecon.fforesight layoutparser-service-internal-api - 0.74.0 + yannik-0 diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java index 796db6f24..e41186ed5 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java @@ -39,12 +39,17 @@ public class LayoutParsingRequestFactory { Optional optionalTableFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) // ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES)) : Optional.empty(); + Optional optionalTableExtractorFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES) // + ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.EXTRACTED_TABLES)) : Optional.empty(); + + return LayoutParsingRequest.builder() .layoutParsingType(type) .identifier(layoutParsingRequestIdentifierService.buildIdentifier(dossierId, fileId, priority)) .originFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN)) .imagesFileStorageId(optionalImageFileId) .tablesFileStorageId(optionalTableFileId) + .tableExtractorFileId(optionalTableExtractorFileId) .pageFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES)) .structureFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE)) .textBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT)) -- 2.47.2