WIP: Draft: Resolve RED-7375 #311
@ -21,7 +21,7 @@ dependencies {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:0.74.0") {
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:extractor-0") {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
|
||||
@ -60,6 +60,11 @@ public class MessagingConfiguration {
|
||||
public static final String PDFTRON_DLQ = "pdftron_dlq";
|
||||
public static final String PDFTRON_RESULT_QUEUE = "pdftron_result_queue";
|
||||
|
||||
public static final String TABLE_EXTRACTOR_QUEUE = "table_extractor_request_queue";
|
||||
public static final String TABLE_EXTRACTOR_RESPONSE_QUEUE = "table_extractor_response_queue";
|
||||
public static final String TABLE_EXTRACTOR_DLQ = "table_extractor_dead_letter_queue";
|
||||
|
||||
|
||||
public static final String CV_ANALYSIS_QUEUE = "cv_analysis_request_queue";
|
||||
public static final String CV_ANALYSIS_RESPONSE_QUEUE = "cv_analysis_response_queue";
|
||||
public static final String CV_ANALYSIS_DLQ = "cv_analysis_dead_letter_queue";
|
||||
@ -372,6 +377,29 @@ public class MessagingConfiguration {
|
||||
.maxPriority(2)
|
||||
.build();
|
||||
}
|
||||
@Bean
|
||||
public Queue tableExtractorRequestQueue() {
|
||||
|
||||
return QueueBuilder.durable(TABLE_EXTRACTOR_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue tableExtractorResponseQueue() {
|
||||
|
||||
return QueueBuilder.durable(TABLE_EXTRACTOR_RESPONSE_QUEUE)
|
||||
.withArgument("x-dead-letter-exchange", "")
|
||||
.withArgument("x-dead-letter-routing-key", TABLE_EXTRACTOR_DLQ)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue tableExtractorDLQ() {
|
||||
|
||||
return QueueBuilder.durable(TABLE_EXTRACTOR_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue layoutparsingRequestQueue() {
|
||||
|
||||
@ -0,0 +1,25 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class TableExtractorRequest {
|
||||
|
||||
public static final String TABLE_EXTRACTOR_FILE_EXTENSION = FileType.EXTRACTED_TABLES.name() + FileType.EXTRACTED_TABLES.getExtension() + ".gz";
|
||||
|
||||
public static final String TARGET_FILE_EXTENSION = "ORIGIN.pdf.gz";
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private String targetFileExtension;
|
||||
private String responseFileExtension;
|
||||
|
||||
}
|
||||
@ -0,0 +1,17 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class TableExtractorResponse {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
|
||||
}
|
||||
@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Inter
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.manualredactions.ManualRedactionProviderService;
|
||||
@ -175,6 +176,12 @@ public class FileStatusService {
|
||||
return;
|
||||
}
|
||||
|
||||
if (settings.isTableExtractorEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES)) {
|
||||
log.info("Add file: {} from dossier {} to Table Extractor queue", fileId, dossierId);
|
||||
addToTableExtractorQueue(dossierId, fileId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (settings.isImageServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO)) {
|
||||
log.info("Add file: {} from dossier {} to Image queue", fileId, dossierId);
|
||||
addToImageQueue(dossierId, fileId);
|
||||
@ -234,6 +241,24 @@ public class FileStatusService {
|
||||
}
|
||||
}
|
||||
|
||||
private void addToTableExtractorQueue(String dossierId, String fileId) {
|
||||
|
||||
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.TABLE_EXTRACTOR_ANALYZING);
|
||||
|
||||
rabbitTemplate.convertAndSend(MessagingConfiguration.TABLE_EXTRACTOR_QUEUE,
|
||||
TableExtractorRequest.builder()
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.targetFileExtension(TableExtractorRequest.TARGET_FILE_EXTENSION)
|
||||
.responseFileExtension(TableExtractorRequest.TABLE_EXTRACTOR_FILE_EXTENSION)
|
||||
.build(),
|
||||
message -> {
|
||||
message.getMessageProperties().setPriority(1);
|
||||
return message;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void addToPreprocessingQueue(String dossierId, String fileId, String filename) {
|
||||
|
||||
@ -39,12 +39,17 @@ public class LayoutParsingRequestFactory {
|
||||
Optional<String> optionalTableFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES)) : Optional.empty();
|
||||
|
||||
Optional<String> optionalTableExtractorFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.EXTRACTED_TABLES) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.EXTRACTED_TABLES)) : Optional.empty();
|
||||
|
||||
|
||||
return LayoutParsingRequest.builder()
|
||||
.layoutParsingType(type)
|
||||
.identifier(layoutParsingRequestIdentifierService.buildIdentifier(dossierId, fileId, priority))
|
||||
.originFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
|
||||
.imagesFileStorageId(optionalImageFileId)
|
||||
.tablesFileStorageId(optionalTableFileId)
|
||||
.tableExtractorFileId(optionalTableExtractorFileId)
|
||||
.pageFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES))
|
||||
.structureFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE))
|
||||
.textBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT))
|
||||
|
||||
@ -0,0 +1,55 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
|
||||
import org.springframework.amqp.core.Message;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.TableExtractorResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class TableExtractorMessageReceiver {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final FileStatusService fileStatusService;
|
||||
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE)
|
||||
public void receive(TableExtractorResponse response) {
|
||||
|
||||
fileStatusService.setStatusAnalyse(response.getDossierId(), response.getFileId(), false);
|
||||
|
||||
log.info("Received message in {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_RESPONSE_QUEUE, response.getDossierId(), response.getFileId());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@RabbitListener(queues = MessagingConfiguration.TABLE_EXTRACTOR_DLQ)
|
||||
public void handleDLQMessage(Message failedMessage) {
|
||||
|
||||
var response = objectMapper.readValue(failedMessage.getBody(), TableExtractorResponse.class);
|
||||
|
||||
log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, response.getDossierId(), response.getFileId());
|
||||
fileStatusProcessingUpdateService.analysisFailed(response.getDossierId(),
|
||||
response.getFileId(),
|
||||
new FileErrorInfo("table extractor failed", MessagingConfiguration.TABLE_EXTRACTOR_DLQ, "table-extractor", OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -24,6 +24,7 @@ public class FileManagementServiceSettings {
|
||||
|
||||
private boolean imageServiceEnabled = true;
|
||||
private boolean nerServiceEnabled = true;
|
||||
private boolean tableExtractorEnabled = true;
|
||||
|
||||
private boolean storeImageFile = true;
|
||||
|
||||
|
||||
@ -32,6 +32,7 @@ persistence-service:
|
||||
imageServiceEnabled: false
|
||||
nerServiceEnabled: false
|
||||
storeImageFile: false
|
||||
tableExtractorEnabled: false
|
||||
applicationName: RedactManager
|
||||
fforesight:
|
||||
springdoc:
|
||||
|
||||
@ -16,6 +16,7 @@ public enum FileType {
|
||||
TEXT_HIGHLIGHTS(".json"),
|
||||
FIGURE(".json"),
|
||||
TABLES(".json"),
|
||||
EXTRACTED_TABLES(".json"),
|
||||
COMPONENTS(".json"),
|
||||
// document is split into 4 files, all should be overridden/deleted at the same time
|
||||
DOCUMENT_TEXT(".json"),
|
||||
|
||||
@ -18,5 +18,6 @@ public enum ProcessingStatus {
|
||||
PRE_PROCESSING,
|
||||
PRE_PROCESSED,
|
||||
FIGURE_DETECTION_ANALYZING,
|
||||
TABLE_PARSING_ANALYZING
|
||||
TABLE_PARSING_ANALYZING,
|
||||
TABLE_EXTRACTOR_ANALYZING
|
||||
}
|
||||
|
||||
@ -11,5 +11,5 @@ commit_hash=$(git rev-parse --short=5 HEAD)
|
||||
# Combine branch and commit hash
|
||||
buildName="${USER}-${branch}-${commit_hash}"
|
||||
|
||||
gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName
|
||||
echo "nexus.knecon.com:5001/red/${dir}-server-v1:$buildName"
|
||||
gradle bootBuildImage --cleanCache --publishImage -Pversion=persistence-extractor-$buildNumber --stacktrace
|
||||
echo "nexus.knecon.com:5001/red/${dir}-server-v1:persistence-extractor-$buildNumber"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user