RED-5022: Execute table parsing and figure detection if cv-service is enabled

This commit is contained in:
deiflaender 2022-08-19 11:17:02 +02:00
parent 0ad0b20dea
commit 97c1c9ae5f
7 changed files with 75 additions and 45 deletions

View File

@ -13,7 +13,9 @@ public enum FileType {
NER_ENTITIES(".json"),
IMAGE_INFO(".json"),
IMPORTED_REDACTIONS(".json"),
TEXT_HIGHLIGHTS(".json");
TEXT_HIGHLIGHTS(".json"),
FIGURES(".json"),
TABLES(".json");
@Getter
private final String extension;

View File

@ -1,6 +1,6 @@
package com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file;
/**
 * Processing states of a file.
 *
 * <p>{@code FIGURE_DETECTION_ANALYZING} and {@code TABLE_PARSING_ANALYZING} are appended after the
 * previously existing constants, so the ordinals of all earlier constants stay unchanged.
 */
public enum ProcessingStatus {
    ANALYSE,
    ERROR,
    FULLREPROCESS,
    IMAGE_ANALYZING,
    INDEXING,
    NER_ANALYZING,
    OCR_PROCESSING_QUEUED,
    OCR_PROCESSING,
    PROCESSED,
    PROCESSING,
    REPROCESS,
    SURROUNDING_TEXT_PROCESSING,
    UNPROCESSED,
    FULL_PROCESSING,
    PRE_PROCESSING_QUEUED,
    PRE_PROCESSING,
    PRE_PROCESSED,
    FIGURE_DETECTION_ANALYZING,
    TABLE_PARSING_ANALYZING
}

View File

@ -39,6 +39,8 @@ public class AdminInterfaceController {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
fileStatusService.setStatusFullReprocess(dossierId, fileId, true, true);

View File

@ -1,8 +1,5 @@
package com.iqser.red.service.peristence.v1.server.model;
import java.util.HashSet;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -14,12 +11,18 @@ import lombok.NoArgsConstructor;
@NoArgsConstructor
public class CvAnalysisServiceRequest {
public static final String OPERATION_TABLE_PARSING = "table_parsing";
public static final String OPERATION_TABLE_PARSING = "table";
public static final String OPERATION_FIGURE_DETECTION = "figure";
public static final String FIGURE_DETECTION_FILE_EXTENSION = "FIGURES.json";
public static final String TABLE_PARSING_FILE_EXTENSION = "TABLES.json";
public static final String TARGET_FILE_EXTENSION = "pdf";
private String dossierId;
private String fileId;
@Builder.Default
private Set<Integer> pages = new HashSet<>();
private String operation;
private String targetFileExtension;
private String responseFileExtension;
}

View File

@ -1,8 +1,5 @@
package com.iqser.red.service.peristence.v1.server.model;
import java.util.HashSet;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -14,14 +11,7 @@ import lombok.NoArgsConstructor;
@NoArgsConstructor
public class CvAnalysisServiceResponse {
public static final String OPERATION_TABLE_PARSING = "table_parsing";
private String dossierId;
private String fileId;
@Builder.Default
private Set<Integer> pages = new HashSet<>();
private String operation;
private String responseFile;
}

View File

@ -1,13 +1,9 @@
package com.iqser.red.service.peristence.v1.server.service;
import java.io.IOException;
import java.util.HashMap;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.peristence.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.peristence.v1.server.model.CvAnalysisServiceResponse;
@ -22,29 +18,31 @@ import lombok.extern.slf4j.Slf4j;
public class CvAnalysisMessageReceiver {
private final ObjectMapper objectMapper;
private final FileStatusService fileStatusService;
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
/**
 * Handles a message from {@link MessagingConfiguration#CV_ANALYSIS_RESPONSE_QUEUE}.
 *
 * <p>The payload is parsed into a {@link CvAnalysisServiceResponse} with the injected
 * {@code ObjectMapper}; the file it names is then handed to
 * {@code fileStatusService.setStatusAnalyse(dossierId, fileId, false)}. Parsing errors are
 * rethrown unchanged through {@code @SneakyThrows}.
 *
 * @param in serialized response message
 */
@SneakyThrows
@RabbitListener(queues = MessagingConfiguration.CV_ANALYSIS_RESPONSE_QUEUE)
public void receive(String in) {

    var response = objectMapper.readValue(in, CvAnalysisServiceResponse.class);
    log.debug("{}", response);

    fileStatusService.setStatusAnalyse(response.getDossierId(), response.getFileId(), false);

    log.info("Received message in {} for dossierId {} and fileId {}", MessagingConfiguration.CV_ANALYSIS_RESPONSE_QUEUE, response.getDossierId(), response.getFileId());
}
@SneakyThrows
@RabbitListener(queues = MessagingConfiguration.CV_ANALYSIS_DLQ)
public void handleDLQMessage(Message failedMessage) throws IOException {
public void handleDLQMessage(Message failedMessage) {
HashMap<String, Object> cvAnalysisResponse = objectMapper.readValue(failedMessage.getBody(), new TypeReference<>() {
});
String dossierId = (String) cvAnalysisResponse.get("dossierId");
String fileId = (String) cvAnalysisResponse.get("fileId");
var response = objectMapper.readValue(failedMessage.getBody(), CvAnalysisServiceResponse.class);
log.warn("Received message {} for dossierId {} and fileId {}", MessagingConfiguration.CV_ANALYSIS_DLQ, dossierId, fileId);
fileStatusProcessingUpdateService.analysisFailed(response.getDossierId(), response.getFileId());
log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.IMAGE_SERVICE_DLQ, response.getDossierId(), response.getFileId());
}
}

View File

@ -412,27 +412,49 @@ public class FileStatusService {
}
public void addToCvAnalysisRequestQueue(String dossierId, String fileId, Set<Integer> pages) {
public void addToFigureDetectionRequestQueue(String dossierId, String fileId) {
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.FIGURE_DETECTION_ANALYZING);
try {
rabbitTemplate.convertAndSend(
MessagingConfiguration.CV_ANALYSIS_QUEUE,
objectMapper.writeValueAsString(CvAnalysisServiceRequest.builder()
.dossierId(dossierId)
.fileId(fileId)
.pages(pages)
.operation(CvAnalysisServiceRequest.OPERATION_TABLE_PARSING)
.build()),
message -> {
message.getMessageProperties().setPriority(1);
return message;
});
rabbitTemplate.convertAndSend(MessagingConfiguration.CV_ANALYSIS_QUEUE, objectMapper.writeValueAsString(CvAnalysisServiceRequest.builder()
.dossierId(dossierId)
.fileId(fileId)
.operation(CvAnalysisServiceRequest.OPERATION_FIGURE_DETECTION)
.targetFileExtension(CvAnalysisServiceRequest.TARGET_FILE_EXTENSION)
.responseFileExtension(CvAnalysisServiceRequest.FIGURE_DETECTION_FILE_EXTENSION)
.build()), message -> {
message.getMessageProperties().setPriority(1);
return message;
});
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
public void addToTableParsingRequestQueue(String dossierId, String fileId) {
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.TABLE_PARSING_ANALYZING);
try {
rabbitTemplate.convertAndSend(MessagingConfiguration.CV_ANALYSIS_QUEUE, objectMapper.writeValueAsString(CvAnalysisServiceRequest.builder()
.dossierId(dossierId)
.fileId(fileId)
.operation(CvAnalysisServiceRequest.OPERATION_TABLE_PARSING)
.targetFileExtension(CvAnalysisServiceRequest.TARGET_FILE_EXTENSION)
.responseFileExtension(CvAnalysisServiceRequest.TABLE_PARSING_FILE_EXTENSION)
.build()), message -> {
message.getMessageProperties().setPriority(1);
return message;
});
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
public void updateLastOCRTime(String fileId) {
fileStatusPersistenceService.updateLastOCRTime(fileId, OffsetDateTime.now());
@ -448,6 +470,8 @@ public class FileStatusService {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.IMAGE_INFO);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
if (keepManualRedactions) {
fileStatusPersistenceService.overwriteFile(fileId, uploader, filename, true);
@ -541,6 +565,7 @@ public class FileStatusService {
return;
}
if (fileEntity.isExcluded()) {
log.debug("File {} is excluded", fileEntity.getId());
return;
@ -551,7 +576,17 @@ public class FileStatusService {
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
if (!reanalyse && settings.isImageServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO)) {
if (!reanalyse && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.FIGURES) && settings.isCvServiceEnabled()) {
addToFigureDetectionRequestQueue(dossierId, fileId);
return;
}
if (!reanalyse && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) && settings.isCvServiceEnabled()) {
addToTableParsingRequestQueue(dossierId, fileId);
return;
}
if (!reanalyse && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) && settings.isImageServiceEnabled()) {
log.debug("Add file: {} from dossier {} to Image queue", fileId, dossierId);
addToImageQueue(dossierId, fileId);
return;