Merge branch 'AZURE_NER_FP' into 'master'
RED-9918: Azure entity recognition (Spike). See merge request redactmanager/persistence-service!696
This commit is contained in:
commit
5e8d8ea6f6
@ -42,6 +42,7 @@ public class AdminInterfaceController {
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.AZURE_NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
|
||||
|
||||
@ -147,6 +148,7 @@ public class AdminInterfaceController {
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.AZURE_NER_ENTITIES);
|
||||
|
||||
fileStatusService.setStatusFullReprocess(dossierId, fileId, true, true);
|
||||
}
|
||||
|
||||
@ -54,6 +54,10 @@ public class MessagingConfiguration {
|
||||
public static final String NER_SERVICE_RESPONSE_QUEUE = "entity_response_queue";
|
||||
public static final String NER_SERVICE_DLQ = "entity_dead_letter_queue";
|
||||
|
||||
public static final String AZURE_NER_SERVICE_QUEUE = "azure_entity_request_queue";
|
||||
public static final String AZURE_NER_SERVICE_RESPONSE_QUEUE = "azure_entity_response_queue";
|
||||
public static final String AZURE_NER_SERVICE_DLQ = "azure_entity_dead_letter_queue";
|
||||
|
||||
public static final String PRE_PROCESSING_QUEUE = "preprocessingQueue";
|
||||
public static final String PRE_PROCESSING_DLQ = "preprocessingDLQ";
|
||||
|
||||
@ -131,6 +135,27 @@ public class MessagingConfiguration {
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue azureNerRequestQueue() {
|
||||
|
||||
return QueueBuilder.durable(AZURE_NER_SERVICE_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", AZURE_NER_SERVICE_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue azureNerResponseQueue() {
|
||||
|
||||
return QueueBuilder.durable(AZURE_NER_SERVICE_RESPONSE_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", AZURE_NER_SERVICE_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue azureNerResponseDLQ() {
|
||||
|
||||
return QueueBuilder.durable(AZURE_NER_SERVICE_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue imageRequestQueue() {
|
||||
|
||||
|
||||
@ -26,6 +26,7 @@ public class FileExchangeNames {
|
||||
public static Definition POSITIONS = new Definition(FileType.DOCUMENT_POSITION);
|
||||
public static Definition SIMPLIFIED_TEXT = new Definition(FileType.SIMPLIFIED_TEXT);
|
||||
public static Definition NER_ENTITIES = new Definition(FileType.NER_ENTITIES);
|
||||
public static Definition AZURE_NER_ENTITIES = new Definition(FileType.AZURE_NER_ENTITIES);
|
||||
|
||||
public static Definition TABLES = new Definition(FileType.TABLES);
|
||||
public static Definition IMAGES = new Definition(FileType.IMAGE_INFO);
|
||||
|
||||
@ -62,6 +62,7 @@ public class FileExportService {
|
||||
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.POSITIONS);
|
||||
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.PAGES);
|
||||
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.NER_ENTITIES);
|
||||
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.AZURE_NER_ENTITIES);
|
||||
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.SIMPLIFIED_TEXT);
|
||||
}
|
||||
|
||||
|
||||
@ -99,6 +99,7 @@ public class FileExchangeArchiveReader {
|
||||
FileExchangeNames.POSITIONS,
|
||||
FileExchangeNames.SIMPLIFIED_TEXT,
|
||||
FileExchangeNames.NER_ENTITIES,
|
||||
FileExchangeNames.AZURE_NER_ENTITIES,
|
||||
FileExchangeNames.TABLES,
|
||||
FileExchangeNames.IMAGES,
|
||||
FileExchangeNames.VISUAL_LAYOUT,
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class AzureNerServiceRequest {
|
||||
|
||||
public static final String TARGET_FILE_EXTENSION = FileType.SIMPLIFIED_TEXT + FileType.SIMPLIFIED_TEXT.getExtension() + ".gz";
|
||||
public static final String RESPONSE_FILE_EXTENSION = FileType.AZURE_NER_ENTITIES + FileType.AZURE_NER_ENTITIES.getExtension() + ".gz";
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private String targetFileExtension;
|
||||
private String responseFileExtension;
|
||||
|
||||
}
|
||||
@ -30,7 +30,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Inter
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.NerServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
||||
@ -238,14 +238,14 @@ public class FileStatusService {
|
||||
}
|
||||
|
||||
if (settings.isFigureDetectionEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.FIGURE)) {
|
||||
log.debug("Add file: {} from dossier {} to Figure Detection queue", fileId, dossierId);
|
||||
log.info("Add file: {} from dossier {} to Figure Detection queue", fileId, dossierId);
|
||||
addToFigureDetectionRequestQueue(dossierId, fileId);
|
||||
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
|
||||
return;
|
||||
}
|
||||
|
||||
if (settings.isCvTableParsingEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES)) {
|
||||
log.debug("Add file: {} from dossier {} to Cv Service queue", fileId, dossierId);
|
||||
log.info("Add file: {} from dossier {} to Cv Service queue", fileId, dossierId);
|
||||
addToTableParsingRequestQueue(dossierId, fileId);
|
||||
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
|
||||
return;
|
||||
@ -263,7 +263,7 @@ public class FileStatusService {
|
||||
|
||||
var dossierTemplate = dossier.getDossierTemplate();
|
||||
if (dossierTemplate.isOcrByDefault() && fileModel.getOcrEndTime() == null && !fileModel.isSoftOrHardDeleted()) {
|
||||
log.debug("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
|
||||
log.info("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
|
||||
setStatusOcrQueued(dossierId, fileId);
|
||||
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
|
||||
return;
|
||||
@ -272,17 +272,24 @@ public class FileStatusService {
|
||||
if (!fileManagementStorageService.objectExists(dossierId, fileId, FileType.DOCUMENT_TEXT)) {
|
||||
var layoutParsingRequest = layoutParsingRequestFactory.build(dossierId, fileId, priority);
|
||||
setStatusFullProcessing(fileId);
|
||||
log.info("Add file: {} from dossier {} to layout parsing request queue", fileId, dossierId);
|
||||
rabbitTemplate.convertAndSend(LAYOUT_PARSING_REQUEST_QUEUE, layoutParsingRequest);
|
||||
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
|
||||
return;
|
||||
}
|
||||
|
||||
if (settings.isNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.NER_ENTITIES)) {
|
||||
log.debug("Add file: {} from dossier {} to NER queue", fileId, dossierId);
|
||||
log.info("Add file: {} from dossier {} to NER queue", fileId, dossierId);
|
||||
addToNerQueue(dossierId, fileId);
|
||||
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
|
||||
return;
|
||||
}
|
||||
if (settings.isAzureNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.AZURE_NER_ENTITIES)) {
|
||||
log.info("Add file: {} from dossier {} to AZURE NER queue", fileId, dossierId);
|
||||
addToAzureNerQueue(dossierId, fileId);
|
||||
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
|
||||
return;
|
||||
}
|
||||
|
||||
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
|
||||
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
|
||||
@ -481,6 +488,23 @@ public class FileStatusService {
|
||||
}
|
||||
|
||||
|
||||
protected void addToAzureNerQueue(String dossierId, String fileId) {
|
||||
|
||||
setStatusNerAnalyzing(fileId);
|
||||
rabbitTemplate.convertAndSend(MessagingConfiguration.AZURE_NER_SERVICE_QUEUE,
|
||||
NerServiceRequest.builder()
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.targetFileExtension(NerServiceRequest.TARGET_FILE_EXTENSION)
|
||||
.responseFileExtension(NerServiceRequest.AZURE_RESPONSE_FILE_EXTENSION)
|
||||
.build(),
|
||||
message -> {
|
||||
message.getMessageProperties().setPriority(1);
|
||||
return message;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private MessageType calculateMessageType(boolean reanalyse, ProcessingStatus processingStatus, FileModel fileModel) {
|
||||
|
||||
if (ProcessingStatus.NER_ANALYZING.equals(processingStatus)) {
|
||||
@ -792,6 +816,7 @@ public class FileStatusService {
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.AZURE_NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
|
||||
|
||||
|
||||
@ -0,0 +1,69 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.springframework.amqp.core.Message;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
|
||||
import io.micrometer.observation.ObservationRegistry;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class AzureNerMessageReceiver {
|
||||
|
||||
private final FileStatusService fileStatusService;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@RabbitHandler
|
||||
@RabbitListener(queues = MessagingConfiguration.AZURE_NER_SERVICE_RESPONSE_QUEUE)
|
||||
public void receive(Message message) {
|
||||
|
||||
HashMap<String, Object> entityResponse = objectMapper.readValue(message.getBody(), new TypeReference<>() {
|
||||
});
|
||||
|
||||
String dossierId = (String) entityResponse.get("dossierId");
|
||||
String fileId = (String) entityResponse.get("fileId");
|
||||
|
||||
log.info("Received message {} for dossierId {} and fileId {}", MessagingConfiguration.AZURE_NER_SERVICE_RESPONSE_QUEUE, dossierId, fileId);
|
||||
fileStatusService.setStatusAnalyse(dossierId, fileId, false);
|
||||
}
|
||||
|
||||
|
||||
@RabbitHandler
|
||||
@RabbitListener(queues = MessagingConfiguration.AZURE_NER_SERVICE_DLQ)
|
||||
public void handleDLQMessage(Message failedMessage) throws IOException {
|
||||
|
||||
HashMap<String, Object> entityResponse = objectMapper.readValue(failedMessage.getBody(), new TypeReference<>() {
|
||||
});
|
||||
String dossierId = (String) entityResponse.get("dossierId");
|
||||
String fileId = (String) entityResponse.get("fileId");
|
||||
|
||||
log.warn("Received message {} for dossierId {} and fileId {}", MessagingConfiguration.AZURE_NER_SERVICE_DLQ, dossierId, fileId);
|
||||
fileStatusProcessingUpdateService.analysisFailed(dossierId,
|
||||
fileId,
|
||||
new FileErrorInfo("azure ner service failed",
|
||||
MessagingConfiguration.AZURE_NER_SERVICE_DLQ,
|
||||
"azure-ner-service",
|
||||
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)));
|
||||
}
|
||||
|
||||
}
|
||||
@ -24,6 +24,7 @@ public class FileManagementServiceSettings {
|
||||
|
||||
private boolean imageServiceEnabled = true;
|
||||
private boolean nerServiceEnabled = true;
|
||||
private boolean azureNerServiceEnabled;
|
||||
private boolean visualLayoutParsingEnabled;
|
||||
|
||||
private boolean storeImageFile = true;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
||||
package com.iqser.red.service.persistence.service.v1.api.shared.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
|
||||
@ -15,6 +15,7 @@ public class NerServiceRequest {
|
||||
|
||||
public static final String TARGET_FILE_EXTENSION = FileType.SIMPLIFIED_TEXT + FileType.SIMPLIFIED_TEXT.getExtension() + ".gz";
|
||||
public static final String RESPONSE_FILE_EXTENSION = FileType.NER_ENTITIES + FileType.NER_ENTITIES.getExtension() + ".gz";
|
||||
public static final String AZURE_RESPONSE_FILE_EXTENSION = FileType.AZURE_NER_ENTITIES + FileType.AZURE_NER_ENTITIES.getExtension() + ".gz";
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
@ -0,0 +1,16 @@
|
||||
package com.iqser.red.service.persistence.service.v1.api.shared.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class NerServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
}
|
||||
@ -11,6 +11,7 @@ public enum FileType {
|
||||
SIMPLIFIED_TEXT(".json"),
|
||||
TEXT(".json"), // deprecated file type, only present in legacy migrations
|
||||
NER_ENTITIES(".json"),
|
||||
AZURE_NER_ENTITIES(".json"),
|
||||
IMAGE_INFO(".json"),
|
||||
IMPORTED_REDACTIONS(".json"),
|
||||
IMPORTED_LEGAL_BASES(".json"),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user