Merge branch 'AZURE_NER_FP' into 'master'

RED-9918: Azure entity recognition (Spike)

See merge request redactmanager/persistence-service!696
This commit is contained in:
Maverick Studer 2024-08-26 11:39:08 +02:00
commit 5e8d8ea6f6
12 changed files with 173 additions and 6 deletions

View File

@ -42,6 +42,7 @@ public class AdminInterfaceController {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.AZURE_NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
@ -147,6 +148,7 @@ public class AdminInterfaceController {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.AZURE_NER_ENTITIES);
fileStatusService.setStatusFullReprocess(dossierId, fileId, true, true);
}

View File

@ -54,6 +54,10 @@ public class MessagingConfiguration {
public static final String NER_SERVICE_RESPONSE_QUEUE = "entity_response_queue";
public static final String NER_SERVICE_DLQ = "entity_dead_letter_queue";
public static final String AZURE_NER_SERVICE_QUEUE = "azure_entity_request_queue";
public static final String AZURE_NER_SERVICE_RESPONSE_QUEUE = "azure_entity_response_queue";
public static final String AZURE_NER_SERVICE_DLQ = "azure_entity_dead_letter_queue";
public static final String PRE_PROCESSING_QUEUE = "preprocessingQueue";
public static final String PRE_PROCESSING_DLQ = "preprocessingDLQ";
@ -131,6 +135,27 @@ public class MessagingConfiguration {
}
/**
 * Declares the durable queue named {@link #AZURE_NER_SERVICE_QUEUE}.
 * <p>
 * The queue is configured with an empty {@code x-dead-letter-exchange} (RabbitMQ's default exchange) and
 * {@link #AZURE_NER_SERVICE_DLQ} as {@code x-dead-letter-routing-key}.
 *
 * @return the queue definition built by {@link QueueBuilder}
 */
@Bean
public Queue azureNerRequestQueue() {

    return QueueBuilder.durable(AZURE_NER_SERVICE_QUEUE)
            .withArgument("x-dead-letter-exchange", "")
            .withArgument("x-dead-letter-routing-key", AZURE_NER_SERVICE_DLQ)
            .build();
}
/**
 * Declares the durable queue named {@link #AZURE_NER_SERVICE_RESPONSE_QUEUE}.
 * <p>
 * Uses the same dead-letter arguments as the request queue: an empty {@code x-dead-letter-exchange}
 * (RabbitMQ's default exchange) and {@link #AZURE_NER_SERVICE_DLQ} as {@code x-dead-letter-routing-key}.
 *
 * @return the queue definition built by {@link QueueBuilder}
 */
@Bean
public Queue azureNerResponseQueue() {

    return QueueBuilder.durable(AZURE_NER_SERVICE_RESPONSE_QUEUE)
            .withArgument("x-dead-letter-exchange", "")
            .withArgument("x-dead-letter-routing-key", AZURE_NER_SERVICE_DLQ)
            .build();
}
/**
 * Declares the durable queue named {@link #AZURE_NER_SERVICE_DLQ}, which the Azure NER request and response
 * queues use as their dead-letter routing key. No extra queue arguments are set.
 *
 * @return the queue definition built by {@link QueueBuilder}
 */
@Bean
public Queue azureNerResponseDLQ() {

    Queue deadLetterQueue = QueueBuilder.durable(AZURE_NER_SERVICE_DLQ).build();
    return deadLetterQueue;
}
@Bean
public Queue imageRequestQueue() {

View File

@ -26,6 +26,7 @@ public class FileExchangeNames {
public static Definition POSITIONS = new Definition(FileType.DOCUMENT_POSITION);
public static Definition SIMPLIFIED_TEXT = new Definition(FileType.SIMPLIFIED_TEXT);
public static Definition NER_ENTITIES = new Definition(FileType.NER_ENTITIES);
public static Definition AZURE_NER_ENTITIES = new Definition(FileType.AZURE_NER_ENTITIES);
public static Definition TABLES = new Definition(FileType.TABLES);
public static Definition IMAGES = new Definition(FileType.IMAGE_INFO);

View File

@ -62,6 +62,7 @@ public class FileExportService {
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.POSITIONS);
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.PAGES);
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.NER_ENTITIES);
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.AZURE_NER_ENTITIES);
addArchiveModelForStorageFile(archiver, file, fileFolder, FileExchangeNames.SIMPLIFIED_TEXT);
}

View File

@ -99,6 +99,7 @@ public class FileExchangeArchiveReader {
FileExchangeNames.POSITIONS,
FileExchangeNames.SIMPLIFIED_TEXT,
FileExchangeNames.NER_ENTITIES,
FileExchangeNames.AZURE_NER_ENTITIES,
FileExchangeNames.TABLES,
FileExchangeNames.IMAGES,
FileExchangeNames.VISUAL_LAYOUT,

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.persistence.management.v1.processor.model;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Request payload carrying a dossier id, a file id and two file-extension strings.
 * <p>
 * Accessors, builder and constructors are generated by Lombok; the field declaration order below defines the
 * parameter order of the generated all-args constructor, so do not reorder the fields.
 * <p>
 * NOTE(review): the sender visible in this change set ({@code addToAzureNerQueue}) builds a
 * {@code NerServiceRequest} instead of this class - confirm whether this type is still required.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class AzureNerServiceRequest {
// String form of FileType.SIMPLIFIED_TEXT, followed by that type's extension and ".gz".
public static final String TARGET_FILE_EXTENSION = FileType.SIMPLIFIED_TEXT + FileType.SIMPLIFIED_TEXT.getExtension() + ".gz";
// String form of FileType.AZURE_NER_ENTITIES, followed by that type's extension and ".gz".
public static final String RESPONSE_FILE_EXTENSION = FileType.AZURE_NER_ENTITIES + FileType.AZURE_NER_ENTITIES.getExtension() + ".gz";
// Identifier of the dossier the file belongs to.
private String dossierId;
// Identifier of the file within the dossier.
private String fileId;
// Presumably the storage suffix of the file the service should read (see TARGET_FILE_EXTENSION) - TODO confirm.
private String targetFileExtension;
// Presumably the storage suffix under which the service should store its result (see RESPONSE_FILE_EXTENSION) - TODO confirm.
private String responseFileExtension;
}

View File

@ -30,7 +30,7 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Inter
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.NerServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
@ -238,14 +238,14 @@ public class FileStatusService {
}
if (settings.isFigureDetectionEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.FIGURE)) {
log.debug("Add file: {} from dossier {} to Figure Detection queue", fileId, dossierId);
log.info("Add file: {} from dossier {} to Figure Detection queue", fileId, dossierId);
addToFigureDetectionRequestQueue(dossierId, fileId);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
if (settings.isCvTableParsingEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES)) {
log.debug("Add file: {} from dossier {} to Cv Service queue", fileId, dossierId);
log.info("Add file: {} from dossier {} to Cv Service queue", fileId, dossierId);
addToTableParsingRequestQueue(dossierId, fileId);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
@ -263,7 +263,7 @@ public class FileStatusService {
var dossierTemplate = dossier.getDossierTemplate();
if (dossierTemplate.isOcrByDefault() && fileModel.getOcrEndTime() == null && !fileModel.isSoftOrHardDeleted()) {
log.debug("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
log.info("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
setStatusOcrQueued(dossierId, fileId);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
@ -272,17 +272,24 @@ public class FileStatusService {
if (!fileManagementStorageService.objectExists(dossierId, fileId, FileType.DOCUMENT_TEXT)) {
var layoutParsingRequest = layoutParsingRequestFactory.build(dossierId, fileId, priority);
setStatusFullProcessing(fileId);
log.info("Add file: {} from dossier {} to layout parsing request queue", fileId, dossierId);
rabbitTemplate.convertAndSend(LAYOUT_PARSING_REQUEST_QUEUE, layoutParsingRequest);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
if (settings.isNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.NER_ENTITIES)) {
log.debug("Add file: {} from dossier {} to NER queue", fileId, dossierId);
log.info("Add file: {} from dossier {} to NER queue", fileId, dossierId);
addToNerQueue(dossierId, fileId);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
if (settings.isAzureNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.AZURE_NER_ENTITIES)) {
log.info("Add file: {} from dossier {} to AZURE NER queue", fileId, dossierId);
addToAzureNerQueue(dossierId, fileId);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
@ -481,6 +488,23 @@ public class FileStatusService {
}
/**
 * Marks the file via {@code setStatusNerAnalyzing} and publishes a {@link NerServiceRequest} to
 * {@link MessagingConfiguration#AZURE_NER_SERVICE_QUEUE}.
 * <p>
 * The request carries {@link NerServiceRequest#TARGET_FILE_EXTENSION} as target extension and
 * {@link NerServiceRequest#AZURE_RESPONSE_FILE_EXTENSION} as response extension; the outgoing message is sent
 * with priority 1.
 *
 * @param dossierId id of the dossier the file belongs to
 * @param fileId    id of the file to hand over to the Azure NER queue
 */
protected void addToAzureNerQueue(String dossierId, String fileId) {

    setStatusNerAnalyzing(fileId);

    NerServiceRequest azureNerRequest = NerServiceRequest.builder()
            .dossierId(dossierId)
            .fileId(fileId)
            .targetFileExtension(NerServiceRequest.TARGET_FILE_EXTENSION)
            .responseFileExtension(NerServiceRequest.AZURE_RESPONSE_FILE_EXTENSION)
            .build();

    rabbitTemplate.convertAndSend(MessagingConfiguration.AZURE_NER_SERVICE_QUEUE, azureNerRequest, outgoing -> {
        // Stamp the message priority just before it is published.
        outgoing.getMessageProperties().setPriority(1);
        return outgoing;
    });
}
private MessageType calculateMessageType(boolean reanalyse, ProcessingStatus processingStatus, FileModel fileModel) {
if (ProcessingStatus.NER_ANALYZING.equals(processingStatus)) {
@ -792,6 +816,7 @@ public class FileStatusService {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.AZURE_NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);

View File

@ -0,0 +1,69 @@
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.io.IOException;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import java.util.HashMap;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import io.micrometer.observation.ObservationRegistry;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
 * Listens on the Azure NER response queue and on the Azure NER dead-letter queue and forwards the outcome to
 * {@link FileStatusService} and {@link FileStatusProcessingUpdateService} respectively.
 * <p>
 * Both listeners read the message body as a JSON object and use only its {@code dossierId} and {@code fileId}
 * entries.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class AzureNerMessageReceiver {

    private final FileStatusService fileStatusService;
    private final ObjectMapper objectMapper;
    private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;


    /**
     * Handles a message from {@link MessagingConfiguration#AZURE_NER_SERVICE_RESPONSE_QUEUE}: logs it and calls
     * {@code fileStatusService.setStatusAnalyse(dossierId, fileId, false)}.
     *
     * @param message the AMQP message whose body is parsed as JSON
     */
    @SneakyThrows
    @RabbitHandler
    @RabbitListener(queues = MessagingConfiguration.AZURE_NER_SERVICE_RESPONSE_QUEUE)
    public void receive(Message message) {

        HashMap<String, Object> payload = readPayload(message);
        String dossierId = (String) payload.get("dossierId");
        String fileId = (String) payload.get("fileId");

        log.info("Received message {} for dossierId {} and fileId {}", MessagingConfiguration.AZURE_NER_SERVICE_RESPONSE_QUEUE, dossierId, fileId);

        fileStatusService.setStatusAnalyse(dossierId, fileId, false);
    }


    /**
     * Handles a message from {@link MessagingConfiguration#AZURE_NER_SERVICE_DLQ}: logs a warning and reports
     * the file as failed through {@code fileStatusProcessingUpdateService.analysisFailed}, attaching a
     * {@link FileErrorInfo} stamped with the current time truncated to milliseconds.
     *
     * @param failedMessage the dead-lettered AMQP message whose body is parsed as JSON
     * @throws IOException if the message body cannot be read as JSON
     */
    @RabbitHandler
    @RabbitListener(queues = MessagingConfiguration.AZURE_NER_SERVICE_DLQ)
    public void handleDLQMessage(Message failedMessage) throws IOException {

        HashMap<String, Object> payload = readPayload(failedMessage);
        String dossierId = (String) payload.get("dossierId");
        String fileId = (String) payload.get("fileId");

        log.warn("Received message {} for dossierId {} and fileId {}", MessagingConfiguration.AZURE_NER_SERVICE_DLQ, dossierId, fileId);

        FileErrorInfo errorInfo = new FileErrorInfo("azure ner service failed",
                                                    MessagingConfiguration.AZURE_NER_SERVICE_DLQ,
                                                    "azure-ner-service",
                                                    OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
        fileStatusProcessingUpdateService.analysisFailed(dossierId, fileId, errorInfo);
    }


    /** Deserializes the body of the given message into a string-keyed map. */
    private HashMap<String, Object> readPayload(Message amqpMessage) throws IOException {

        TypeReference<HashMap<String, Object>> payloadType = new TypeReference<>() {
        };
        return objectMapper.readValue(amqpMessage.getBody(), payloadType);
    }

}

View File

@ -24,6 +24,7 @@ public class FileManagementServiceSettings {
private boolean imageServiceEnabled = true;
private boolean nerServiceEnabled = true;
private boolean azureNerServiceEnabled;
private boolean visualLayoutParsingEnabled;
private boolean storeImageFile = true;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.persistence.management.v1.processor.model;
package com.iqser.red.service.persistence.service.v1.api.shared.model;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -15,6 +15,7 @@ public class NerServiceRequest {
public static final String TARGET_FILE_EXTENSION = FileType.SIMPLIFIED_TEXT + FileType.SIMPLIFIED_TEXT.getExtension() + ".gz";
public static final String RESPONSE_FILE_EXTENSION = FileType.NER_ENTITIES + FileType.NER_ENTITIES.getExtension() + ".gz";
public static final String AZURE_RESPONSE_FILE_EXTENSION = FileType.AZURE_NER_ENTITIES + FileType.AZURE_NER_ENTITIES.getExtension() + ".gz";
private String dossierId;
private String fileId;

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.persistence.service.v1.api.shared.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Response payload identifying a file by dossier id and file id.
 * <p>
 * Accessors, builder and constructors are generated by Lombok; the field declaration order below defines the
 * parameter order of the generated all-args constructor, so do not reorder the fields.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class NerServiceResponse {
// Identifier of the dossier the file belongs to.
private String dossierId;
// Identifier of the file within the dossier.
private String fileId;
}

View File

@ -11,6 +11,7 @@ public enum FileType {
SIMPLIFIED_TEXT(".json"),
TEXT(".json"), // deprecated file type, only present in legacy migrations
NER_ENTITIES(".json"),
AZURE_NER_ENTITIES(".json"),
IMAGE_INFO(".json"),
IMPORTED_REDACTIONS(".json"),
IMPORTED_LEGAL_BASES(".json"),