RED-6725: integrate layoutparsing service #55
@ -75,22 +75,75 @@ public class RedactionLogController implements RedactionLogResource {
|
||||
public ResponseEntity<?> getSectionText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
|
||||
|
||||
try {
|
||||
return buildZipFileResponseEntity(fileId, dossierId, FileType.TEXT);
|
||||
} catch (FeignException e) {
|
||||
throw processFeignException(e);
|
||||
}
|
||||
}
|
||||
|
||||
HttpHeaders httpHeaders = new HttpHeaders();
|
||||
httpHeaders.setContentType(MediaType.parseMediaType("application/zip"));
|
||||
|
||||
var fileStatus = fileStatusService.getStatus(fileId);
|
||||
String filename = fileStatus.getFilename();
|
||||
if (filename != null) {
|
||||
var index = filename.lastIndexOf(".");
|
||||
String prefix = filename.substring(0, index);
|
||||
filename = prefix + ".json";
|
||||
httpHeaders.add("Content-Disposition", "attachment; filename=" + prefix + ".zip");
|
||||
}
|
||||
private ResponseEntity<byte[]> buildZipFileResponseEntity(String fileId, String dossierId, FileType fileType) throws IOException {
|
||||
|
||||
byte[] zipBytes = getZippedBytes(dossierId, fileId, filename, FileType.TEXT);
|
||||
return new ResponseEntity<>(zipBytes, httpHeaders, HttpStatus.OK);
|
||||
HttpHeaders httpHeaders = new HttpHeaders();
|
||||
httpHeaders.setContentType(MediaType.parseMediaType("application/zip"));
|
||||
|
||||
var fileStatus = fileStatusService.getStatus(fileId);
|
||||
String filename = fileStatus.getFilename();
|
||||
if (filename != null) {
|
||||
var index = filename.lastIndexOf(".");
|
||||
String prefix = filename.substring(0, index);
|
||||
filename = prefix + ".json";
|
||||
httpHeaders.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename*=utf-8''" + StringEncodingUtils.urlEncode(prefix) + ".zip");
|
||||
}
|
||||
|
||||
byte[] zipBytes = getZippedBytes(dossierId, fileId, filename, fileType);
|
||||
httpHeaders.setContentLength(zipBytes.length);
|
||||
return new ResponseEntity<>(zipBytes, httpHeaders, HttpStatus.OK);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
|
||||
public ResponseEntity<?> getDocumentText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
|
||||
|
||||
try {
|
||||
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_TEXT);
|
||||
} catch (FeignException e) {
|
||||
throw processFeignException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
|
||||
public ResponseEntity<?> getDocumentPositions(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
|
||||
|
||||
try {
|
||||
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_POSITION);
|
||||
} catch (FeignException e) {
|
||||
throw processFeignException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
|
||||
public ResponseEntity<?> getDocumentStructure(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
|
||||
|
||||
try {
|
||||
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_STRUCTURE);
|
||||
} catch (FeignException e) {
|
||||
throw processFeignException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
|
||||
public ResponseEntity<?> getDocumentPages(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
|
||||
|
||||
try {
|
||||
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_PAGES);
|
||||
} catch (FeignException e) {
|
||||
throw processFeignException(e);
|
||||
}
|
||||
|
||||
@ -26,6 +26,10 @@ public interface RedactionLogResource {
|
||||
String REDACTION_LOG_PATH = ExternalApi.BASE_PATH + "/redactionLog";
|
||||
String SECTION_GRID_PATH = ExternalApi.BASE_PATH + "/sectionGrid";
|
||||
String SECTION_TEXT_PATH = ExternalApi.BASE_PATH + "/sectionText";
|
||||
String DOCUMENT_TEXT_PATH = ExternalApi.BASE_PATH + "/documentText";
|
||||
String DOCUMENT_POSITIONS_PATH = ExternalApi.BASE_PATH + "/documentPositions";
|
||||
String DOCUMENT_PAGES_PATH = ExternalApi.BASE_PATH + "/documentPages";
|
||||
String DOCUMENT_STRUCTURE_PATH = ExternalApi.BASE_PATH + "/documentStructure";
|
||||
String SIMPLIFIED_SECTION_TEXT_PATH = ExternalApi.BASE_PATH + "/simplifiedSectionText";
|
||||
|
||||
String FILE_ID = "fileId";
|
||||
@ -37,7 +41,7 @@ public interface RedactionLogResource {
|
||||
|
||||
@GetMapping(value = REDACTION_LOG_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
@Operation(summary = "Gets the redaction log for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
|
||||
RedactionLog getRedactionLog(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@PathVariable(FILE_ID) String fileId,
|
||||
@RequestParam(value = "excludedType", required = false) List<String> excludedTypes,
|
||||
@ -47,25 +51,50 @@ public interface RedactionLogResource {
|
||||
|
||||
@GetMapping(value = SECTION_GRID_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
@Operation(summary = "Gets the section grid for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The section grid is not found.")})
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The section grid is not found.")})
|
||||
SectionGrid getSectionGrid(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@Deprecated
|
||||
@GetMapping(value = SECTION_TEXT_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
|
||||
@Operation(summary = "Gets the section text for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The section text is not found.")})
|
||||
@Operation(summary = "Gets the text blocks of a document for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The section text is not found.")})
|
||||
ResponseEntity<?> getSectionText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@GetMapping(value = DOCUMENT_TEXT_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
|
||||
@Operation(summary = "Gets the text blocks of a document for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The document text is not found.")})
|
||||
ResponseEntity<?> getDocumentText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@GetMapping(value = DOCUMENT_POSITIONS_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
|
||||
@Operation(summary = "Gets the positions of the text blocks of a document for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The document positions is not found.")})
|
||||
ResponseEntity<?> getDocumentPositions(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@GetMapping(value = DOCUMENT_STRUCTURE_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
|
||||
@Operation(summary = "Gets the document structure for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The document structure is not found.")})
|
||||
ResponseEntity<?> getDocumentStructure(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@GetMapping(value = DOCUMENT_PAGES_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
|
||||
@Operation(summary = "Gets the page information of a document for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The page information is not found.")})
|
||||
ResponseEntity<?> getDocumentPages(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@GetMapping(value = SIMPLIFIED_SECTION_TEXT_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
|
||||
@Operation(summary = "Gets the simplified section text for a fileId", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The simplified section text is not found.")})
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The simplified section text is not found.")})
|
||||
ResponseEntity<?> getSimplifiedSectionText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
|
||||
|
||||
|
||||
@PostMapping(value = REDACTION_LOG_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE + "/filtered", produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
@Operation(summary = "Gets the redaction log for a fileId grater than the specified date", description = "None")
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
|
||||
RedactionLog getFilteredRedactionLog(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@PathVariable(FILE_ID) String fileId,
|
||||
@RequestBody FilteredRedactionLogRequest filteredRedactionLogRequest);
|
||||
|
||||
@ -37,7 +37,10 @@ public class AdminInterfaceController {
|
||||
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
|
||||
@ -136,7 +139,10 @@ public class AdminInterfaceController {
|
||||
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
|
||||
fileStatusService.setStatusFullReprocess(dossierId, fileId, true, true);
|
||||
|
||||
@ -88,6 +88,12 @@
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.knecon.fforesight</groupId>
|
||||
<artifactId>layoutparser-service-internal-api</artifactId>
|
||||
<version>0.19.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>search-service-api-v1</artifactId>
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.configuration;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_DLQ;
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE;
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_QUEUE;
|
||||
|
||||
import org.springframework.amqp.core.Queue;
|
||||
import org.springframework.amqp.core.QueueBuilder;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
@ -58,6 +62,8 @@ public class MessagingConfiguration {
|
||||
public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue";
|
||||
public static final String OCR_STATUS_UPDATE_RESPONSE_DQL = "ocr_status_update_response_dql";
|
||||
|
||||
public static final String X_ERROR_INFO_HEADER = "x-error-message";
|
||||
public static final String X_ERROR_INFO_TIMESTAMP_HEADER = "x-error-message-timestamp";
|
||||
|
||||
@Bean
|
||||
public Queue nerRequestQueue() {
|
||||
@ -311,4 +317,25 @@ public class MessagingConfiguration {
|
||||
.build();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public Queue layoutparsingRequestQueue() {
|
||||
|
||||
return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE)//
|
||||
.withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue layoutparsingResponseQueue() {
|
||||
|
||||
return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE)//
|
||||
.withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public Queue layoutparsingDLQ() {
|
||||
|
||||
return QueueBuilder.durable(LAYOUT_PARSING_DLQ).build();
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ public class FileManagementStorageService {
|
||||
try {
|
||||
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG), RedactionLog.class);
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("Text not available.");
|
||||
log.debug("RedactionLog does not exist");
|
||||
throw new NotFoundException("RedactionLog does not exist");
|
||||
}
|
||||
}
|
||||
@ -76,7 +76,7 @@ public class FileManagementStorageService {
|
||||
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID), SectionGrid.class);
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("SectionGrid not available.");
|
||||
throw new NotFoundException("RedactionLog does not exist");
|
||||
throw new NotFoundException("SectionGrid does not exist");
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Could not convert SectionGrid", e);
|
||||
}
|
||||
|
||||
@ -40,12 +40,6 @@ public class FileStatusProcessingUpdateService {
|
||||
|
||||
switch (analyzeResult.getMessageType()) {
|
||||
|
||||
case STRUCTURE_ANALYSE:
|
||||
|
||||
//TODO This might also be priority, depending on what the previous call was.
|
||||
fileStatusService.setStatusAnalyse(dossierId, fileId, false);
|
||||
break;
|
||||
|
||||
case SURROUNDING_TEXT:
|
||||
fileStatusService.setStatusProcessed(analyzeResult.getFileId());
|
||||
manualRedactionService.updateSurroundingText(fileId, analyzeResult.getManualRedactions());
|
||||
@ -75,7 +69,6 @@ public class FileStatusProcessingUpdateService {
|
||||
}
|
||||
|
||||
@Transactional
|
||||
|
||||
public void preprocessingSuccessful(String dossierId, String fileId, UntouchedDocumentResponse untouchedDocumentResponse) {
|
||||
|
||||
fileStatusService.updateProcessingStatusPreprocessed(dossierId, fileId, untouchedDocumentResponse.isHasHighlights(), untouchedDocumentResponse.getFileSize());
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_QUEUE;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -12,7 +14,6 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.iqser.red.service.pdftron.redaction.v1.api.model.DocumentRequest;
|
||||
import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocumentRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
|
||||
@ -22,6 +23,7 @@ import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysi
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierPersistenceService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileAttributeConfigPersistenceService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileStatusPersistenceService;
|
||||
@ -59,7 +61,6 @@ public class FileStatusService {
|
||||
private final FileStatusPersistenceService fileStatusPersistenceService;
|
||||
private final DossierPersistenceService dossierPersistenceService;
|
||||
private final RabbitTemplate rabbitTemplate;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ManualRedactionProviderService manualRedactionProviderService;
|
||||
private final FileManagementStorageService fileManagementStorageService;
|
||||
private final LegalBasisChangePersistenceService legalBasisChangePersistenceService;
|
||||
@ -74,6 +75,7 @@ public class FileStatusService {
|
||||
private final ReanalysisRequiredStatusService reanalysisRequiredStatusService;
|
||||
private final ViewedPagesPersistenceService viewedPagesPersistenceService;
|
||||
private final FileManagementServiceSettings fileManagementServiceSettings;
|
||||
private final LayoutParsingRequestFactory layoutParsingRequestFactory;
|
||||
|
||||
|
||||
@Transactional
|
||||
@ -174,7 +176,7 @@ public class FileStatusService {
|
||||
}
|
||||
|
||||
var fileModel = MagicConverter.convert(fileEntity, FileModel.class, new FileModelMapper());
|
||||
reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModel, true);
|
||||
fileModel = reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModel, true);
|
||||
|
||||
var dossierTemplate = dossier.getDossierTemplate();
|
||||
if (dossierTemplate.isOcrByDefault() && fileModel.getOcrEndTime() == null) {
|
||||
@ -183,22 +185,21 @@ public class FileStatusService {
|
||||
return;
|
||||
}
|
||||
|
||||
MessageType messageType = null;
|
||||
|
||||
if (!fileManagementStorageService.objectExists(dossierId, fileId, FileType.TEXT)) {
|
||||
messageType = MessageType.STRUCTURE_ANALYSE;
|
||||
if (!fileManagementStorageService.objectExists(dossierId, fileId, FileType.DOCUMENT_TEXT)) {
|
||||
var layoutParsingRequest = layoutParsingRequestFactory.build(dossierId, fileId, priority, dossier);
|
||||
setStatusFullProcessing(fileId);
|
||||
rabbitTemplate.convertAndSend(LAYOUT_PARSING_REQUEST_QUEUE, layoutParsingRequest);
|
||||
return;
|
||||
}
|
||||
|
||||
if (messageType == null && settings.isNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.NER_ENTITIES)) {
|
||||
if (settings.isNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.NER_ENTITIES)) {
|
||||
log.debug("Add file: {} from dossier {} to NER queue", fileId, dossierId);
|
||||
addToNerQueue(dossierId, fileId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (messageType == null) {
|
||||
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
|
||||
messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
|
||||
}
|
||||
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
|
||||
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
|
||||
|
||||
var analyseRequest = AnalyzeRequest.builder()
|
||||
.messageType(messageType)
|
||||
@ -598,7 +599,10 @@ public class FileStatusService {
|
||||
|
||||
if (requiresStructureAnalysis) {
|
||||
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
}
|
||||
|
||||
@ -636,7 +640,10 @@ public class FileStatusService {
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.IMAGE_INFO);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
|
||||
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
|
||||
|
||||
@ -10,7 +10,6 @@ import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.iqser.red.service.pdftron.redaction.v1.api.model.ByteContentDocument;
|
||||
import com.iqser.red.service.pdftron.redaction.v1.api.model.highlights.TextHighlightConversionOperation;
|
||||
@ -39,7 +38,6 @@ public class ReanalysisService {
|
||||
private final IndexingService indexingService;
|
||||
private final PDFTronClient pDFTronRedactionClient;
|
||||
private final FileManagementStorageService fileManagementStorageService;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
|
||||
public void reanalyzeDossier(String dossierId, boolean force) {
|
||||
|
||||
@ -0,0 +1,58 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileManagementStorageService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class LayoutParsingRequestFactory {
|
||||
|
||||
@Value("${application.type}")
|
||||
private String applicationType;
|
||||
|
||||
private final FileManagementStorageService fileManagementStorageService;
|
||||
private final LayoutParsingRequestIdentifierService layoutParsingRequestIdentifierService;
|
||||
|
||||
|
||||
public LayoutParsingRequest build(String dossierId, String fileId, boolean priority, DossierEntity dossier) {
|
||||
|
||||
LayoutParsingType type = switch (applicationType) {
|
||||
case "DocuMine" -> LayoutParsingType.DOCUMINE;
|
||||
case "TAAS" -> LayoutParsingType.TAAS;
|
||||
default -> LayoutParsingType.REDACT_MANAGER;
|
||||
};
|
||||
|
||||
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
|
||||
|
||||
Optional<String> optionalTableFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES)) : Optional.empty();
|
||||
|
||||
return LayoutParsingRequest.builder()
|
||||
.layoutParsingType(type)
|
||||
.identifier(layoutParsingRequestIdentifierService.buildIdentifier(dossierId, fileId, priority))
|
||||
.originFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
|
||||
.imagesFileStorageId(optionalImageFileId)
|
||||
.tablesFileStorageId(optionalTableFileId)
|
||||
.pageFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES))
|
||||
.structureFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE))
|
||||
.textBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT))
|
||||
.positionBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION))
|
||||
.simplifiedTextStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SIMPLIFIED_TEXT))
|
||||
.sectionGridStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID))
|
||||
.build();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,43 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierEntity;
|
||||
|
||||
@Service
|
||||
public class LayoutParsingRequestIdentifierService {
|
||||
|
||||
private enum IdentifierNames {
|
||||
DOSSIER_ID,
|
||||
FILE_ID,
|
||||
PRIORITY,
|
||||
DOSSIER_TEMPLATE_ID
|
||||
}
|
||||
|
||||
|
||||
public String parseDossierId(Map<String, String> identifiers) {
|
||||
|
||||
return identifiers.get(IdentifierNames.DOSSIER_ID.name());
|
||||
}
|
||||
|
||||
|
||||
public String parseFileId(Map<String, String> identifiers) {
|
||||
|
||||
return identifiers.get(IdentifierNames.FILE_ID.name());
|
||||
}
|
||||
|
||||
|
||||
public Boolean parsePriority(Map<String, String> identifiers) {
|
||||
|
||||
return Boolean.parseBoolean(identifiers.get(IdentifierNames.PRIORITY.name()));
|
||||
}
|
||||
|
||||
|
||||
public Map<String, String> buildIdentifier(String dossierId, String fileId, boolean priority) {
|
||||
|
||||
return Map.of(IdentifierNames.DOSSIER_ID.name(), dossierId, IdentifierNames.FILE_ID.name(), fileId, IdentifierNames.PRIORITY.name(), String.valueOf(priority));
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
@ -10,6 +10,8 @@ import org.springframework.stereotype.Service;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.OffsetDateTime;
|
||||
@ -13,6 +13,8 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
@ -0,0 +1,64 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
|
||||
import org.springframework.amqp.core.Message;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestIdentifierService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class LayoutParsingFinishedMessageReceiver {
|
||||
|
||||
private final FileStatusService fileStatusService;
|
||||
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final LayoutParsingRequestIdentifierService layoutParsingRequestIdentifierService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@RabbitListener(queues = LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE)
|
||||
public void receive(LayoutParsingFinishedEvent response) {
|
||||
|
||||
fileStatusService.setStatusAnalyse(layoutParsingRequestIdentifierService.parseDossierId(response.identifier()),
|
||||
layoutParsingRequestIdentifierService.parseFileId(response.identifier()),
|
||||
layoutParsingRequestIdentifierService.parsePriority(response.identifier()));
|
||||
|
||||
log.info("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@RabbitListener(queues = LayoutParsingQueueNames.LAYOUT_PARSING_DLQ)
|
||||
public void handleDLQMessage(Message failedMessage) {
|
||||
|
||||
var analyzeRequest = objectMapper.readValue(failedMessage.getBody(), LayoutParsingRequest.class);
|
||||
log.info("Failed to process analyze request: {}", analyzeRequest);
|
||||
String errorCause = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_HEADER);
|
||||
OffsetDateTime timestamp = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER);
|
||||
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||
log.info("Failed to process layout parsing request, errorCause: {}, timestamp: {}", errorCause, timestamp);
|
||||
fileStatusProcessingUpdateService.analysisFailed(layoutParsingRequestIdentifierService.parseDossierId(analyzeRequest.identifier()),
|
||||
layoutParsingRequestIdentifierService.parseFileId(analyzeRequest.identifier()),
|
||||
new FileErrorInfo(errorCause, LayoutParsingQueueNames.LAYOUT_PARSING_DLQ, "redaction-service", timestamp));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.OffsetDateTime;
|
||||
@ -12,6 +12,8 @@ import org.springframework.stereotype.Service;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
|
||||
|
||||
import org.springframework.amqp.core.Message;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
||||
@ -7,6 +7,7 @@ import org.springframework.stereotype.Service;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
@ -7,6 +7,9 @@ redaction-report-service.url: "http://redaction-report-service-v1:8080"
|
||||
search-service.url: "http://search-service-v1:8080"
|
||||
tenant-user-management-service.url: "http://tenant-user-management-service:8080/internal"
|
||||
|
||||
application:
|
||||
type: "RedactManager"
|
||||
|
||||
server:
|
||||
port: 8080
|
||||
|
||||
|
||||
@ -462,7 +462,10 @@ public class ManualRedactionTest extends AbstractPersistenceServerServiceTest {
|
||||
var type = typeProvider.testAndProvideType(dossierTemplate, null, "PII");
|
||||
|
||||
// assume file is already processed once, test that add to dict triggers reanalysis
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.TEXT, "{}");
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_TEXT, "{}");
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_PAGES, "{}");
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_STRUCTURE, "{}");
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_POSITION, "{}");
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.NER_ENTITIES, "{}");
|
||||
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.IMAGE_INFO, "{}");
|
||||
fileStatusPersistenceService.updateProcessingStatus(file.getId(), ProcessingStatus.PROCESSED);
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
monitoring.enabled: true
|
||||
|
||||
application:
|
||||
type: "RedactManager"
|
||||
|
||||
spring:
|
||||
main:
|
||||
allow-circular-references: true # FIXME
|
||||
|
||||
@ -135,20 +135,23 @@ public class FileStatus {
|
||||
private boolean hasHighlights;
|
||||
@Schema(description = "Size of the optimized, internally stored file.")
|
||||
private Long fileSize;
|
||||
@Schema(description = "Analysis Version.")
|
||||
@Schema(description = "Analysis Version.")
|
||||
private int analysisVersion;
|
||||
@Schema(description = "Last time the file was indexed in ES.")
|
||||
private OffsetDateTime lastIndexed;
|
||||
@Schema(description = "The error information for the error state of the file")
|
||||
private FileErrorInfo fileErrorInfo;
|
||||
|
||||
|
||||
@Schema(description = "Shows if this file has been OCRed by us. Last Time of OCR.")
|
||||
public OffsetDateTime getLastOCRTime() {
|
||||
|
||||
return ocrEndTime != null ? ocrEndTime : ocrStartTime;
|
||||
}
|
||||
|
||||
public String getId(){
|
||||
|
||||
public String getId() {
|
||||
|
||||
return fileId;
|
||||
}
|
||||
|
||||
|
||||
@ -9,14 +9,19 @@ public enum FileType {
|
||||
REDACTION_LOG(".json"),
|
||||
SIMPLIFIED_TEXT(".json"),
|
||||
SECTION_GRID(".json"),
|
||||
TEXT(".json"),
|
||||
TEXT(".json"), // deprecated file type, only present in legacy migrations
|
||||
NER_ENTITIES(".json"),
|
||||
IMAGE_INFO(".json"),
|
||||
IMPORTED_REDACTIONS(".json"),
|
||||
TEXT_HIGHLIGHTS(".json"),
|
||||
FIGURE(".json"),
|
||||
TABLES(".json"),
|
||||
COMPONENTS(".json");
|
||||
COMPONENTS(".json"),
|
||||
// document is split into 4 files, all should be overridden/deleted at the same time
|
||||
DOCUMENT_TEXT(".json"),
|
||||
DOCUMENT_STRUCTURE(".json"),
|
||||
DOCUMENT_POSITION(".json"),
|
||||
DOCUMENT_PAGES(".json");
|
||||
|
||||
@Getter
|
||||
private final String extension;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user