RED-6725: integrate layoutparsing service #55

Merged
kilian.schuettler1 merged 11 commits from RED-6725 into master 2023-08-01 11:02:25 +02:00
21 changed files with 362 additions and 54 deletions

View File

@ -75,22 +75,75 @@ public class RedactionLogController implements RedactionLogResource {
public ResponseEntity<?> getSectionText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
try {
return buildZipFileResponseEntity(fileId, dossierId, FileType.TEXT);
} catch (FeignException e) {
throw processFeignException(e);
}
}
HttpHeaders httpHeaders = new HttpHeaders();
httpHeaders.setContentType(MediaType.parseMediaType("application/zip"));
var fileStatus = fileStatusService.getStatus(fileId);
String filename = fileStatus.getFilename();
if (filename != null) {
var index = filename.lastIndexOf(".");
String prefix = filename.substring(0, index);
filename = prefix + ".json";
httpHeaders.add("Content-Disposition", "attachment; filename=" + prefix + ".zip");
}
private ResponseEntity<byte[]> buildZipFileResponseEntity(String fileId, String dossierId, FileType fileType) throws IOException {
byte[] zipBytes = getZippedBytes(dossierId, fileId, filename, FileType.TEXT);
return new ResponseEntity<>(zipBytes, httpHeaders, HttpStatus.OK);
HttpHeaders httpHeaders = new HttpHeaders();
httpHeaders.setContentType(MediaType.parseMediaType("application/zip"));
var fileStatus = fileStatusService.getStatus(fileId);
String filename = fileStatus.getFilename();
if (filename != null) {
var index = filename.lastIndexOf(".");
String prefix = filename.substring(0, index);
filename = prefix + ".json";
httpHeaders.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename*=utf-8''" + StringEncodingUtils.urlEncode(prefix) + ".zip");
}
byte[] zipBytes = getZippedBytes(dossierId, fileId, filename, fileType);
httpHeaders.setContentLength(zipBytes.length);
return new ResponseEntity<>(zipBytes, httpHeaders, HttpStatus.OK);
}
@SneakyThrows
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
public ResponseEntity<?> getDocumentText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
try {
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_TEXT);
} catch (FeignException e) {
throw processFeignException(e);
}
}
@SneakyThrows
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
public ResponseEntity<?> getDocumentPositions(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
try {
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_POSITION);
} catch (FeignException e) {
throw processFeignException(e);
}
}
@SneakyThrows
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
public ResponseEntity<?> getDocumentStructure(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
try {
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_STRUCTURE);
} catch (FeignException e) {
throw processFeignException(e);
}
}
@SneakyThrows
@PreAuthorize("hasAuthority('" + READ_REDACTION_LOG + "')")
public ResponseEntity<?> getDocumentPages(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId) {
try {
return buildZipFileResponseEntity(fileId, dossierId, FileType.DOCUMENT_PAGES);
} catch (FeignException e) {
throw processFeignException(e);
}

View File

@ -26,6 +26,10 @@ public interface RedactionLogResource {
String REDACTION_LOG_PATH = ExternalApi.BASE_PATH + "/redactionLog";
String SECTION_GRID_PATH = ExternalApi.BASE_PATH + "/sectionGrid";
String SECTION_TEXT_PATH = ExternalApi.BASE_PATH + "/sectionText";
String DOCUMENT_TEXT_PATH = ExternalApi.BASE_PATH + "/documentText";
String DOCUMENT_POSITIONS_PATH = ExternalApi.BASE_PATH + "/documentPositions";
String DOCUMENT_PAGES_PATH = ExternalApi.BASE_PATH + "/documentPages";
String DOCUMENT_STRUCTURE_PATH = ExternalApi.BASE_PATH + "/documentStructure";
String SIMPLIFIED_SECTION_TEXT_PATH = ExternalApi.BASE_PATH + "/simplifiedSectionText";
String FILE_ID = "fileId";
@ -37,7 +41,7 @@ public interface RedactionLogResource {
@GetMapping(value = REDACTION_LOG_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE, produces = MediaType.APPLICATION_JSON_VALUE)
@Operation(summary = "Gets the redaction log for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
RedactionLog getRedactionLog(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = "excludedType", required = false) List<String> excludedTypes,
@ -47,25 +51,50 @@ public interface RedactionLogResource {
@GetMapping(value = SECTION_GRID_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE, produces = MediaType.APPLICATION_JSON_VALUE)
@Operation(summary = "Gets the section grid for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The section grid is not found.")})
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The section grid is not found.")})
SectionGrid getSectionGrid(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@Deprecated
@GetMapping(value = SECTION_TEXT_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
@Operation(summary = "Gets the section text for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The section text is not found.")})
@Operation(summary = "Gets the text blocks of a document for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The section text is not found.")})
ResponseEntity<?> getSectionText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@GetMapping(value = DOCUMENT_TEXT_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
@Operation(summary = "Gets the text blocks of a document for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The document text is not found.")})
ResponseEntity<?> getDocumentText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@GetMapping(value = DOCUMENT_POSITIONS_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
@Operation(summary = "Gets the positions of the text blocks of a document for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The document positions is not found.")})
ResponseEntity<?> getDocumentPositions(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@GetMapping(value = DOCUMENT_STRUCTURE_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
@Operation(summary = "Gets the document structure for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The document structure is not found.")})
ResponseEntity<?> getDocumentStructure(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@GetMapping(value = DOCUMENT_PAGES_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
@Operation(summary = "Gets the page information of a document for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The page information is not found.")})
ResponseEntity<?> getDocumentPages(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@GetMapping(value = SIMPLIFIED_SECTION_TEXT_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE)
@Operation(summary = "Gets the simplified section text for a fileId", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The simplified section text is not found.")})
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The simplified section text is not found.")})
ResponseEntity<?> getSimplifiedSectionText(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId);
@PostMapping(value = REDACTION_LOG_PATH + DOSSIER_ID_PATH_VARIABLE + FILE_ID_PATH_VARIABLE + "/filtered", produces = MediaType.APPLICATION_JSON_VALUE)
@Operation(summary = "Gets the redaction log for a fileId grater than the specified date", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request " + "contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "OK"), @ApiResponse(responseCode = "400", description = "Request contains error."), @ApiResponse(responseCode = "404", description = "The redaction log is not found.")})
RedactionLog getFilteredRedactionLog(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestBody FilteredRedactionLogRequest filteredRedactionLogRequest);

View File

@ -37,7 +37,10 @@ public class AdminInterfaceController {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);
@ -136,7 +139,10 @@ public class AdminInterfaceController {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileStatusService.setStatusFullReprocess(dossierId, fileId, true, true);

View File

@ -88,6 +88,12 @@
</exclusions>
</dependency>
<dependency>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service-internal-api</artifactId>
<version>0.19.0</version>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>search-service-api-v1</artifactId>

View File

@ -1,5 +1,9 @@
package com.iqser.red.service.persistence.management.v1.processor.configuration;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_DLQ;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_QUEUE;
import org.springframework.amqp.core.Queue;
import org.springframework.amqp.core.QueueBuilder;
import org.springframework.context.annotation.Bean;
@ -58,6 +62,8 @@ public class MessagingConfiguration {
public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue";
public static final String OCR_STATUS_UPDATE_RESPONSE_DQL = "ocr_status_update_response_dql";
public static final String X_ERROR_INFO_HEADER = "x-error-message";
public static final String X_ERROR_INFO_TIMESTAMP_HEADER = "x-error-message-timestamp";
@Bean
public Queue nerRequestQueue() {
@ -311,4 +317,25 @@ public class MessagingConfiguration {
.build();
}
@Bean
public Queue layoutparsingRequestQueue() {
return QueueBuilder.durable(LAYOUT_PARSING_REQUEST_QUEUE)//
.withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build();
}
@Bean
public Queue layoutparsingResponseQueue() {
return QueueBuilder.durable(LAYOUT_PARSING_FINISHED_EVENT_QUEUE)//
.withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", LAYOUT_PARSING_DLQ).build();
}
@Bean
public Queue layoutparsingDLQ() {
return QueueBuilder.durable(LAYOUT_PARSING_DLQ).build();
}
}

View File

@ -64,7 +64,7 @@ public class FileManagementStorageService {
try {
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG), RedactionLog.class);
} catch (StorageObjectDoesNotExist e) {
log.debug("Text not available.");
log.debug("RedactionLog does not exist");
throw new NotFoundException("RedactionLog does not exist");
}
}
@ -76,7 +76,7 @@ public class FileManagementStorageService {
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID), SectionGrid.class);
} catch (StorageObjectDoesNotExist e) {
log.debug("SectionGrid not available.");
throw new NotFoundException("RedactionLog does not exist");
throw new NotFoundException("SectionGrid does not exist");
} catch (Exception e) {
throw new RuntimeException("Could not convert SectionGrid", e);
}

View File

@ -40,12 +40,6 @@ public class FileStatusProcessingUpdateService {
switch (analyzeResult.getMessageType()) {
case STRUCTURE_ANALYSE:
//TODO This might be also priority depending on what was the pervious call.
fileStatusService.setStatusAnalyse(dossierId, fileId, false);
break;
case SURROUNDING_TEXT:
fileStatusService.setStatusProcessed(analyzeResult.getFileId());
manualRedactionService.updateSurroundingText(fileId, analyzeResult.getManualRedactions());
@ -75,7 +69,6 @@ public class FileStatusProcessingUpdateService {
}
@Transactional
public void preprocessingSuccessful(String dossierId, String fileId, UntouchedDocumentResponse untouchedDocumentResponse) {
fileStatusService.updateProcessingStatusPreprocessed(dossierId, fileId, untouchedDocumentResponse.isHasHighlights(), untouchedDocumentResponse.getFileSize());

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
import static com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_QUEUE;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
@ -12,7 +14,6 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Sets;
import com.iqser.red.service.pdftron.redaction.v1.api.model.DocumentRequest;
import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocumentRequest;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
@ -22,6 +23,7 @@ import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysi
import com.iqser.red.service.persistence.management.v1.processor.model.NerServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileAttributeConfigPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileStatusPersistenceService;
@ -59,7 +61,6 @@ public class FileStatusService {
private final FileStatusPersistenceService fileStatusPersistenceService;
private final DossierPersistenceService dossierPersistenceService;
private final RabbitTemplate rabbitTemplate;
private final ObjectMapper objectMapper;
private final ManualRedactionProviderService manualRedactionProviderService;
private final FileManagementStorageService fileManagementStorageService;
private final LegalBasisChangePersistenceService legalBasisChangePersistenceService;
@ -74,6 +75,7 @@ public class FileStatusService {
private final ReanalysisRequiredStatusService reanalysisRequiredStatusService;
private final ViewedPagesPersistenceService viewedPagesPersistenceService;
private final FileManagementServiceSettings fileManagementServiceSettings;
private final LayoutParsingRequestFactory layoutParsingRequestFactory;
@Transactional
@ -174,7 +176,7 @@ public class FileStatusService {
}
var fileModel = MagicConverter.convert(fileEntity, FileModel.class, new FileModelMapper());
reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModel, true);
fileModel = reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModel, true);
var dossierTemplate = dossier.getDossierTemplate();
if (dossierTemplate.isOcrByDefault() && fileModel.getOcrEndTime() == null) {
@ -183,22 +185,21 @@ public class FileStatusService {
return;
}
MessageType messageType = null;
if (!fileManagementStorageService.objectExists(dossierId, fileId, FileType.TEXT)) {
messageType = MessageType.STRUCTURE_ANALYSE;
if (!fileManagementStorageService.objectExists(dossierId, fileId, FileType.DOCUMENT_TEXT)) {
var layoutParsingRequest = layoutParsingRequestFactory.build(dossierId, fileId, priority, dossier);
setStatusFullProcessing(fileId);
rabbitTemplate.convertAndSend(LAYOUT_PARSING_REQUEST_QUEUE, layoutParsingRequest);
return;
}
if (messageType == null && settings.isNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.NER_ENTITIES)) {
if (settings.isNerServiceEnabled() && !fileManagementStorageService.objectExists(dossierId, fileId, FileType.NER_ENTITIES)) {
log.debug("Add file: {} from dossier {} to NER queue", fileId, dossierId);
addToNerQueue(dossierId, fileId);
return;
}
if (messageType == null) {
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
}
boolean reanalyse = fileModel.isReanalysisRequired() || manualRedactionReanalyse;
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
var analyseRequest = AnalyzeRequest.builder()
.messageType(messageType)
@ -598,7 +599,10 @@ public class FileStatusService {
if (requiresStructureAnalysis) {
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
}
@ -636,7 +640,10 @@ public class FileStatusService {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.IMAGE_INFO);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_PAGES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_POSITION);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.DOCUMENT_TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.FIGURE);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TABLES);

View File

@ -10,7 +10,6 @@ import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Sets;
import com.iqser.red.service.pdftron.redaction.v1.api.model.ByteContentDocument;
import com.iqser.red.service.pdftron.redaction.v1.api.model.highlights.TextHighlightConversionOperation;
@ -39,7 +38,6 @@ public class ReanalysisService {
private final IndexingService indexingService;
private final PDFTronClient pDFTronRedactionClient;
private final FileManagementStorageService fileManagementStorageService;
private final ObjectMapper objectMapper;
public void reanalyzeDossier(String dossierId, boolean force) {

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing;
import java.util.Map;
import java.util.Optional;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierEntity;
import com.iqser.red.service.persistence.management.v1.processor.service.FileManagementStorageService;
import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
public class LayoutParsingRequestFactory {
@Value("${application.type}")
private String applicationType;
private final FileManagementStorageService fileManagementStorageService;
private final LayoutParsingRequestIdentifierService layoutParsingRequestIdentifierService;
public LayoutParsingRequest build(String dossierId, String fileId, boolean priority, DossierEntity dossier) {
LayoutParsingType type = switch (applicationType) {
case "DocuMine" -> LayoutParsingType.DOCUMINE;
case "TAAS" -> LayoutParsingType.TAAS;
default -> LayoutParsingType.REDACT_MANAGER;
};
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
Optional<String> optionalTableFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.TABLES) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES)) : Optional.empty();
return LayoutParsingRequest.builder()
.layoutParsingType(type)
.identifier(layoutParsingRequestIdentifierService.buildIdentifier(dossierId, fileId, priority))
.originFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
.imagesFileStorageId(optionalImageFileId)
.tablesFileStorageId(optionalTableFileId)
.pageFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES))
.structureFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE))
.textBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT))
.positionBlockFileStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION))
.simplifiedTextStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SIMPLIFIED_TEXT))
.sectionGridStorageId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID))
.build();
}
}

View File

@ -0,0 +1,43 @@
package com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierEntity;
@Service
public class LayoutParsingRequestIdentifierService {
private enum IdentifierNames {
DOSSIER_ID,
FILE_ID,
PRIORITY,
DOSSIER_TEMPLATE_ID
}
public String parseDossierId(Map<String, String> identifiers) {
return identifiers.get(IdentifierNames.DOSSIER_ID.name());
}
public String parseFileId(Map<String, String> identifiers) {
return identifiers.get(IdentifierNames.FILE_ID.name());
}
public Boolean parsePriority(Map<String, String> identifiers) {
return Boolean.parseBoolean(identifiers.get(IdentifierNames.PRIORITY.name()));
}
public Map<String, String> buildIdentifier(String dossierId, String fileId, boolean priority) {
return Map.of(IdentifierNames.DOSSIER_ID.name(), dossierId, IdentifierNames.FILE_ID.name(), fileId, IdentifierNames.PRIORITY.name(), String.valueOf(priority));
}
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
@ -10,6 +10,8 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceResponse;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import lombok.RequiredArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.io.IOException;
import java.time.OffsetDateTime;
@ -13,6 +13,8 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;

View File

@ -0,0 +1,64 @@
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestIdentifierService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class LayoutParsingFinishedMessageReceiver {
private final FileStatusService fileStatusService;
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
private final ObjectMapper objectMapper;
private final LayoutParsingRequestIdentifierService layoutParsingRequestIdentifierService;
@SneakyThrows
@RabbitListener(queues = LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE)
public void receive(LayoutParsingFinishedEvent response) {
fileStatusService.setStatusAnalyse(layoutParsingRequestIdentifierService.parseDossierId(response.identifier()),
layoutParsingRequestIdentifierService.parseFileId(response.identifier()),
layoutParsingRequestIdentifierService.parsePriority(response.identifier()));
log.info("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE);
}
@SneakyThrows
@RabbitListener(queues = LayoutParsingQueueNames.LAYOUT_PARSING_DLQ)
public void handleDLQMessage(Message failedMessage) {
var analyzeRequest = objectMapper.readValue(failedMessage.getBody(), LayoutParsingRequest.class);
log.info("Failed to process analyze request: {}", analyzeRequest);
String errorCause = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_HEADER);
OffsetDateTime timestamp = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER);
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
log.info("Failed to process layout parsing request, errorCause: {}, timestamp: {}", errorCause, timestamp);
fileStatusProcessingUpdateService.analysisFailed(layoutParsingRequestIdentifierService.parseDossierId(analyzeRequest.identifier()),
layoutParsingRequestIdentifierService.parseFileId(analyzeRequest.identifier()),
new FileErrorInfo(errorCause, LayoutParsingQueueNames.LAYOUT_PARSING_DLQ, "redaction-service", timestamp));
}
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.io.IOException;
import java.time.OffsetDateTime;
@ -12,6 +12,8 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import lombok.RequiredArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
@ -7,6 +7,7 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;

View File

@ -7,6 +7,9 @@ redaction-report-service.url: "http://redaction-report-service-v1:8080"
search-service.url: "http://search-service-v1:8080"
tenant-user-management-service.url: "http://tenant-user-management-service:8080/internal"
application:
type: "RedactManager"
server:
port: 8080

View File

@ -462,7 +462,10 @@ public class ManualRedactionTest extends AbstractPersistenceServerServiceTest {
var type = typeProvider.testAndProvideType(dossierTemplate, null, "PII");
// assume file is already proccessed once, test that add to dict triggers reanalysis
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.TEXT, "{}");
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_TEXT, "{}");
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_PAGES, "{}");
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_STRUCTURE, "{}");
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.DOCUMENT_POSITION, "{}");
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.NER_ENTITIES, "{}");
fileManagementStorageService.storeJSONObject(dossier.getId(), file.getId(), FileType.IMAGE_INFO, "{}");
fileStatusPersistenceService.updateProcessingStatus(file.getId(), ProcessingStatus.PROCESSED);

View File

@ -1,5 +1,8 @@
monitoring.enabled: true
application:
type: "RedactManager"
spring:
main:
allow-circular-references: true # FIXME

View File

@ -135,20 +135,23 @@ public class FileStatus {
private boolean hasHighlights;
@Schema(description = "Size of the optimized, internally stored file.")
private Long fileSize;
@Schema(description = "Analysis Version.")
@Schema(description = "Analysis Version.")
private int analysisVersion;
@Schema(description = "Last time the file was indexed in ES.")
private OffsetDateTime lastIndexed;
@Schema(description = "The error information for the error state of the file")
private FileErrorInfo fileErrorInfo;
@Schema(description = "Shows if this file has been OCRed by us. Last Time of OCR.")
public OffsetDateTime getLastOCRTime() {
return ocrEndTime != null ? ocrEndTime : ocrStartTime;
}
public String getId(){
public String getId() {
return fileId;
}

View File

@ -9,14 +9,19 @@ public enum FileType {
REDACTION_LOG(".json"),
SIMPLIFIED_TEXT(".json"),
SECTION_GRID(".json"),
TEXT(".json"),
TEXT(".json"), // deprecated file type, only present in legacy migrations
NER_ENTITIES(".json"),
IMAGE_INFO(".json"),
IMPORTED_REDACTIONS(".json"),
TEXT_HIGHLIGHTS(".json"),
FIGURE(".json"),
TABLES(".json"),
COMPONENTS(".json");
COMPONENTS(".json"),
// document is split into 4 files, all should be overridden/deleted at the same time
DOCUMENT_TEXT(".json"),
DOCUMENT_STRUCTURE(".json"),
DOCUMENT_POSITION(".json"),
DOCUMENT_PAGES(".json");
@Getter
private final String extension;