From 43f9db59d4e8a444c984f06c02f7b72117a78743 Mon Sep 17 00:00:00 2001 From: maverickstuder Date: Thu, 16 Jan 2025 14:38:17 +0100 Subject: [PATCH] RED-10728: Endpoint to execute full OCR on specific file --- .../impl/controller/ReanalysisController.java | 7 ++++--- .../external/resource/ReanalysisResource.java | 4 +++- .../internal/AdminInterfaceController.java | 4 ++-- .../FileStatusProcessingUpdateService.java | 2 +- .../processor/service/FileStatusService.java | 19 +++++++++++-------- .../processor/service/ReanalysisService.java | 13 ++++++------- .../integration/tests/ReanalysisTest.java | 2 +- 7 files changed, 28 insertions(+), 23 deletions(-) diff --git a/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/ReanalysisController.java b/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/ReanalysisController.java index 25ba16d58..8efc19724 100644 --- a/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/ReanalysisController.java +++ b/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/ReanalysisController.java @@ -118,11 +118,12 @@ public class ReanalysisController implements ReanalysisResource { @PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')") public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId, - @RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) { + @RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force, + @RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages) { accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId); validateOCR(dossierId, fileId); - reanalysisService.ocrFile(dossierId, fileId, force); + reanalysisService.ocrFile(dossierId, fileId, force, allPages); auditPersistenceService.audit(AuditRequest.builder() .userId(KeycloakSecurity.getUserId()) .objectId(dossierId) @@ -140,7 +141,7 @@ public class ReanalysisController implements ReanalysisResource { accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId); fileIds.forEach(fileId -> validateOCR(dossierId, fileId)); - reanalysisService.ocrFiles(dossierId, fileIds); + reanalysisService.ocrFiles(dossierId, fileIds, false); auditPersistenceService.audit(AuditRequest.builder() .userId(KeycloakSecurity.getUserId()) .objectId(dossierId) diff --git a/persistence-service-v1/persistence-service-external-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/external/resource/ReanalysisResource.java b/persistence-service-v1/persistence-service-external-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/external/resource/ReanalysisResource.java index 62ed4ae56..7b9f2851b 100644 --- a/persistence-service-v1/persistence-service-external-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/external/resource/ReanalysisResource.java +++ b/persistence-service-v1/persistence-service-external-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/external/resource/ReanalysisResource.java @@ -38,6 +38,7 @@ public interface ReanalysisResource { String EXCLUDED_STATUS_PARAM = "excluded"; String FORCE_PARAM = "force"; + String ALL_PAGES = "allPages"; @PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE) @@ -73,7 +74,8 @@ public interface ReanalysisResource { @ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")}) void ocrFile(@PathVariable(DOSSIER_ID) String dossierId, @PathVariable(FILE_ID) String fileId, - @RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force); + @RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force, + @RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages); @Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None") diff --git a/persistence-service-v1/persistence-service-internal-api-impl-v1/src/main/java/com/iqser/red/service/persistence/v1/internal/api/controller/internal/AdminInterfaceController.java b/persistence-service-v1/persistence-service-internal-api-impl-v1/src/main/java/com/iqser/red/service/persistence/v1/internal/api/controller/internal/AdminInterfaceController.java index 2be0e938e..9786c95c7 100644 --- a/persistence-service-v1/persistence-service-internal-api-impl-v1/src/main/java/com/iqser/red/service/persistence/v1/internal/api/controller/internal/AdminInterfaceController.java +++ b/persistence-service-v1/persistence-service-internal-api-impl-v1/src/main/java/com/iqser/red/service/persistence/v1/internal/api/controller/internal/AdminInterfaceController.java @@ -57,7 +57,7 @@ public class AdminInterfaceController { fileStatusService.validateFileIsNotDeletedAndNotApproved(fileId); - fileStatusService.setStatusOcrQueued(dossierId, fileId); + fileStatusService.setStatusOcrQueued(dossierId, fileId, false); } @@ -91,7 +91,7 @@ public class AdminInterfaceController { if (!dryRun) { fileStatusService.validateFileIsNotDeletedAndNotApproved(file.getId()); - fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId()); + fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId(), false); } } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusProcessingUpdateService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusProcessingUpdateService.java index 190b23550..adc8d8497 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusProcessingUpdateService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusProcessingUpdateService.java @@ -122,7 +122,7 @@ public class FileStatusProcessingUpdateService { } else { fileStatusService.setStatusOcrProcessing(fileId, fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0); - fileStatusService.addToOcrQueue(dossierId, fileId, 2); + fileStatusService.addToOcrQueue(dossierId, fileId, 2, false); } } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java index 9284cc37e..194055f66 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java @@ -316,7 +316,7 @@ public class FileStatusService { } log.info("Add file: {} from dossier {} to OCR queue", fileId, dossierId); - setStatusOcrQueued(dossierId, fileId); + setStatusOcrQueued(dossierId, fileId, false); sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity); return; } @@ -352,7 +352,6 @@ public class FileStatusService { return; } - boolean forceAnalysis = false; if (settings.isLlmNerServiceEnabled()) { boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES); @@ -386,7 +385,7 @@ public class FileStatusService { boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE); MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel); - if(analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) { + if (analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) { messageType = MessageType.ANALYSE; } @@ -567,7 +566,7 @@ public class FileStatusService { } - public void setStatusOcrQueued(String dossierId, String fileId) { + public void setStatusOcrQueued(String dossierId, String fileId, boolean allPages) { FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId); @@ -579,7 +578,7 @@ public class FileStatusService { updateOCRStartTime(fileId); fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED); websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1); - addToOcrQueue(dossierId, fileId, 2); + addToOcrQueue(dossierId, fileId, 2, allPages); } @@ -760,13 +759,16 @@ public class FileStatusService { } - public void addToOcrQueue(String dossierId, String fileId, int priority) { + public void addToOcrQueue(String dossierId, String fileId, int priority, boolean allPages) { var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark(); Set features = new HashSet<>(); if (removeWatermark) { features.add(AzureOcrFeature.REMOVE_WATERMARKS); } + if (allPages) { + features.add(AzureOcrFeature.ALL_PAGES); + } if (currentApplicationTypeProvider.isDocuMine()) { features.add(AzureOcrFeature.ROTATION_CORRECTION); features.add(AzureOcrFeature.FONT_STYLE_DETECTION); @@ -820,7 +822,7 @@ public class FileStatusService { fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false); - if(oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) { + if (oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) { fileStatusPersistenceService.clearLastDownload(fileId); } } @@ -977,7 +979,7 @@ public class FileStatusService { if (runOcr) { fileStatusPersistenceService.resetOcrStartAndEndDate(fileId); - setStatusOcrQueued(dossierId, fileId); + setStatusOcrQueued(dossierId, fileId, false); return; } @@ -1064,6 +1066,7 @@ public class FileStatusService { addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT); } + @Transactional public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) { diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/ReanalysisService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/ReanalysisService.java index d66358877..b163b8fa8 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/ReanalysisService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/ReanalysisService.java @@ -10,7 +10,6 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Confl import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException; import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierPersistenceService; import com.iqser.red.service.persistence.service.v1.api.shared.model.ReanalysisSettings; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.DeleteImportedRedactionsRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileModel; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; @@ -178,11 +177,11 @@ public class ReanalysisService { relevantFiles.stream() .filter(fileStatus -> fileStatus.getOcrStartTime() == null) .filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED)) - .forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId())); + .forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), false)); } - public void ocrFile(String dossierId, String fileId, boolean force) { + public void ocrFile(String dossierId, String fileId, boolean force, boolean allPages) { dossierPersistenceService.getAndValidateDossier(dossierId); FileModel dossierFile = fileStatusService.getStatus(fileId); @@ -202,18 +201,18 @@ public class ReanalysisService { } if (force) { - fileStatusService.setStatusOcrQueued(dossierId, fileId); + fileStatusService.setStatusOcrQueued(dossierId, fileId, allPages); } else { if (dossierFile.getOcrStartTime() != null) { throw new ConflictException("File already has been OCR processed"); } - ocrFiles(dossierId, Sets.newHashSet(fileId)); + ocrFiles(dossierId, Sets.newHashSet(fileId), allPages); } } - public void ocrFiles(String dossierId, Set fileIds) { + public void ocrFiles(String dossierId, Set fileIds, boolean allPages) { var relevantFiles = getRelevantFiles(dossierId, fileIds); @@ -225,7 +224,7 @@ public class ReanalysisService { relevantFiles.stream() .filter(fileStatus -> fileStatus.getOcrStartTime() == null) - .forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId())); + .forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), allPages)); } diff --git a/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/ReanalysisTest.java b/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/ReanalysisTest.java index 19f547b96..f9fef3b65 100644 --- a/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/ReanalysisTest.java +++ b/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/ReanalysisTest.java @@ -56,7 +56,7 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest { assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED); resetProcessingStatus(file); - reanalysisClient.ocrFile(dossier.getId(), file.getId(), true); + reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false); loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId()); assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED); resetProcessingStatus(file);