Merge branch 'RED-10728' into 'master'

RED-10728: Endpoint to execute full OCR on specific file

Closes RED-10728

See merge request redactmanager/persistence-service!922
This commit is contained in:
Maverick Studer 2025-01-16 15:54:23 +01:00
commit d691164af6
7 changed files with 28 additions and 23 deletions

View File

@ -118,11 +118,12 @@ public class ReanalysisController implements ReanalysisResource {
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) {
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
validateOCR(dossierId, fileId);
reanalysisService.ocrFile(dossierId, fileId, force);
reanalysisService.ocrFile(dossierId, fileId, force, allPages);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)
@ -140,7 +141,7 @@ public class ReanalysisController implements ReanalysisResource {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
reanalysisService.ocrFiles(dossierId, fileIds);
reanalysisService.ocrFiles(dossierId, fileIds, false);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)

View File

@ -38,6 +38,7 @@ public interface ReanalysisResource {
String EXCLUDED_STATUS_PARAM = "excluded";
String FORCE_PARAM = "force";
String ALL_PAGES = "allPages";
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ -73,7 +74,8 @@ public interface ReanalysisResource {
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force);
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages);
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")

View File

@ -57,7 +57,7 @@ public class AdminInterfaceController {
fileStatusService.validateFileIsNotDeletedAndNotApproved(fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId, false);
}
@ -91,7 +91,7 @@ public class AdminInterfaceController {
if (!dryRun) {
fileStatusService.validateFileIsNotDeletedAndNotApproved(file.getId());
fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId());
fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId(), false);
}
}

View File

@ -122,7 +122,7 @@ public class FileStatusProcessingUpdateService {
} else {
fileStatusService.setStatusOcrProcessing(fileId,
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
fileStatusService.addToOcrQueue(dossierId, fileId, 2);
fileStatusService.addToOcrQueue(dossierId, fileId, 2, false);
}
}

View File

@ -316,7 +316,7 @@ public class FileStatusService {
}
log.info("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
setStatusOcrQueued(dossierId, fileId);
setStatusOcrQueued(dossierId, fileId, false);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
@ -352,7 +352,6 @@ public class FileStatusService {
return;
}
boolean forceAnalysis = false;
if (settings.isLlmNerServiceEnabled()) {
boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES);
@ -386,7 +385,7 @@ public class FileStatusService {
boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE);
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
if(analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
if (analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
messageType = MessageType.ANALYSE;
}
@ -567,7 +566,7 @@ public class FileStatusService {
}
public void setStatusOcrQueued(String dossierId, String fileId) {
public void setStatusOcrQueued(String dossierId, String fileId, boolean allPages) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
@ -579,7 +578,7 @@ public class FileStatusService {
updateOCRStartTime(fileId);
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
addToOcrQueue(dossierId, fileId, 2);
addToOcrQueue(dossierId, fileId, 2, allPages);
}
@ -760,13 +759,16 @@ public class FileStatusService {
}
public void addToOcrQueue(String dossierId, String fileId, int priority) {
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean allPages) {
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
Set<AzureOcrFeature> features = new HashSet<>();
if (removeWatermark) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
if (allPages) {
features.add(AzureOcrFeature.ALL_PAGES);
}
if (currentApplicationTypeProvider.isDocuMine()) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
@ -820,7 +822,7 @@ public class FileStatusService {
fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false);
if(oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
if (oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
fileStatusPersistenceService.clearLastDownload(fileId);
}
}
@ -977,7 +979,7 @@ public class FileStatusService {
if (runOcr) {
fileStatusPersistenceService.resetOcrStartAndEndDate(fileId);
setStatusOcrQueued(dossierId, fileId);
setStatusOcrQueued(dossierId, fileId, false);
return;
}
@ -1064,6 +1066,7 @@ public class FileStatusService {
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT);
}
@Transactional
public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) {

View File

@ -10,7 +10,6 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Confl
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierPersistenceService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.ReanalysisSettings;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.DeleteImportedRedactionsRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileModel;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -178,11 +177,11 @@ public class ReanalysisService {
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), false));
}
public void ocrFile(String dossierId, String fileId, boolean force) {
public void ocrFile(String dossierId, String fileId, boolean force, boolean allPages) {
dossierPersistenceService.getAndValidateDossier(dossierId);
FileModel dossierFile = fileStatusService.getStatus(fileId);
@ -202,18 +201,18 @@ public class ReanalysisService {
}
if (force) {
fileStatusService.setStatusOcrQueued(dossierId, fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId, allPages);
} else {
if (dossierFile.getOcrStartTime() != null) {
throw new ConflictException("File already has been OCR processed");
}
ocrFiles(dossierId, Sets.newHashSet(fileId));
ocrFiles(dossierId, Sets.newHashSet(fileId), allPages);
}
}
public void ocrFiles(String dossierId, Set<String> fileIds) {
public void ocrFiles(String dossierId, Set<String> fileIds, boolean allPages) {
var relevantFiles = getRelevantFiles(dossierId, fileIds);
@ -225,7 +224,7 @@ public class ReanalysisService {
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), allPages));
}

View File

@ -56,7 +56,7 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);