RED-10728: Endpoint to execute full OCR on a specific file

This commit is contained in:
Maverick Studer 2025-01-16 16:03:40 +01:00
parent e62cc3c53e
commit 1c43247fba
7 changed files with 24 additions and 19 deletions

View File

@ -118,11 +118,12 @@ public class ReanalysisController implements ReanalysisResource {
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) {
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
validateOCR(dossierId, fileId);
reanalysisService.ocrFile(dossierId, fileId, force);
reanalysisService.ocrFile(dossierId, fileId, force, allPages);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)
@ -140,7 +141,7 @@ public class ReanalysisController implements ReanalysisResource {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
reanalysisService.ocrFiles(dossierId, fileIds);
reanalysisService.ocrFiles(dossierId, fileIds, false);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)

View File

@ -38,6 +38,7 @@ public interface ReanalysisResource {
String EXCLUDED_STATUS_PARAM = "excluded";
String FORCE_PARAM = "force";
String ALL_PAGES = "allPages";
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ -73,7 +74,8 @@ public interface ReanalysisResource {
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force);
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages);
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")

View File

@ -57,7 +57,7 @@ public class AdminInterfaceController {
fileStatusService.validateFileIsNotDeletedAndNotApproved(fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId, false);
}
@ -91,7 +91,7 @@ public class AdminInterfaceController {
if (!dryRun) {
fileStatusService.validateFileIsNotDeletedAndNotApproved(file.getId());
fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId());
fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId(), false);
}
}

View File

@ -123,7 +123,7 @@ public class FileStatusProcessingUpdateService {
} else {
fileStatusService.setStatusOcrProcessing(fileId,
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
fileStatusService.addToOcrQueue(dossierId, fileId, 2);
fileStatusService.addToOcrQueue(dossierId, fileId, 2, false);
}
}

View File

@ -12,7 +12,6 @@ import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
@ -318,7 +317,7 @@ public class FileStatusService {
}
log.info("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
setStatusOcrQueued(dossierId, fileId);
setStatusOcrQueued(dossierId, fileId, false);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
@ -547,7 +546,7 @@ public class FileStatusService {
}
public void setStatusOcrQueued(String dossierId, String fileId) {
public void setStatusOcrQueued(String dossierId, String fileId, boolean allPages) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
@ -559,7 +558,7 @@ public class FileStatusService {
updateOCRStartTime(fileId);
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
addToOcrQueue(dossierId, fileId, 2);
addToOcrQueue(dossierId, fileId, 2, allPages);
}
@ -718,13 +717,16 @@ public class FileStatusService {
}
public void addToOcrQueue(String dossierId, String fileId, int priority) {
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean allPages) {
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
Set<AzureOcrFeature> features = new HashSet<>();
if (removeWatermark) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
if (allPages) {
features.add(AzureOcrFeature.ALL_PAGES);
}
if (applicationType.equals("DocuMine")) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);

View File

@ -173,11 +173,11 @@ public class ReanalysisService {
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), false));
}
public void ocrFile(String dossierId, String fileId, boolean force) {
public void ocrFile(String dossierId, String fileId, boolean force, boolean allPages) {
dossierPersistenceService.getAndValidateDossier(dossierId);
FileModel dossierFile = fileStatusService.getStatus(fileId);
@ -197,18 +197,18 @@ public class ReanalysisService {
}
if (force) {
fileStatusService.setStatusOcrQueued(dossierId, fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId, allPages);
} else {
if (dossierFile.getOcrStartTime() != null) {
throw new ConflictException("File already has been OCR processed");
}
ocrFiles(dossierId, Sets.newHashSet(fileId));
ocrFiles(dossierId, Sets.newHashSet(fileId), allPages);
}
}
public void ocrFiles(String dossierId, Set<String> fileIds) {
public void ocrFiles(String dossierId, Set<String> fileIds, boolean allPages) {
var relevantFiles = getRelevantFiles(dossierId, fileIds);
@ -220,7 +220,7 @@ public class ReanalysisService {
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), allPages));
}

View File

@ -56,7 +56,7 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);