Pull request #458: RED-4343: Reworked logic for scheduling with maxRetries

Merge in RED/persistence-service from RED-4343 to master

* commit 'e792848efe4f860b095ee08330fd9e83b5734d7b':
  RED-4343: Reworked logic for scheduling with maxRetries
This commit is contained in:
Dominique Eiflaender 2022-06-24 12:40:20 +02:00
commit d4bbff4678
7 changed files with 21 additions and 65 deletions

View File

@ -1,6 +1,6 @@
package com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file;
public enum ProcessingStatus {
ANALYSE, ERROR, FULLREPROCESS, IMAGE_ANALYZING, INDEXING, NER_ANALYZING, OCR_PROCESSING, PROCESSED, PROCESSING, REPROCESS, SURROUNDING_TEXT_PROCESSING, UNPROCESSED, FULL_PROCESSING, PRE_PROCESSING, PRE_PROCESSED, PRE_PROCESSING_FAILED
ANALYSE, ERROR, FULLREPROCESS, IMAGE_ANALYZING, INDEXING, NER_ANALYZING, OCR_PROCESSING, PROCESSED, PROCESSING, REPROCESS, SURROUNDING_TEXT_PROCESSING, UNPROCESSED, FULL_PROCESSING, PRE_PROCESSING, PRE_PROCESSED
}

View File

@ -24,8 +24,6 @@ public interface UploadResource {
String DOSSIER_ID_PARAM = "dossierId";
String DOSSIER_ID_PATH_PARAM = "/{" + DOSSIER_ID_PARAM + "}";
String FLATTEN_PARAM = "flatten";
@PostMapping(value = UPLOAD_PATH, consumes = MediaType.APPLICATION_JSON_VALUE, produces =
MediaType.APPLICATION_JSON_VALUE)

View File

@ -68,17 +68,6 @@ public class FileStatusPersistenceService {
}
@Transactional
public void updateProcessingStatusPreprocessingFailed(String fileId) {
if (isFileDeleted(fileId)) {
return;
}
fileRepository.updateProcessingStatus(fileId, ProcessingStatus.PRE_PROCESSING_FAILED, OffsetDateTime.now()
.truncatedTo(ChronoUnit.MILLIS), calculateProcessingErrorCounter(fileId, ProcessingStatus.PRE_PROCESSING_FAILED));
}
@Transactional
public void updateProcessingStatus(String fileId, int numberOfPages, long dictionaryVersion, long rulesVersion, long legalBasisVersion, long duration,
long dossierDictionaryVersion, int analysisVersion, int analysisNumber) {
@ -370,9 +359,9 @@ public class FileStatusPersistenceService {
}
public List<FileEntity> getAllRelevantStatusesForReanalysisScheduler() {
public List<FileEntity> getAllRelevantStatusesForReanalysisScheduler(int maxRetries) {
return fileRepository.getAllRelevantStatusesForReanalysisScheduler();
return fileRepository.getAllRelevantStatusesForReanalysisScheduler(maxRetries);
}
@ -394,7 +383,6 @@ public class FileStatusPersistenceService {
switch (processingStatus) {
case ERROR:
case PRE_PROCESSING_FAILED:
return fileRepository.findById(fileId).map(FileEntity::getProcessingErrorCounter).orElse(0) + 1;
case PROCESSED:

View File

@ -149,8 +149,8 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
void setLastManualChangeDate(String fileId, OffsetDateTime lastManualChangeDate, OffsetDateTime lastUpdated);
@Query("select f from FileEntity f join DossierEntity d on d.id = f.dossierId where f.workflowStatus <> 'APPROVED' and f.excludedFromAutomaticAnalysis = false " + "and ( f.processingStatus = 'PROCESSED' or f.processingStatus = 'UNPROCESSED' or f.processingStatus = 'ERROR' )" + "and d.softDeletedTime is null and d.hardDeletedTime is null and d.archivedTime is null " + "and f.deleted is null and f.hardDeletedTime is null")
List<FileEntity> getAllRelevantStatusesForReanalysisScheduler();
@Query("select f from FileEntity f join DossierEntity d on d.id = f.dossierId where f.workflowStatus <> 'APPROVED' and f.excludedFromAutomaticAnalysis = false " + "and ( f.processingStatus = 'PROCESSED' or f.processingStatus = 'UNPROCESSED' or f.processingStatus = 'ERROR' )" + "and d.softDeletedTime is null and d.hardDeletedTime is null and d.archivedTime is null " + "and f.deleted is null and f.hardDeletedTime is null and f.processingErrorCounter <= :maxRetries")
List<FileEntity> getAllRelevantStatusesForReanalysisScheduler(int maxRetries);
@Modifying(clearAutomatically = true)

View File

@ -43,7 +43,7 @@ public class FileStatusProcessingUpdateService {
fileStatusService.setStatusAnalyse(dossierId, fileId, false);
//TODO This might also be priority, depending on what the previous call was.
fileStatusService.addToAnalysisQueue(dossierId, fileId, false, null, false);
fileStatusService.addToAnalysisQueue(dossierId, fileId, false, null);
}
break;
@ -80,7 +80,7 @@ public class FileStatusProcessingUpdateService {
public void preprocessingFailed(String dossierId, String fileId) {
fileStatusService.updateProcessingStatusPreprocessingFailed(dossierId, fileId);
setStatusError(dossierId, fileId, "preprocessingFailed");
}

View File

@ -74,14 +74,13 @@ public class FileStatusService {
private final FileManagementServiceSettings settings;
private final ReanalysisRequiredStatusService reanalysisRequiredStatusService;
private final ViewedPagesPersistenceService viewedPagesPersistenceService;
private final ApplicationConfigService applicationConfigService;
private final FileManagementServiceSettings fileManagementServiceSettings;
@Transactional
public List<FileModel> getAllRelevantStatusesForReanalysisScheduler() {
var fileEntities = fileStatusPersistenceService.getAllRelevantStatusesForReanalysisScheduler();
var fileEntities = fileStatusPersistenceService.getAllRelevantStatusesForReanalysisScheduler(fileManagementServiceSettings.getMaxErrorRetries());
var convertedList = convert(fileEntities, FileModel.class, new FileModelMapper());
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(convertedList).stream().filter(FileModel::isAnalysisRequired).collect(Collectors.toList());
}
@ -125,7 +124,7 @@ public class FileStatusService {
public void updateProcessingStatusPreprocessed(String dossierId, String fileId, boolean hasHighlights, long fileSize) {
fileStatusPersistenceService.updateProcessingStatusPreprocessed(fileId, hasHighlights, fileSize);
addToAnalysisQueue(dossierId, fileId, false, Set.of(), false);
addToAnalysisQueue(dossierId, fileId, false, Set.of());
if (fileManagementServiceSettings.isPdf2ImageServiceEnabled()) {
addToPdf2ImageQueue(dossierId, fileId);
@ -134,16 +133,6 @@ public class FileStatusService {
}
public void updateProcessingStatusPreprocessingFailed(String dossierId, String fileId) {
// TODO add better logic than always reprocess.
fileStatusPersistenceService.updateProcessingStatusPreprocessingFailed(fileId);
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
addToPreprocessingQueue(dossierId, fileId, fileEntity.getFilename());
}
public void setExcludedPages(String fileId, Set<Integer> excludedPages) {
fileStatusPersistenceService.setExcludedPages(fileId, excludedPages);
@ -203,13 +192,8 @@ public class FileStatusService {
return;
}
if (fileStatus.getProcessingErrorCounter() >= settings.getMaxErrorRetries() && !triggeredManually) {
log.warn("File {} was {} times retried with failure", fileStatus.getId(), fileStatus.getProcessingErrorCounter());
return;
}
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.REPROCESS);
addToAnalysisQueue(dossierId, fileId, priority, sectionsToReanalyse, triggeredManually);
addToAnalysisQueue(dossierId, fileId, priority, sectionsToReanalyse);
}
@ -235,18 +219,13 @@ public class FileStatusService {
return;
}
if (fileStatus.getProcessingErrorCounter() >= settings.getMaxErrorRetries()) {
log.warn("File {} was {} times retried with failure", fileStatus.getId(), fileStatus.getProcessingErrorCounter());
return;
}
if (requiresStructureAnalysis) {
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.TEXT);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.NER_ENTITIES);
}
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), false);
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet());
}
@ -262,18 +241,18 @@ public class FileStatusService {
@Transactional
protected void addToAnalysisQueue(String dossierId, String fileId, boolean priority, Set<Integer> sectionsToReanalyse, boolean triggeredManually) {
protected void addToAnalysisQueue(String dossierId, String fileId, boolean priority, Set<Integer> sectionsToReanalyse) {
var dossier = dossierPersistenceService.getAndValidateDossier(dossierId);
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
if (fileEntity.isExcluded()) {
log.debug("File {} is excluded", fileEntity.getId());
if(!fileManagementStorageService.objectExists(dossierId, fileId, FileType.ORIGIN)){
addToPreprocessingQueue(dossierId, fileId, fileEntity.getFilename());
return;
}
if (fileEntity.getProcessingErrorCounter() >= settings.getMaxErrorRetries() && !triggeredManually) {
log.warn("File {} was {} times retried with failure", fileEntity.getId(), fileEntity.getProcessingErrorCounter());
if (fileEntity.isExcluded()) {
log.debug("File {} is excluded", fileEntity.getId());
return;
}
@ -347,7 +326,7 @@ public class FileStatusService {
public void createStatus(String dossierId, String fileId, String uploader, String filename) {
fileStatusPersistenceService.createStatus(dossierId, fileId, filename, uploader);
addToPreprocessingQueue(dossierId, fileId, filename);
addToAnalysisQueue(dossierId, fileId,false, Set.of());
}
@ -442,11 +421,6 @@ public class FileStatusService {
return;
}
if (fileStatus.getProcessingErrorCounter() >= settings.getMaxErrorRetries()) {
log.warn("File {} was {} times retried with failure", fileStatus.getId(), fileStatus.getProcessingErrorCounter());
return;
}
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING);
addToOcrQueue(dossierId, fileId, 2);
}
@ -497,6 +471,7 @@ public class FileStatusService {
public void overwriteFile(String dossierId, String fileId, String uploader, String filename, boolean keepManualRedactions) {
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.ORIGIN);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.REDACTION_LOG);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.SECTION_GRID);
fileManagementStorageService.deleteObject(dossierId, fileId, FileType.IMAGE_INFO);
@ -512,7 +487,7 @@ public class FileStatusService {
viewedPagesPersistenceService.deleteForFile(fileId);
addToPreprocessingQueue(dossierId, fileId, filename);
addToAnalysisQueue(dossierId, fileId, false, Set.of());
}
@ -567,13 +542,8 @@ public class FileStatusService {
return;
}
if (fileStatus.getProcessingErrorCounter() >= settings.getMaxErrorRetries()) {
log.warn("File {} was {} times retried with failure", fileStatus.getId(), fileStatus.getProcessingErrorCounter());
return;
}
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.ANALYSE);
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), false);
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet());
}

View File

@ -30,6 +30,6 @@ public class FileManagementServiceSettings {
private boolean pdf2ImageServiceEnabled;
private int maxErrorRetries = 5;
private int maxErrorRetries = 1;
}