RED-8670: add more settings to OCR #919
@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
|
||||
@ -42,7 +42,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest;
|
||||
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
|
||||
|
||||
@ -61,7 +60,6 @@ public class DossierTemplateController implements DossierTemplateResource {
|
||||
private final AuditPersistenceService auditPersistenceService;
|
||||
private final DossierManagementService dossierManagementService;
|
||||
private final DossierACLService dossierACLService;
|
||||
private final UserService userService;
|
||||
|
||||
|
||||
@Override
|
||||
@ -314,6 +312,10 @@ public class DossierTemplateController implements DossierTemplateResource {
|
||||
.applyDictionaryUpdatesToAllDossiersByDefault(dossierTemplate.isApplyDictionaryUpdatesToAllDossiersByDefault())
|
||||
.ocrByDefault(dossierTemplate.isOcrByDefault())
|
||||
.removeWatermark(dossierTemplate.isRemoveWatermark())
|
||||
.idpByDefault(dossierTemplate.isIdpByDefault())
|
||||
.rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault())
|
||||
.fontStyleDetection(dossierTemplate.isFontStyleDetection())
|
||||
.ocrAllPages(dossierTemplate.isOcrAllPages())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -36,6 +36,7 @@ import lombok.RequiredArgsConstructor;
|
||||
public class ReanalysisController implements ReanalysisResource {
|
||||
|
||||
private static final String DOSSIER_ID = "dossierId";
|
||||
|
||||
private final ReanalysisService reanalysisService;
|
||||
private final FileStatusManagementService fileStatusManagementService;
|
||||
private final AuditPersistenceService auditPersistenceService;
|
||||
@ -98,11 +99,11 @@ public class ReanalysisController implements ReanalysisResource {
|
||||
|
||||
@Override
|
||||
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
|
||||
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId) {
|
||||
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||
|
||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||
|
||||
reanalysisService.ocrDossier(dossierId);
|
||||
reanalysisService.ocrDossier(dossierId, idp);
|
||||
|
||||
auditPersistenceService.audit(AuditRequest.builder()
|
||||
.userId(KeycloakSecurity.getUserId())
|
||||
@ -118,11 +119,12 @@ public class ReanalysisController implements ReanalysisResource {
|
||||
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
||||
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@PathVariable(FILE_ID) String fileId,
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) {
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||
|
||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||
validateOCR(dossierId, fileId);
|
||||
reanalysisService.ocrFile(dossierId, fileId, force);
|
||||
reanalysisService.ocrFile(dossierId, fileId, force, idp);
|
||||
auditPersistenceService.audit(AuditRequest.builder()
|
||||
.userId(KeycloakSecurity.getUserId())
|
||||
.objectId(dossierId)
|
||||
@ -136,11 +138,13 @@ public class ReanalysisController implements ReanalysisResource {
|
||||
|
||||
@Override
|
||||
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
||||
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds) {
|
||||
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@RequestBody Set<String> fileIds,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||
|
||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
|
||||
reanalysisService.ocrFiles(dossierId, fileIds);
|
||||
reanalysisService.ocrFiles(dossierId, fileIds, idp);
|
||||
auditPersistenceService.audit(AuditRequest.builder()
|
||||
.userId(KeycloakSecurity.getUserId())
|
||||
.objectId(dossierId)
|
||||
|
||||
@ -38,6 +38,7 @@ public interface ReanalysisResource {
|
||||
|
||||
String EXCLUDED_STATUS_PARAM = "excluded";
|
||||
String FORCE_PARAM = "force";
|
||||
String IDP_PARAM = "idp";
|
||||
|
||||
|
||||
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
||||
@ -65,7 +66,7 @@ public interface ReanalysisResource {
|
||||
/**
 * REST endpoint declaration: OCR and reanalyze a dossier (see the OpenAPI summary below).
 * Mapped as a POST on {@code OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE}.
 *
 * NOTE(review): this excerpt shows two declarations of the method; the second one adds the
 * {@code idp} parameter — presumably the first is the pre-change line of the diff.
 *
 * @param dossierId path variable identifying the dossier
 * @param idp       optional request parameter named by {@code IDP_PARAM} ("idp"); when omitted the
 *                  {@code FALSE} default value constant applies
 */
@Operation(summary = "Ocr and reanalyze a dossier", description = "None")
|
||||
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
||||
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId);
|
||||
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||
|
||||
|
||||
@Operation(summary = "Ocr and reanalyze a file", description = "None")
|
||||
@ -73,13 +74,16 @@ public interface ReanalysisResource {
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
|
||||
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@PathVariable(FILE_ID) String fileId,
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force);
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||
|
||||
|
||||
/**
 * REST endpoint declaration: OCR and reanalyze multiple files of one dossier (see the OpenAPI
 * summary below). Mapped as a POST on
 * {@code OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH}.
 *
 * NOTE(review): this excerpt shows two declarations of the method; the second one adds the
 * {@code idp} parameter — presumably the first is the pre-change line of the diff.
 *
 * @param dossierId path variable identifying the dossier
 * @param fileIds   request body: the ids of the files to OCR
 * @param idp       optional request parameter named by {@code IDP_PARAM} ("idp"); when omitted the
 *                  {@code FALSE} default value constant applies
 */
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
|
||||
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
||||
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds);
|
||||
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@RequestBody Set<String> fileIds,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||
|
||||
|
||||
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")
|
||||
|
||||
@ -35,8 +35,8 @@ dependencies {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
api("com.knecon.fforesight:azure-ocr-service-api:0.13.0")
|
||||
implementation("com.knecon.fforesight:llm-service-api:1.20.0-RED10072.2")
|
||||
api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
|
||||
implementation("com.knecon.fforesight:llm-service-api:1.35.0")
|
||||
api("com.knecon.fforesight:jobs-commons:0.13.0")
|
||||
api("com.iqser.red.commons:storage-commons:2.50.0")
|
||||
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {
|
||||
|
||||
@ -83,6 +83,18 @@ public class DossierTemplateEntity {
|
||||
@Column(name = "ocr_by_default")
|
||||
private boolean ocrByDefault;
|
||||
|
||||
@Column(name = "rotation_correction_by_default")
|
||||
private boolean rotationCorrectionByDefault;
|
||||
|
||||
@Column(name = "idp_by_default")
|
||||
private boolean idpByDefault;
|
||||
|
||||
@Column(name = "font_style_detection")
|
||||
private boolean fontStyleDetection;
|
||||
|
||||
@Column(name = "ocr_all_pages")
|
||||
private boolean ocrAllPages;
|
||||
|
||||
@Column(name = "remove_watermark")
|
||||
private boolean removeWatermark;
|
||||
|
||||
@ -128,6 +140,7 @@ public class DossierTemplateEntity {
|
||||
@Enumerated(EnumType.STRING)
|
||||
private LayoutParsingType layoutParsingType;
|
||||
|
||||
|
||||
public static DossierTemplateEntity copyDossierTemplateEntityWithoutChildEntities(DossierTemplateEntity dossierTemplateEntity) {
|
||||
|
||||
DossierTemplateEntity dossierTemplateCopy = new DossierTemplateEntity();
|
||||
@ -148,6 +161,10 @@ public class DossierTemplateEntity {
|
||||
dossierTemplateCopy.removeWatermark = dossierTemplateEntity.removeWatermark;
|
||||
dossierTemplateCopy.downloadFileTypes = dossierTemplateEntity.downloadFileTypes;
|
||||
dossierTemplateCopy.layoutParsingType = dossierTemplateEntity.layoutParsingType;
|
||||
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
|
||||
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
|
||||
dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection;
|
||||
dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages;
|
||||
return dossierTemplateCopy;
|
||||
}
|
||||
|
||||
|
||||
@ -152,9 +152,16 @@ public class FileEntity {
|
||||
@Column(name = "number_of_ocred_pages")
|
||||
private Integer numberOfOCRedPages;
|
||||
|
||||
@Column(name = "number_of_idp_pages")
|
||||
private Integer numberOfIdpPages;
|
||||
|
||||
@Column(name = "ocr_end_time")
|
||||
private OffsetDateTime ocrEndTime;
|
||||
|
||||
private int usedPromptTokens;
|
||||
|
||||
private int usedCompletionTokens;
|
||||
|
||||
@Column
|
||||
private boolean hasAnnotationComments;
|
||||
|
||||
|
||||
@ -1,20 +0,0 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Processor-local model of an OCR status update for a single file.
 *
 * NOTE(review): the hunk header for this file reads "-1,20 +0,0", i.e. the class is removed by
 * this change; elsewhere in the diff the import of this class is replaced by
 * {@code com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse}.
 *
 * Field meanings below are read off the field names only — confirm against the OCR service
 * contract before relying on them.
 */
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Builder
public class OCRStatusUpdateResponse {
|
||||
|
||||
// id of the file the update refers to
private String fileId;
|
||||
// presumably the total number of pages scheduled for OCR
private int numberOfPagesToOCR;
|
||||
// presumably the number of pages already processed by OCR
private int numberOfOCRedPages;
|
||||
private boolean ocrFinished;
|
||||
private boolean ocrStarted;
|
||||
|
||||
}
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import org.springframework.retry.support.RetryTemplate;
|
||||
@ -15,6 +17,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.UntouchedDo
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
||||
import com.iqser.red.service.search.v1.model.IndexMessageType;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
|
||||
import jakarta.transaction.Transactional;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -114,7 +117,7 @@ public class FileStatusProcessingUpdateService {
|
||||
}
|
||||
|
||||
|
||||
public void requeueOCROrMarkFailed(String dossierId, String fileId, FileErrorInfo fileErrorInfo) {
|
||||
public void requeueOCROrMarkFailed(String dossierId, String fileId, Set<AzureOcrFeature> features, FileErrorInfo fileErrorInfo) {
|
||||
|
||||
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
|
||||
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
|
||||
@ -122,7 +125,7 @@ public class FileStatusProcessingUpdateService {
|
||||
} else {
|
||||
fileStatusService.setStatusOcrProcessing(fileId,
|
||||
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
|
||||
fileStatusService.addToOcrQueue(dossierId, fileId, 2);
|
||||
fileStatusService.addToOcrQueue(dossierId, fileId, 2, features);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -12,7 +12,6 @@ import java.util.function.BiFunction;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -21,6 +20,7 @@ import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocu
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierTemplateEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
|
||||
@ -31,7 +31,6 @@ import com.iqser.red.service.persistence.management.v1.processor.model.AnalysisT
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||
@ -80,6 +79,7 @@ import com.knecon.fforesight.llm.service.LlmNerMessage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import jakarta.transaction.Transactional;
|
||||
@ -352,7 +352,6 @@ public class FileStatusService {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
boolean forceAnalysis = false;
|
||||
if (settings.isLlmNerServiceEnabled()) {
|
||||
boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES);
|
||||
@ -386,7 +385,7 @@ public class FileStatusService {
|
||||
|
||||
boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE);
|
||||
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
|
||||
if(analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
|
||||
if (analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
|
||||
messageType = MessageType.ANALYSE;
|
||||
}
|
||||
|
||||
@ -569,6 +568,12 @@ public class FileStatusService {
|
||||
|
||||
/**
 * Convenience overload that delegates to {@link #setStatusOcrQueued(String, String, boolean)}
 * with {@code idp = false}, i.e. without an explicit IDP request from the caller.
 *
 * @param dossierId id of the dossier the file belongs to
 * @param fileId    id of the file to queue for OCR
 */
public void setStatusOcrQueued(String dossierId, String fileId) {
|
||||
|
||||
setStatusOcrQueued(dossierId, fileId, false);
|
||||
}
|
||||
|
||||
|
||||
public void setStatusOcrQueued(String dossierId, String fileId, boolean idp) {
|
||||
|
||||
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
|
||||
|
||||
if (fileStatus.isExcluded()) {
|
||||
@ -579,7 +584,7 @@ public class FileStatusService {
|
||||
updateOCRStartTime(fileId);
|
||||
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
|
||||
addToOcrQueue(dossierId, fileId, 2);
|
||||
addToOcrQueue(dossierId, fileId, 2, idp);
|
||||
}
|
||||
|
||||
|
||||
@ -760,22 +765,39 @@ public class FileStatusService {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Derives the set of {@code AzureOcrFeature}s for a file from its dossier template and forwards
 * the request to {@code addToOcrQueue(String, String, int, Set)}.
 *
 * NOTE(review): this excerpt interleaves two versions of the method (two signatures are visible,
 * with and without {@code useIdp}); presumably the {@code removeWatermark} local and the
 * {@code isDocuMine()} check belong to the pre-change version — confirm against the real file.
 *
 * Feature selection in the template-driven version:
 * FONT_STYLE_DETECTION, REMOVE_WATERMARKS and ROTATION_CORRECTION are each added when the
 * corresponding template flag is set; IDP is added when the caller passes {@code useIdp} or the
 * template has {@code idpByDefault} enabled.
 *
 * NOTE(review): the template's {@code ocrAllPages} flag is not read here — confirm whether it is
 * meant to influence the OCR request.
 *
 * @param dossierId id of the dossier; used to look up the dossier template
 * @param fileId    id of the file to OCR
 * @param priority  priority value passed through unchanged to the Set-based overload
 * @param useIdp    explicit request to enable IDP regardless of the template default
 */
public void addToOcrQueue(String dossierId, String fileId, int priority) {
|
||||
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean useIdp) {
|
||||
|
||||
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
|
||||
DossierTemplateEntity dt = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId));
|
||||
Set<AzureOcrFeature> features = new HashSet<>();
|
||||
if (removeWatermark) {
|
||||
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
|
||||
}
|
||||
if (currentApplicationTypeProvider.isDocuMine()) {
|
||||
features.add(AzureOcrFeature.ROTATION_CORRECTION);
|
||||
|
||||
if (dt.isFontStyleDetection()) {
|
||||
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
|
||||
}
|
||||
if (dt.isRemoveWatermark()) {
|
||||
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
|
||||
}
|
||||
// an explicit request OR the template default is enough to enable IDP
if (useIdp || dt.isIdpByDefault()) {
|
||||
features.add(AzureOcrFeature.IDP);
|
||||
}
|
||||
if (dt.isRotationCorrectionByDefault()) {
|
||||
features.add(AzureOcrFeature.ROTATION_CORRECTION);
|
||||
}
|
||||
|
||||
addToOcrQueue(dossierId, fileId, priority, features);
|
||||
}
|
||||
|
||||
|
||||
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
|
||||
|
||||
var request = DocumentRequest.builder()
|
||||
// needed for legacy OCR-services
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services
|
||||
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS))
|
||||
// new api
|
||||
.originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
|
||||
.viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))
|
||||
.idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT))
|
||||
.features(features)
|
||||
.build();
|
||||
|
||||
@ -820,7 +842,7 @@ public class FileStatusService {
|
||||
|
||||
fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false);
|
||||
|
||||
if(oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
|
||||
if (oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
|
||||
fileStatusPersistenceService.clearLastDownload(fileId);
|
||||
}
|
||||
}
|
||||
@ -963,6 +985,13 @@ public class FileStatusService {
|
||||
/**
 * Convenience overload that delegates to the six-argument
 * {@code setStatusFullReprocess(..., boolean idp)} with {@code idp = false}.
 *
 * @param dossierId                 id of the dossier the file belongs to
 * @param fileId                    id of the file to reprocess
 * @param priority                  passed through unchanged
 * @param requiresStructureAnalysis passed through unchanged
 * @param runOcr                    passed through unchanged
 */
@Transactional
|
||||
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr) {
|
||||
|
||||
setStatusFullReprocess(dossierId, fileId, priority, requiresStructureAnalysis, runOcr, false);
|
||||
}
|
||||
|
||||
|
||||
@Transactional
|
||||
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr, boolean idp) {
|
||||
|
||||
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
|
||||
|
||||
if (fileStatus.isExcluded()) {
|
||||
@ -970,14 +999,14 @@ public class FileStatusService {
|
||||
return;
|
||||
}
|
||||
|
||||
if (requiresStructureAnalysis || runOcr) {
|
||||
if (requiresStructureAnalysis || runOcr || idp) {
|
||||
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
|
||||
fileManagementStorageService.deleteDocumentAndNerObjects(dossierId, fileId);
|
||||
}
|
||||
|
||||
if (runOcr) {
|
||||
if (runOcr || idp) {
|
||||
fileStatusPersistenceService.resetOcrStartAndEndDate(fileId);
|
||||
setStatusOcrQueued(dossierId, fileId);
|
||||
setStatusOcrQueued(dossierId, fileId, idp);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1064,6 +1093,7 @@ public class FileStatusService {
|
||||
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT);
|
||||
}
|
||||
|
||||
|
||||
@Transactional
|
||||
public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) {
|
||||
|
||||
@ -1175,4 +1205,10 @@ public class FileStatusService {
|
||||
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Forwards the given token counts for a file to
 * {@code fileStatusPersistenceService.increaseTokenUsage}; no additional logic is applied here.
 *
 * @param fileId           id of the file the token usage is attributed to
 * @param promptTokens     prompt token count — presumably added to the stored value; verify in the repository query
 * @param completionTokens completion token count — presumably added to the stored value; verify in the repository query
 */
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
|
||||
|
||||
fileStatusPersistenceService.increaseTokenUsage(fileId, promptTokens, completionTokens);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -171,18 +171,18 @@ public class ReanalysisService {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Queues OCR for the files of a dossier.
 *
 * Starts from {@code getAllFilesForDossier(dossierId, validFilesFilter)} and queues only files
 * that have no OCR start time yet and whose processing status is {@code PROCESSED}; all other
 * files are skipped silently.
 *
 * NOTE(review): this excerpt interleaves two versions of the method (with and without the
 * {@code idp} parameter) — presumably the one-argument signature and the two-argument
 * {@code setStatusOcrQueued} call are the pre-change lines of the diff.
 *
 * @param dossierId id of the dossier whose files should be OCRed
 * @param idp       forwarded to {@code fileStatusService.setStatusOcrQueued} for every queued file
 */
public void ocrDossier(String dossierId) {
|
||||
public void ocrDossier(String dossierId, boolean idp) {
|
||||
|
||||
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
|
||||
|
||||
relevantFiles.stream()
|
||||
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
||||
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
|
||||
}
|
||||
|
||||
|
||||
public void ocrFile(String dossierId, String fileId, boolean force) {
|
||||
public void ocrFile(String dossierId, String fileId, boolean force, boolean idp) {
|
||||
|
||||
dossierPersistenceService.getAndValidateDossier(dossierId);
|
||||
FileModel dossierFile = fileStatusService.getStatus(fileId);
|
||||
@ -202,30 +202,31 @@ public class ReanalysisService {
|
||||
}
|
||||
|
||||
if (force) {
|
||||
fileStatusService.setStatusOcrQueued(dossierId, fileId);
|
||||
fileStatusService.setStatusOcrQueued(dossierId, fileId, idp);
|
||||
} else {
|
||||
if (dossierFile.getOcrStartTime() != null) {
|
||||
throw new ConflictException("File already has been OCR processed");
|
||||
}
|
||||
|
||||
ocrFiles(dossierId, Sets.newHashSet(fileId));
|
||||
ocrFiles(dossierId, Sets.newHashSet(fileId), idp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Queues OCR for a set of files of one dossier.
 *
 * The files are resolved through {@code getRelevantFiles(dossierId, fileIds)}. If any of them is
 * in a processing status other than {@code PROCESSED}, {@code OCR_PROCESSING_QUEUED} or
 * {@code OCR_PROCESSING}, the whole request is rejected before anything is queued. Afterwards
 * only files without an OCR start time are queued.
 *
 * NOTE(review): this excerpt interleaves two versions of the method (with and without the
 * {@code idp} parameter, and two formattings of the same status predicate) — presumably the
 * lines without {@code idp} are the pre-change lines of the diff.
 *
 * @param dossierId id of the dossier the files belong to
 * @param fileIds   ids of the files to OCR
 * @param idp       forwarded to {@code fileStatusService.setStatusOcrQueued} for every queued file
 * @throws ConflictException if at least one relevant file is in a status other than the three listed above
 */
public void ocrFiles(String dossierId, Set<String> fileIds) {
|
||||
public void ocrFiles(String dossierId, Set<String> fileIds, boolean idp) {
|
||||
|
||||
var relevantFiles = getRelevantFiles(dossierId, fileIds);
|
||||
|
||||
if (relevantFiles.stream()
|
||||
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) && !fileStatus.getProcessingStatus()
|
||||
.equals(ProcessingStatus.OCR_PROCESSING_QUEUED) && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
|
||||
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) //
|
||||
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING_QUEUED) //
|
||||
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
|
||||
throw new ConflictException("File is not processed");
|
||||
}
|
||||
|
||||
relevantFiles.stream()
|
||||
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
|
||||
}
|
||||
|
||||
|
||||
@ -275,12 +276,12 @@ public class ReanalysisService {
|
||||
|
||||
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
|
||||
|
||||
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.dossierIds(), reanalysisSettings.fileIds());
|
||||
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.getDossierIds(), reanalysisSettings.getFileIds());
|
||||
|
||||
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
|
||||
.stream()
|
||||
.filter(file -> isInList(file, reanalysisSettings))
|
||||
.filter(reanalysisSettings.fileStatusFilter().asPredicate())
|
||||
.filter(reanalysisSettings.getFileStatusFilter())
|
||||
.peek(file -> log.info("Reanalyzing file {}", file.getId()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
@ -289,8 +290,9 @@ public class ReanalysisService {
|
||||
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
|
||||
file.getId(),
|
||||
false,
|
||||
reanalysisSettings.repeatStructureAnalysis(),
|
||||
reanalysisSettings.runOcr()));
|
||||
reanalysisSettings.isRepeatStructureAnalysis(),
|
||||
reanalysisSettings.isRunOcr(),
|
||||
reanalysisSettings.isRunIdp()));
|
||||
|
||||
return rejectedFiles;
|
||||
}
|
||||
@ -314,8 +316,8 @@ public class ReanalysisService {
|
||||
|
||||
/**
 * Tells whether a file is selected by the id filters of the given reanalysis settings.
 *
 * An empty file-id collection matches every file id and an empty dossier-id collection matches
 * every dossier id; both conditions must hold for the file to be selected.
 *
 * NOTE(review): this excerpt shows two return statements — one using record-style accessors
 * ({@code fileIds()}) and one using getters ({@code getFileIds()}); presumably the former is the
 * pre-change version of the diff.
 *
 * @param file               the file to test
 * @param reAnalysisSettings settings carrying the file-id and dossier-id filters
 * @return {@code true} if the file passes both id filters
 */
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
|
||||
|
||||
return (reAnalysisSettings.fileIds().isEmpty() || reAnalysisSettings.fileIds().contains(file.getId())) //
|
||||
&& (reAnalysisSettings.dossierIds().isEmpty() || reAnalysisSettings.dossierIds().contains(file.getDossierId()));
|
||||
return (reAnalysisSettings.getFileIds().isEmpty() || reAnalysisSettings.getFileIds().contains(file.getId())) //
|
||||
&& (reAnalysisSettings.getDossierIds().isEmpty() || reAnalysisSettings.getDossierIds().contains(file.getDossierId()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -27,8 +27,7 @@ public class LayoutParsingRequestFactory {
|
||||
|
||||
public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) {
|
||||
|
||||
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(
|
||||
dossierTemplateId).getLayoutParsingType();
|
||||
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType();
|
||||
|
||||
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
|
||||
@ -39,6 +38,9 @@ public class LayoutParsingRequestFactory {
|
||||
Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty();
|
||||
|
||||
Optional<String> optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty();
|
||||
|
||||
return LayoutParsingRequest.builder()
|
||||
.layoutParsingType(layoutParsingType)
|
||||
.identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority))
|
||||
@ -55,6 +57,7 @@ public class LayoutParsingRequestFactory {
|
||||
.documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId,
|
||||
fileId,
|
||||
FileType.MARKDOWN)) : Optional.empty())
|
||||
.idpResultStorageId(optionalIdpResultFileId)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,6 @@ import com.iqser.red.service.persistence.management.v1.processor.entity.projecti
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
|
||||
@ -31,6 +30,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.component.C
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import jakarta.transaction.Transactional;
|
||||
@ -602,9 +603,9 @@ public class FileStatusPersistenceService {
|
||||
/**
 * Counts the files returned by {@code fileRepository.findFilesByAssignee(userId)} that have no
 * hard-deleted timestamp.
 *
 * NOTE(review): this excerpt shows two return statements — one collecting to a list and taking
 * its size, one using {@code count()} with {@code Math.toIntExact}; presumably the former is the
 * pre-change version of the diff. {@code Math.toIntExact} throws {@code ArithmeticException} if
 * the count does not fit into an {@code int}.
 *
 * @param userId id of the assignee
 * @return number of assigned files whose {@code hardDeletedTime} is {@code null}
 */
public int getNumberOfAssignedFiles(String userId) {
|
||||
|
||||
List<FileEntity> files = fileRepository.findFilesByAssignee(userId);
|
||||
return files.stream()
|
||||
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
|
||||
.collect(Collectors.toList()).size();
|
||||
return Math.toIntExact(files.stream()
|
||||
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
|
||||
.count());
|
||||
}
|
||||
|
||||
|
||||
@ -680,6 +681,7 @@ public class FileStatusPersistenceService {
|
||||
fileRepository.updateOCRStatus(response.getFileId(),
|
||||
response.getNumberOfPagesToOCR(),
|
||||
response.getNumberOfOCRedPages(),
|
||||
response.getFeatures().contains(AzureOcrFeature.IDP) ? response.getNumberOfOCRedPages() : 0,
|
||||
response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
|
||||
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
|
||||
}
|
||||
@ -765,4 +767,11 @@ public class FileStatusPersistenceService {
|
||||
fileRepository.updateLastDownloadForFile(fileId, null);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Adds the given prompt and completion token counts to the usage recorded for a file.
 * Delegates directly to {@code fileRepository.increaseTokenUsage}.
 *
 * @param fileId           id of the file whose usage is increased
 * @param promptTokens     number of prompt tokens passed on to the repository
 * @param completionTokens number of completion tokens passed on to the repository
 */
@Transactional
|
||||
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
|
||||
|
||||
fileRepository.increaseTokenUsage(fileId, promptTokens, completionTokens);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -389,11 +389,15 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
||||
@Transactional
|
||||
@Modifying(clearAutomatically = true)
|
||||
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
|
||||
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, f.ocrEndTime = :ocrEndTime, "
|
||||
+ "f.lastUpdated = :lastUpdated where f.id = :fileId")
|
||||
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, "
|
||||
+ "f.numberOfIdpPages = :numberOfIdpPages, "
|
||||
+ "f.ocrEndTime = :ocrEndTime, "
|
||||
+ "f.lastUpdated = :lastUpdated "
|
||||
+ "where f.id = :fileId")
|
||||
void updateOCRStatus(@Param("fileId") String fileId,
|
||||
@Param("numberOfPagesToOCR") int numberOfPagesToOCR,
|
||||
@Param("numberOfOCRedPages") int numberOfOCRedPages,
|
||||
@Param("numberOfIdpPages") int numberOfIdpPages,
|
||||
@Param("ocrEndTime") OffsetDateTime ocrEndTime,
|
||||
@Param("lastUpdated") OffsetDateTime lastUpdated);
|
||||
|
||||
@ -409,7 +413,7 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
||||
|
||||
|
||||
@Modifying(clearAutomatically = true)
|
||||
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL where f.id = :fileId")
|
||||
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL, f.numberOfIdpPages = NULL where f.id = :fileId")
|
||||
void resetOcrStartAndEndDate(@Param("fileId") String fileId);
|
||||
|
||||
|
||||
@ -479,6 +483,11 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
||||
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
|
||||
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
|
||||
|
||||
|
||||
@Modifying
|
||||
@Query("UPDATE FileEntity f SET f.usedPromptTokens = f.usedPromptTokens + :promptTokens, f.usedCompletionTokens = f.usedCompletionTokens + :completionTokens WHERE f.id = :id")
|
||||
void increaseTokenUsage(@Param("id") String fileId, @Param("promptTokens") int promptTokens, @Param("completionTokens") int completionTokens);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -63,7 +63,7 @@ public class NerMessageReceiver {
|
||||
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
|
||||
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
|
||||
addFileIdToTrace(fileId);
|
||||
|
||||
fileStatusService.increaseTokenUsage(fileId, message.getPromptTokens(), message.getCompletionTokens());
|
||||
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
|
||||
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
|
||||
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);
|
||||
|
||||
@ -11,14 +11,13 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
@ -43,6 +42,8 @@ public class OCRProcessingMessageReceiver {
|
||||
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
|
||||
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
|
||||
|
||||
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
|
||||
|
||||
var fileModel = fileStatusService.getStatus(response.getFileId());
|
||||
|
||||
if (response.isOcrStarted()) {
|
||||
@ -57,7 +58,6 @@ public class OCRProcessingMessageReceiver {
|
||||
response.getNumberOfOCRedPages());
|
||||
}
|
||||
|
||||
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
|
||||
}
|
||||
|
||||
|
||||
@ -95,6 +95,7 @@ public class OCRProcessingMessageReceiver {
|
||||
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
|
||||
ocrRequestMessage.getFileId(),
|
||||
ocrRequestMessage.getFeatures(),
|
||||
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
|
||||
}
|
||||
|
||||
|
||||
@ -259,3 +259,7 @@ databaseChangeLog:
|
||||
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
|
||||
- include:
|
||||
file: db/changelog/tenant/159-cleanup-truncated-indices.yaml
|
||||
- include:
|
||||
file: db/changelog/tenant/160-add-usage-fields-to-file-for-idp-and-llm.yaml
|
||||
- include:
|
||||
file: db/changelog/tenant/161-add-idp-related-fields-to-dossier-template.yaml
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
# Adds usage counters to the file table: LLM prompt/completion tokens and IDP pages.
databaseChangeLog:
  - changeSet:
      id: add-llm-usage-fields-to-file
      author: kilian
      changes:
        - addColumn:
            tableName: file
            columns:
              - column:
                  name: used_completion_tokens
                  type: int
                  defaultValueNumeric: "0"
                  constraints:
                    nullable: false
              - column:
                  name: used_prompt_tokens
                  type: int
                  defaultValueNumeric: "0"
                  constraints:
                    nullable: false
              # No NOT NULL constraint on this column, unlike the two token counters.
              - column:
                  name: number_of_idp_pages
                  type: int
                  defaultValueNumeric: "0"
|
||||
@ -0,0 +1,64 @@
|
||||
# Adds the OCR/IDP related flags to dossier_template, back-fills
# rotation_correction_by_default from the layout parsing type, then makes all
# new flags NOT NULL in a second change set.
databaseChangeLog:
  - changeSet:
      id: add-idp-related-fields-to-dossier-template
      author: kilian
      changes:
        - addColumn:
            tableName: dossier_template
            columns:
              - column:
                  name: idp_by_default
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if IDP is enabled by default"
              # No default here: the value is derived per row by the updates below.
              - column:
                  name: rotation_correction_by_default
                  type: boolean
                  remarks: "Indicates if rotation correction is enabled by default"
              - column:
                  name: font_style_detection
                  type: boolean
                  defaultValueBoolean: true
                  remarks: "Indicates if font style detection is enabled in OCR"
              - column:
                  name: ocr_all_pages
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if all pages should be processed during OCR instead of only pages with images"

        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: true
            where: "layout_parsing_type = 'DOCUMINE_OLD'"

        # The IS NULL branch is required: in SQL a NULL layout_parsing_type matches neither
        # "= 'DOCUMINE_OLD'" nor "!= 'DOCUMINE_OLD'", which would leave the new column NULL
        # and make the NOT NULL constraint below fail.
        # NOTE(review): nullability of layout_parsing_type is not visible here - confirm.
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: false
            where: "layout_parsing_type IS NULL OR layout_parsing_type != 'DOCUMINE_OLD'"

  - changeSet:
      id: make-fields-non-nullable
      author: kilian
      changes:
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: idp_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: rotation_correction_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: font_style_detection
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: ocr_all_pages
            columnDataType: boolean
|
||||
@ -191,7 +191,7 @@ public class ComponentOverrideTest extends AbstractPersistenceServerServiceTest
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testDeletedFileOverrides() throws IOException {
|
||||
public void testDeletedFileOverrides() {
|
||||
|
||||
var dossier = dossierTesterAndProvider.provideTestDossier();
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.peristence.v1.server.integration.tests;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
@ -149,7 +150,10 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
|
||||
|
||||
TypeResponse types = dictionaryClient.getAllTypes(dossierTemplate.getId(), null, true);
|
||||
List<TypeValue> systemManagedTypes = types.getTypes().stream().filter(TypeValue::isSystemManaged).collect(Collectors.toList());
|
||||
List<TypeValue> systemManagedTypes = types.getTypes()
|
||||
.stream()
|
||||
.filter(TypeValue::isSystemManaged)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(systemManagedTypes.size()).isEqualTo(8);
|
||||
|
||||
var allTemplates = dossierTemplateClient.getAllDossierTemplates();
|
||||
@ -284,17 +288,17 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
dictionaryClient.getDictionaryForType(type2.getType(), type2.getDossierTemplateId(), dossier.getId());
|
||||
|
||||
var allTypes = dictionaryClient.getAllTypes(dossierTemplate.getId(), dossier.getId(), false).getTypes();
|
||||
assertThat(allTypes
|
||||
.stream().filter(t -> !t.isSystemManaged()).collect(Collectors.toList())
|
||||
.size()).isEqualTo(4);
|
||||
assertThat(allTypes.stream()
|
||||
.filter(t -> !t.isSystemManaged())
|
||||
.count()).isEqualTo(4);
|
||||
var typesWithRankOfType1 = allTypes.stream()
|
||||
.filter(t -> t.getRank() == type.getRank())
|
||||
.collect(Collectors.toList());
|
||||
.toList();
|
||||
assertThat(typesWithRankOfType1.size()).isEqualTo(2);
|
||||
|
||||
var typesWithRankOfType2 = allTypes.stream()
|
||||
.filter(t -> t.getRank() == type2.getRank())
|
||||
.collect(Collectors.toList());
|
||||
.toList();
|
||||
assertThat(typesWithRankOfType2.size()).isEqualTo(2);
|
||||
|
||||
dictionaryClient.addEntry(createdType1.getType(), createdType1.getDossierTemplateId(), List.of("entry1", "entry2"), false, null, DictionaryEntryType.ENTRY);
|
||||
@ -596,13 +600,13 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
.build());
|
||||
|
||||
// add new justifications
|
||||
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason","technicalReason")), dossierTemplate.getId());
|
||||
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason", "technicalReason")), dossierTemplate.getId());
|
||||
existingLegalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplate.getId());
|
||||
assertThat(existingLegalBasis.size()).isEqualTo(1);
|
||||
|
||||
// update dossier template metadata
|
||||
var cru = new DossierTemplateModel();
|
||||
cru.setDossierTemplateId(dossierTemplate.getId());
|
||||
cru.setId(dossierTemplate.getId());
|
||||
BeanUtils.copyProperties(dossierTemplate, cru);
|
||||
cru.setName("Template 1 Update");
|
||||
cru.setDescription("new description");
|
||||
@ -944,7 +948,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
assertThat(result.getName()).isEqualTo(name);
|
||||
assertThat(result.isOcrByDefault()).isTrue();
|
||||
|
||||
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId());
|
||||
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId());
|
||||
assertThat(loadedTemplate).isEqualTo(result);
|
||||
|
||||
dossierTemplateModel.setName("Test Dossier Template Update");
|
||||
@ -992,7 +996,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
|
||||
// update
|
||||
var cru = new DossierTemplateModel();
|
||||
cru.setDossierTemplateId(dossierTemplate.getId());
|
||||
cru.setId(dossierTemplate.getId());
|
||||
BeanUtils.copyProperties(dossierTemplate, cru);
|
||||
cru.setName("Template 1 Update");
|
||||
cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC));
|
||||
@ -1002,4 +1006,40 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo."));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUpdateDossierTemplateWithOCRSettings() {
|
||||
|
||||
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
|
||||
|
||||
var allTemplates = dossierTemplateClient.getAllDossierTemplates();
|
||||
assertThat(allTemplates.size()).isEqualTo(1);
|
||||
assertThat(allTemplates.get(0)).isEqualTo(dossierTemplate);
|
||||
|
||||
// update
|
||||
var cru = new DossierTemplateModel();
|
||||
cru.setId(dossierTemplate.getId());
|
||||
BeanUtils.copyProperties(dossierTemplate, cru);
|
||||
String updatedName = "Template 1 Update";
|
||||
cru.setName(updatedName);
|
||||
cru.setIdpByDefault(true);
|
||||
cru.setRotationCorrectionByDefault(true);
|
||||
cru.setOcrAllPages(true);
|
||||
cru.setFontStyleDetection(true);
|
||||
|
||||
var updatedDT = dossierTemplateClient.createOrUpdateDossierTemplate(cru);
|
||||
assertEquals(updatedName, updatedDT.getName());
|
||||
assertTrue(updatedDT.isIdpByDefault());
|
||||
assertTrue(updatedDT.isRotationCorrectionByDefault());
|
||||
assertTrue(updatedDT.isFontStyleDetection());
|
||||
assertTrue(updatedDT.isOcrAllPages());
|
||||
|
||||
var loadedDT = dossierTemplateClient.getDossierTemplate(updatedDT.getId());
|
||||
assertEquals(updatedName, loadedDT.getName());
|
||||
assertTrue(loadedDT.isIdpByDefault());
|
||||
assertTrue(loadedDT.isRotationCorrectionByDefault());
|
||||
assertTrue(loadedDT.isFontStyleDetection());
|
||||
assertTrue(loadedDT.isOcrAllPages());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -46,22 +46,22 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
|
||||
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
||||
|
||||
reanalysisClient.ocrDossier(dossier.getId());
|
||||
reanalysisClient.ocrDossier(dossier.getId(), false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
reanalysisClient.ocrDossier(dossier.getId());
|
||||
reanalysisClient.ocrDossier(dossier.getId(), false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true);
|
||||
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()));
|
||||
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()), false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
@ -201,6 +201,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
|
||||
Collections.emptySet(),
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
new FileStatusFilter(null, null, true, true)));
|
||||
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
|
||||
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);
|
||||
@ -247,6 +248,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
|
||||
Collections.emptySet(),
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
null));
|
||||
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
|
||||
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);
|
||||
|
||||
@ -10,7 +10,7 @@ dependencies {
|
||||
api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
}
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") {
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:0.196.0-RED8670.0") {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
|
||||
@ -68,6 +68,18 @@ public class DossierTemplateModel {
|
||||
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
|
||||
private boolean ocrByDefault;
|
||||
|
||||
@Schema(description = "Flag that specifies if rotation correction is attempted during OCR for all dossiers of this template")
|
||||
private boolean rotationCorrectionByDefault;
|
||||
|
||||
@Schema(description = "Flag that specifies if IDP is performed instead of OCR for all dossiers of this template")
|
||||
private boolean idpByDefault;
|
||||
|
||||
@Schema(description = "Flag that specifies if font style detection is performed during OCR")
|
||||
private boolean fontStyleDetection;
|
||||
|
||||
@Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images")
|
||||
private boolean ocrAllPages;
|
||||
|
||||
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
|
||||
private boolean removeWatermark;
|
||||
|
||||
|
||||
@ -88,6 +88,12 @@ public class FileStatus {
|
||||
private int numberOfPagesToOCR;
|
||||
@Schema(description = "Number of pages already OCRed by us")
|
||||
private int numberOfOCRedPages;
|
||||
@Schema(description = "Number of pages already IDPed by us")
|
||||
private int numberOfIdpPages;
|
||||
@Schema(description = "Number of prompt tokens used by this file")
|
||||
private int usedPromptTokens;
|
||||
@Schema(description = "Number of completion tokens used by this file")
|
||||
private int usedCompletionTokens;
|
||||
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
|
||||
private OffsetDateTime ocrEndTime;
|
||||
@Schema(description = "Shows if this file has comments on annotations.")
|
||||
|
||||
@ -15,7 +15,7 @@ import lombok.NoArgsConstructor;
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class FileStatusFilter {
|
||||
public class FileStatusFilter implements Predicate<FileModel> {
|
||||
|
||||
private List<ProcessingStatus> processingStatusList = new ArrayList<>();
|
||||
private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
|
||||
@ -32,7 +32,8 @@ public class FileStatusFilter {
|
||||
}
|
||||
|
||||
|
||||
public Predicate<FileModel> asPredicate() {
|
||||
@Override
|
||||
public boolean test(FileModel fileModel) {
|
||||
|
||||
if (this.getProcessingStatusList() == null) {
|
||||
this.setProcessingStatusList(new ArrayList<>());
|
||||
@ -42,10 +43,12 @@ public class FileStatusFilter {
|
||||
this.setWorkflowStatusList(new ArrayList<>());
|
||||
}
|
||||
|
||||
return fileStatus -> (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileStatus.getProcessingStatus()))
|
||||
&& (this.getWorkflowStatusList().isEmpty() || this.getWorkflowStatusList().contains(fileStatus.getWorkflowStatus()))
|
||||
&& (this.isIncludeSoftDeletedFiles() || fileStatus.getDeleted() == null)
|
||||
&& (this.isIncludeHardDeletedFiles() || fileStatus.getHardDeletedTime() == null);
|
||||
return (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileModel.getProcessingStatus()))
|
||||
&& (this.getWorkflowStatusList().isEmpty()
|
||||
|| this.getWorkflowStatusList()
|
||||
.contains(fileModel.getWorkflowStatus()))
|
||||
&& (this.isIncludeSoftDeletedFiles() || fileModel.getDeleted() == null)
|
||||
&& (this.isIncludeHardDeletedFiles() || fileModel.getHardDeletedTime() == null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -4,16 +4,33 @@ import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
public record ReanalysisSettings(
|
||||
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]") Set<String> dossierIds,
|
||||
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]") Set<String> fileIds,
|
||||
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false") boolean repeatStructureAnalysis,
|
||||
@Schema(description = "If set to true, ocr will be repeated and therefore also layout parsing and named entity recognition.", defaultValue = "false") boolean runOcr,
|
||||
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "{}") FileStatusFilter fileStatusFilter
|
||||
) {
|
||||
@Getter
|
||||
@Builder
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public final class ReanalysisSettings {
|
||||
|
||||
public FileStatusFilter fileStatusFilter() {
|
||||
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
|
||||
Set<String> dossierIds;
|
||||
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
|
||||
Set<String> fileIds;
|
||||
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
|
||||
boolean repeatStructureAnalysis;
|
||||
@Schema(description = "If set to true, OCR will be repeated.", defaultValue = "false")
|
||||
boolean runOcr;
|
||||
@Schema(description = "If set to true, OCR with IDP will be repeated.", defaultValue = "false")
|
||||
boolean runIdp;
|
||||
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
|
||||
FileStatusFilter fileStatusFilter;
|
||||
|
||||
|
||||
public FileStatusFilter getFileStatusFilter() {
|
||||
|
||||
return Optional.ofNullable(fileStatusFilter)
|
||||
.orElse(new FileStatusFilter());
|
||||
|
||||
@ -41,8 +41,16 @@ public class CreateOrUpdateDossierTemplateRequest {
|
||||
|
||||
private boolean ocrByDefault;
|
||||
|
||||
private boolean idpByDefault;
|
||||
|
||||
private boolean rotationCorrectionByDefault;
|
||||
|
||||
private boolean fontStyleDetection;
|
||||
|
||||
private boolean removeWatermark;
|
||||
|
||||
private boolean ocrAllPages;
|
||||
|
||||
private LayoutParsingType layoutParsingType;
|
||||
|
||||
}
|
||||
|
||||
@ -36,7 +36,11 @@ public class DossierTemplate {
|
||||
private boolean keepOverlappingObjects;
|
||||
private boolean applyDictionaryUpdatesToAllDossiersByDefault;
|
||||
private boolean ocrByDefault;
|
||||
private boolean rotationCorrectionByDefault;
|
||||
private boolean idpByDefault;
|
||||
private boolean removeWatermark;
|
||||
private boolean fontStyleDetection;
|
||||
private boolean ocrAllPages;
|
||||
private LayoutParsingType layoutParsingType;
|
||||
|
||||
}
|
||||
|
||||
@ -8,6 +8,7 @@ import java.util.Set;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -60,6 +61,9 @@ public class FileModel {
|
||||
private OffsetDateTime ocrStartTime;
|
||||
private Integer numberOfPagesToOCR;
|
||||
private Integer numberOfOCRedPages;
|
||||
private Integer numberOfIdpPages;
|
||||
private int usedPromptTokens;
|
||||
private int usedCompletionTokens;
|
||||
private OffsetDateTime ocrEndTime;
|
||||
private boolean hasAnnotationComments;
|
||||
private boolean excluded;
|
||||
|
||||
@ -21,6 +21,7 @@ public enum FileType {
|
||||
TABLES(".json"),
|
||||
|
||||
VISUAL_LAYOUT(".json"),
|
||||
IDP_RESULT(".json"),
|
||||
COMPONENTS(".json"),
|
||||
// document is split into 4 files, all should be overridden/deleted at the same time
|
||||
DOCUMENT_TEXT_OLD(".json"),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user