RED-8670: add more settings to OCR #919
@ -36,6 +36,7 @@ import lombok.RequiredArgsConstructor;
|
|||||||
public class ReanalysisController implements ReanalysisResource {
|
public class ReanalysisController implements ReanalysisResource {
|
||||||
|
|
||||||
private static final String DOSSIER_ID = "dossierId";
|
private static final String DOSSIER_ID = "dossierId";
|
||||||
|
|
||||||
private final ReanalysisService reanalysisService;
|
private final ReanalysisService reanalysisService;
|
||||||
private final FileStatusManagementService fileStatusManagementService;
|
private final FileStatusManagementService fileStatusManagementService;
|
||||||
private final AuditPersistenceService auditPersistenceService;
|
private final AuditPersistenceService auditPersistenceService;
|
||||||
@ -98,11 +99,11 @@ public class ReanalysisController implements ReanalysisResource {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
|
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
|
||||||
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId) {
|
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||||
|
|
||||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||||
|
|
||||||
reanalysisService.ocrDossier(dossierId);
|
reanalysisService.ocrDossier(dossierId, idp);
|
||||||
|
|
||||||
auditPersistenceService.audit(AuditRequest.builder()
|
auditPersistenceService.audit(AuditRequest.builder()
|
||||||
.userId(KeycloakSecurity.getUserId())
|
.userId(KeycloakSecurity.getUserId())
|
||||||
@ -118,11 +119,12 @@ public class ReanalysisController implements ReanalysisResource {
|
|||||||
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
||||||
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
||||||
@PathVariable(FILE_ID) String fileId,
|
@PathVariable(FILE_ID) String fileId,
|
||||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) {
|
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
|
||||||
|
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||||
|
|
||||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||||
validateOCR(dossierId, fileId);
|
validateOCR(dossierId, fileId);
|
||||||
reanalysisService.ocrFile(dossierId, fileId, force);
|
reanalysisService.ocrFile(dossierId, fileId, force, idp);
|
||||||
auditPersistenceService.audit(AuditRequest.builder()
|
auditPersistenceService.audit(AuditRequest.builder()
|
||||||
.userId(KeycloakSecurity.getUserId())
|
.userId(KeycloakSecurity.getUserId())
|
||||||
.objectId(dossierId)
|
.objectId(dossierId)
|
||||||
@ -136,11 +138,13 @@ public class ReanalysisController implements ReanalysisResource {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
||||||
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds) {
|
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
|
||||||
|
@RequestBody Set<String> fileIds,
|
||||||
|
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||||
|
|
||||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||||
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
|
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
|
||||||
reanalysisService.ocrFiles(dossierId, fileIds);
|
reanalysisService.ocrFiles(dossierId, fileIds, idp);
|
||||||
auditPersistenceService.audit(AuditRequest.builder()
|
auditPersistenceService.audit(AuditRequest.builder()
|
||||||
.userId(KeycloakSecurity.getUserId())
|
.userId(KeycloakSecurity.getUserId())
|
||||||
.objectId(dossierId)
|
.objectId(dossierId)
|
||||||
|
|||||||
@ -38,6 +38,7 @@ public interface ReanalysisResource {
|
|||||||
|
|
||||||
String EXCLUDED_STATUS_PARAM = "excluded";
|
String EXCLUDED_STATUS_PARAM = "excluded";
|
||||||
String FORCE_PARAM = "force";
|
String FORCE_PARAM = "force";
|
||||||
|
String IDP_PARAM = "idp";
|
||||||
|
|
||||||
|
|
||||||
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
||||||
@ -65,7 +66,7 @@ public interface ReanalysisResource {
|
|||||||
@Operation(summary = "Ocr and reanalyze a dossier", description = "None")
|
@Operation(summary = "Ocr and reanalyze a dossier", description = "None")
|
||||||
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
||||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
||||||
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId);
|
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||||
|
|
||||||
|
|
||||||
@Operation(summary = "Ocr and reanalyze a file", description = "None")
|
@Operation(summary = "Ocr and reanalyze a file", description = "None")
|
||||||
@ -73,13 +74,16 @@ public interface ReanalysisResource {
|
|||||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
|
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
|
||||||
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
||||||
@PathVariable(FILE_ID) String fileId,
|
@PathVariable(FILE_ID) String fileId,
|
||||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force);
|
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
|
||||||
|
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||||
|
|
||||||
|
|
||||||
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
|
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
|
||||||
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
|
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
|
||||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
||||||
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds);
|
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
|
||||||
|
@RequestBody Set<String> fileIds,
|
||||||
|
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||||
|
|
||||||
|
|
||||||
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")
|
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")
|
||||||
|
|||||||
@ -31,13 +31,17 @@ dependencies {
|
|||||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||||
}
|
}
|
||||||
|
api("com.knecon.fforesight:layoutparser-service-internal-api:0.181.0") {
|
||||||
|
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||||
|
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||||
|
}
|
||||||
api("com.iqser.red.service:search-service-api-v1:${rootProject.extra.get("searchServiceVersion")}") {
|
api("com.iqser.red.service:search-service-api-v1:${rootProject.extra.get("searchServiceVersion")}") {
|
||||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||||
}
|
}
|
||||||
api("com.knecon.fforesight:azure-ocr-service-api:0.13.0")
|
api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
|
||||||
implementation("com.knecon.fforesight:llm-service-api:1.20.0-RED10072.2")
|
implementation("com.knecon.fforesight:llm-service-api:1.17.0")
|
||||||
api("com.knecon.fforesight:jobs-commons:0.13.0")
|
api("com.knecon.fforesight:jobs-commons:0.10.0")
|
||||||
api("com.iqser.red.commons:storage-commons:2.50.0")
|
api("com.iqser.red.commons:storage-commons:2.50.0")
|
||||||
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {
|
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {
|
||||||
exclude(group = "com.iqser.red.commons", module = "storage-commons")
|
exclude(group = "com.iqser.red.commons", module = "storage-commons")
|
||||||
@ -71,6 +75,7 @@ dependencies {
|
|||||||
api("commons-validator:commons-validator:1.7")
|
api("commons-validator:commons-validator:1.7")
|
||||||
api("com.opencsv:opencsv:5.9")
|
api("com.opencsv:opencsv:5.9")
|
||||||
|
|
||||||
|
implementation("com.google.protobuf:protobuf-java:4.27.1")
|
||||||
implementation("org.mapstruct:mapstruct:1.6.2")
|
implementation("org.mapstruct:mapstruct:1.6.2")
|
||||||
annotationProcessor("org.mapstruct:mapstruct-processor:1.6.2")
|
annotationProcessor("org.mapstruct:mapstruct-processor:1.6.2")
|
||||||
|
|
||||||
|
|||||||
@ -83,6 +83,12 @@ public class DossierTemplateEntity {
|
|||||||
@Column(name = "ocr_by_default")
|
@Column(name = "ocr_by_default")
|
||||||
private boolean ocrByDefault;
|
private boolean ocrByDefault;
|
||||||
|
|
||||||
|
@Column(name = "rotation_correction_by_default")
|
||||||
|
private boolean rotationCorrectionByDefault;
|
||||||
|
|
||||||
|
@Column(name = "idp_by_default")
|
||||||
|
private boolean idpByDefault;
|
||||||
|
|
||||||
@Column(name = "remove_watermark")
|
@Column(name = "remove_watermark")
|
||||||
private boolean removeWatermark;
|
private boolean removeWatermark;
|
||||||
|
|
||||||
|
|||||||
@ -152,9 +152,16 @@ public class FileEntity {
|
|||||||
@Column(name = "number_of_ocred_pages")
|
@Column(name = "number_of_ocred_pages")
|
||||||
private Integer numberOfOCRedPages;
|
private Integer numberOfOCRedPages;
|
||||||
|
|
||||||
|
@Column(name = "number_of_idp_pages")
|
||||||
|
private Integer numberOfIdpPages;
|
||||||
|
|
||||||
@Column(name = "ocr_end_time")
|
@Column(name = "ocr_end_time")
|
||||||
private OffsetDateTime ocrEndTime;
|
private OffsetDateTime ocrEndTime;
|
||||||
|
|
||||||
|
private Integer usedPromptTokens;
|
||||||
|
|
||||||
|
private Integer usedCompletionTokens;
|
||||||
|
|
||||||
@Column
|
@Column
|
||||||
private boolean hasAnnotationComments;
|
private boolean hasAnnotationComments;
|
||||||
|
|
||||||
|
|||||||
@ -1,20 +0,0 @@
|
|||||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Builder;
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@Data
|
|
||||||
@NoArgsConstructor
|
|
||||||
@AllArgsConstructor
|
|
||||||
@Builder
|
|
||||||
public class OCRStatusUpdateResponse {
|
|
||||||
|
|
||||||
private String fileId;
|
|
||||||
private int numberOfPagesToOCR;
|
|
||||||
private int numberOfOCRedPages;
|
|
||||||
private boolean ocrFinished;
|
|
||||||
private boolean ocrStarted;
|
|
||||||
|
|
||||||
}
|
|
||||||
@ -1,5 +1,7 @@
|
|||||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import org.springframework.retry.support.RetryTemplate;
|
import org.springframework.retry.support.RetryTemplate;
|
||||||
@ -15,6 +17,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.UntouchedDo
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
||||||
import com.iqser.red.service.search.v1.model.IndexMessageType;
|
import com.iqser.red.service.search.v1.model.IndexMessageType;
|
||||||
|
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||||
|
|
||||||
import jakarta.transaction.Transactional;
|
import jakarta.transaction.Transactional;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
@ -114,7 +117,7 @@ public class FileStatusProcessingUpdateService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void requeueOCROrMarkFailed(String dossierId, String fileId, FileErrorInfo fileErrorInfo) {
|
public void requeueOCROrMarkFailed(String dossierId, String fileId, Set<AzureOcrFeature> features, FileErrorInfo fileErrorInfo) {
|
||||||
|
|
||||||
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
|
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
|
||||||
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
|
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
|
||||||
@ -122,7 +125,7 @@ public class FileStatusProcessingUpdateService {
|
|||||||
} else {
|
} else {
|
||||||
fileStatusService.setStatusOcrProcessing(fileId,
|
fileStatusService.setStatusOcrProcessing(fileId,
|
||||||
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
|
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
|
||||||
fileStatusService.addToOcrQueue(dossierId, fileId, 2);
|
fileStatusService.addToOcrQueue(dossierId, fileId, 2, features);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -12,7 +12,6 @@ import java.util.function.BiFunction;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
@ -21,6 +20,7 @@ import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocu
|
|||||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
|
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
|
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
|
||||||
|
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierTemplateEntity;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
|
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
|
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
|
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
|
||||||
@ -31,7 +31,6 @@ import com.iqser.red.service.persistence.management.v1.processor.model.AnalysisT
|
|||||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
|
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
|
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
|
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||||
@ -80,6 +79,7 @@ import com.knecon.fforesight.llm.service.LlmNerMessage;
|
|||||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
|
||||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||||
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
||||||
|
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||||
|
|
||||||
import jakarta.transaction.Transactional;
|
import jakarta.transaction.Transactional;
|
||||||
@ -569,6 +569,12 @@ public class FileStatusService {
|
|||||||
|
|
||||||
public void setStatusOcrQueued(String dossierId, String fileId) {
|
public void setStatusOcrQueued(String dossierId, String fileId) {
|
||||||
|
|
||||||
|
setStatusOcrQueued(dossierId, fileId, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void setStatusOcrQueued(String dossierId, String fileId, boolean idp) {
|
||||||
|
|
||||||
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
|
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
|
||||||
|
|
||||||
if (fileStatus.isExcluded()) {
|
if (fileStatus.isExcluded()) {
|
||||||
@ -579,7 +585,7 @@ public class FileStatusService {
|
|||||||
updateOCRStartTime(fileId);
|
updateOCRStartTime(fileId);
|
||||||
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
|
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||||
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
|
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
|
||||||
addToOcrQueue(dossierId, fileId, 2);
|
addToOcrQueue(dossierId, fileId, 2, idp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -760,18 +766,29 @@ public class FileStatusService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void addToOcrQueue(String dossierId, String fileId, int priority) {
|
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean useIdp) {
|
||||||
|
|
||||||
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
|
DossierTemplateEntity dt = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId));
|
||||||
Set<AzureOcrFeature> features = new HashSet<>();
|
Set<AzureOcrFeature> features = new HashSet<>();
|
||||||
if (removeWatermark) {
|
if (dt.isRemoveWatermark()) {
|
||||||
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
|
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
|
||||||
}
|
}
|
||||||
if (currentApplicationTypeProvider.isDocuMine()) {
|
if (useIdp || dt.isIdpByDefault()) {
|
||||||
|
features.add(AzureOcrFeature.IDP);
|
||||||
|
}
|
||||||
|
if (dt.isRotationCorrectionByDefault()) {
|
||||||
features.add(AzureOcrFeature.ROTATION_CORRECTION);
|
features.add(AzureOcrFeature.ROTATION_CORRECTION);
|
||||||
|
}
|
||||||
|
if (currentApplicationTypeProvider.isDocuMine()) {
|
||||||
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
|
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addToOcrQueue(dossierId, fileId, priority, features);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
|
||||||
|
|
||||||
var request = DocumentRequest.builder()
|
var request = DocumentRequest.builder()
|
||||||
.dossierId(dossierId)
|
.dossierId(dossierId)
|
||||||
.fileId(fileId)
|
.fileId(fileId)
|
||||||
@ -1175,4 +1192,10 @@ public class FileStatusService {
|
|||||||
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
|
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
|
||||||
|
|
||||||
|
fileStatusPersistenceService.increaseTokenUsage(fileId, promptTokens, completionTokens);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -171,18 +171,18 @@ public class ReanalysisService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void ocrDossier(String dossierId) {
|
public void ocrDossier(String dossierId, boolean idp) {
|
||||||
|
|
||||||
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
|
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
|
||||||
|
|
||||||
relevantFiles.stream()
|
relevantFiles.stream()
|
||||||
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
||||||
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
|
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
|
||||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
|
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void ocrFile(String dossierId, String fileId, boolean force) {
|
public void ocrFile(String dossierId, String fileId, boolean force, boolean idp) {
|
||||||
|
|
||||||
dossierPersistenceService.getAndValidateDossier(dossierId);
|
dossierPersistenceService.getAndValidateDossier(dossierId);
|
||||||
FileModel dossierFile = fileStatusService.getStatus(fileId);
|
FileModel dossierFile = fileStatusService.getStatus(fileId);
|
||||||
@ -202,30 +202,31 @@ public class ReanalysisService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (force) {
|
if (force) {
|
||||||
fileStatusService.setStatusOcrQueued(dossierId, fileId);
|
fileStatusService.setStatusOcrQueued(dossierId, fileId, idp);
|
||||||
} else {
|
} else {
|
||||||
if (dossierFile.getOcrStartTime() != null) {
|
if (dossierFile.getOcrStartTime() != null) {
|
||||||
throw new ConflictException("File already has been OCR processed");
|
throw new ConflictException("File already has been OCR processed");
|
||||||
}
|
}
|
||||||
|
|
||||||
ocrFiles(dossierId, Sets.newHashSet(fileId));
|
ocrFiles(dossierId, Sets.newHashSet(fileId), idp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void ocrFiles(String dossierId, Set<String> fileIds) {
|
public void ocrFiles(String dossierId, Set<String> fileIds, boolean idp) {
|
||||||
|
|
||||||
var relevantFiles = getRelevantFiles(dossierId, fileIds);
|
var relevantFiles = getRelevantFiles(dossierId, fileIds);
|
||||||
|
|
||||||
if (relevantFiles.stream()
|
if (relevantFiles.stream()
|
||||||
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) && !fileStatus.getProcessingStatus()
|
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) //
|
||||||
.equals(ProcessingStatus.OCR_PROCESSING_QUEUED) && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
|
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING_QUEUED) //
|
||||||
|
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
|
||||||
throw new ConflictException("File is not processed");
|
throw new ConflictException("File is not processed");
|
||||||
}
|
}
|
||||||
|
|
||||||
relevantFiles.stream()
|
relevantFiles.stream()
|
||||||
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
||||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
|
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -275,12 +276,12 @@ public class ReanalysisService {
|
|||||||
|
|
||||||
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
|
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
|
||||||
|
|
||||||
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.dossierIds(), reanalysisSettings.fileIds());
|
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.getDossierIds(), reanalysisSettings.getFileIds());
|
||||||
|
|
||||||
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
|
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
|
||||||
.stream()
|
.stream()
|
||||||
.filter(file -> isInList(file, reanalysisSettings))
|
.filter(file -> isInList(file, reanalysisSettings))
|
||||||
.filter(reanalysisSettings.fileStatusFilter().asPredicate())
|
.filter(reanalysisSettings.getFileStatusFilter())
|
||||||
.peek(file -> log.info("Reanalyzing file {}", file.getId()))
|
.peek(file -> log.info("Reanalyzing file {}", file.getId()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
@ -289,7 +290,7 @@ public class ReanalysisService {
|
|||||||
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
|
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
|
||||||
file.getId(),
|
file.getId(),
|
||||||
false,
|
false,
|
||||||
reanalysisSettings.repeatStructureAnalysis(),
|
reanalysisSettings.isRepeatStructureAnalysis(),
|
||||||
reanalysisSettings.runOcr()));
|
reanalysisSettings.runOcr()));
|
||||||
|
|
||||||
return rejectedFiles;
|
return rejectedFiles;
|
||||||
@ -314,8 +315,8 @@ public class ReanalysisService {
|
|||||||
|
|
||||||
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
|
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
|
||||||
|
|
||||||
return (reAnalysisSettings.fileIds().isEmpty() || reAnalysisSettings.fileIds().contains(file.getId())) //
|
return (reAnalysisSettings.getFileIds().isEmpty() || reAnalysisSettings.getFileIds().contains(file.getId())) //
|
||||||
&& (reAnalysisSettings.dossierIds().isEmpty() || reAnalysisSettings.dossierIds().contains(file.getDossierId()));
|
&& (reAnalysisSettings.getDossierIds().isEmpty() || reAnalysisSettings.getDossierIds().contains(file.getDossierId()));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -10,6 +10,8 @@ import java.util.Map;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
@ -21,7 +23,6 @@ import com.iqser.red.service.persistence.management.v1.processor.entity.projecti
|
|||||||
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
|
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
|
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
|
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
|
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
|
||||||
@ -31,6 +32,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.component.C
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
|
||||||
|
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||||
|
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||||
|
|
||||||
import jakarta.persistence.EntityManager;
|
import jakarta.persistence.EntityManager;
|
||||||
import jakarta.transaction.Transactional;
|
import jakarta.transaction.Transactional;
|
||||||
@ -680,6 +683,7 @@ public class FileStatusPersistenceService {
|
|||||||
fileRepository.updateOCRStatus(response.getFileId(),
|
fileRepository.updateOCRStatus(response.getFileId(),
|
||||||
response.getNumberOfPagesToOCR(),
|
response.getNumberOfPagesToOCR(),
|
||||||
response.getNumberOfOCRedPages(),
|
response.getNumberOfOCRedPages(),
|
||||||
|
response.getFeatures().contains(AzureOcrFeature.IDP) ? response.getNumberOfOCRedPages() : 0,
|
||||||
response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
|
response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
|
||||||
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
|
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
|
||||||
}
|
}
|
||||||
@ -765,4 +769,11 @@ public class FileStatusPersistenceService {
|
|||||||
fileRepository.updateLastDownloadForFile(fileId, null);
|
fileRepository.updateLastDownloadForFile(fileId, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
|
||||||
|
|
||||||
|
fileRepository.increaseTokenUsage(fileId, promptTokens, completionTokens);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -389,11 +389,15 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
|||||||
@Transactional
|
@Transactional
|
||||||
@Modifying(clearAutomatically = true)
|
@Modifying(clearAutomatically = true)
|
||||||
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
|
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
|
||||||
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, f.ocrEndTime = :ocrEndTime, "
|
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, "
|
||||||
+ "f.lastUpdated = :lastUpdated where f.id = :fileId")
|
+ "f.numberOfIdpPages = :numberOfIdpPages, "
|
||||||
|
+ "f.ocrEndTime = :ocrEndTime, "
|
||||||
|
+ "f.lastUpdated = :lastUpdated "
|
||||||
|
+ "where f.id = :fileId")
|
||||||
void updateOCRStatus(@Param("fileId") String fileId,
|
void updateOCRStatus(@Param("fileId") String fileId,
|
||||||
@Param("numberOfPagesToOCR") int numberOfPagesToOCR,
|
@Param("numberOfPagesToOCR") int numberOfPagesToOCR,
|
||||||
@Param("numberOfOCRedPages") int numberOfOCRedPages,
|
@Param("numberOfOCRedPages") int numberOfOCRedPages,
|
||||||
|
@Param("numberOfIdpPages") int numberOfIdpPages,
|
||||||
@Param("ocrEndTime") OffsetDateTime ocrEndTime,
|
@Param("ocrEndTime") OffsetDateTime ocrEndTime,
|
||||||
@Param("lastUpdated") OffsetDateTime lastUpdated);
|
@Param("lastUpdated") OffsetDateTime lastUpdated);
|
||||||
|
|
||||||
@ -479,6 +483,11 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
|||||||
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
|
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
|
||||||
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
|
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
|
||||||
|
|
||||||
|
|
||||||
|
@Modifying
|
||||||
|
@Query("UPDATE FileEntity f SET f.usedPromptTokens = f.usedPromptTokens + :promptTokens, f.usedCompletionTokens = f.usedCompletionTokens + :completionTokens WHERE f.id = :id")
|
||||||
|
void increaseTokenUsage(@Param("id") String fileId, @Param("promptTokens") int promptTokens, @Param("completionTokens") int completionTokens);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -63,7 +63,7 @@ public class NerMessageReceiver {
|
|||||||
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
|
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
|
||||||
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
|
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
|
||||||
addFileIdToTrace(fileId);
|
addFileIdToTrace(fileId);
|
||||||
|
fileStatusService.increaseTokenUsage(fileId, message.getPromptTokens(), message.getCompletionTokens());
|
||||||
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
|
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
|
||||||
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
|
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
|
||||||
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);
|
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);
|
||||||
|
|||||||
@ -11,14 +11,13 @@ import org.springframework.stereotype.Service;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||||
|
|
||||||
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
|
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||||
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
||||||
|
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
@ -43,6 +42,8 @@ public class OCRProcessingMessageReceiver {
|
|||||||
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
|
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
|
||||||
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
|
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
|
||||||
|
|
||||||
|
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
|
||||||
|
|
||||||
var fileModel = fileStatusService.getStatus(response.getFileId());
|
var fileModel = fileStatusService.getStatus(response.getFileId());
|
||||||
|
|
||||||
if (response.isOcrStarted()) {
|
if (response.isOcrStarted()) {
|
||||||
@ -57,7 +58,6 @@ public class OCRProcessingMessageReceiver {
|
|||||||
response.getNumberOfOCRedPages());
|
response.getNumberOfOCRedPages());
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -95,6 +95,7 @@ public class OCRProcessingMessageReceiver {
|
|||||||
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||||
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
|
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
|
||||||
ocrRequestMessage.getFileId(),
|
ocrRequestMessage.getFileId(),
|
||||||
|
ocrRequestMessage.getFeatures(),
|
||||||
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
|
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -259,3 +259,7 @@ databaseChangeLog:
|
|||||||
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
|
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
|
||||||
- include:
|
- include:
|
||||||
file: db/changelog/tenant/159-cleanup-truncated-indices.yaml
|
file: db/changelog/tenant/159-cleanup-truncated-indices.yaml
|
||||||
|
- include:
|
||||||
|
file: db/changelog/tenant/151.0.0-add-usage-fields-to-file-for-idp-and-llm.yaml
|
||||||
|
- include:
|
||||||
|
file: db/changelog/tenant/152.0.0-add-idp-related-fields-to-dossier-template.yaml
|
||||||
|
|||||||
@ -0,0 +1,26 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- changeSet:
|
||||||
|
id: add-llm-usage-fields-to-file
|
||||||
|
author: kilian
|
||||||
|
changes:
|
||||||
|
- addColumn:
|
||||||
|
tableName: file
|
||||||
|
columns:
|
||||||
|
- column:
|
||||||
|
name: used_completion_tokens
|
||||||
|
type: int
|
||||||
|
defaultValueNumeric: 0
|
||||||
|
constraints:
|
||||||
|
nullable: false
|
||||||
|
- column:
|
||||||
|
name: used_prompt_tokens
|
||||||
|
type: int
|
||||||
|
defaultValueNumeric: 0
|
||||||
|
constraints:
|
||||||
|
nullable: false
|
||||||
|
- column:
|
||||||
|
name: number_of_idp_pages
|
||||||
|
type: int
|
||||||
|
defaultValueNumeric: 0
|
||||||
|
constraints:
|
||||||
|
nullable: false
|
||||||
@ -0,0 +1,46 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- changeSet:
|
||||||
|
id: add-idp-related-fields-to-dossier-template
|
||||||
|
author: kilian
|
||||||
|
changes:
|
||||||
|
- addColumn:
|
||||||
|
tableName: dossier_template
|
||||||
|
columns:
|
||||||
|
- column:
|
||||||
|
name: idp_by_default
|
||||||
|
type: boolean
|
||||||
|
defaultValueBoolean: false
|
||||||
|
remarks: "Indicates if IDP is enabled by default"
|
||||||
|
- column:
|
||||||
|
name: rotation_correction_by_default
|
||||||
|
type: boolean
|
||||||
|
remarks: "Indicates if rotation correction is enabled by default"
|
||||||
|
|
||||||
|
- update:
|
||||||
|
tableName: dossier_template
|
||||||
|
columns:
|
||||||
|
- column:
|
||||||
|
name: rotation_correction_by_default
|
||||||
|
valueBoolean: true
|
||||||
|
where: "layout_parsing_type = 'DOCUMINE_OLD'"
|
||||||
|
|
||||||
|
- update:
|
||||||
|
tableName: dossier_template
|
||||||
|
columns:
|
||||||
|
- column:
|
||||||
|
name: rotation_correction_by_default
|
||||||
|
valueBoolean: false
|
||||||
|
where: "layout_parsing_type != 'DOCUMINE_OLD'"
|
||||||
|
|
||||||
|
- changeSet:
|
||||||
|
id: make-fields-non-nullable
|
||||||
|
author: kilian
|
||||||
|
changes:
|
||||||
|
- addNotNullConstraint:
|
||||||
|
tableName: dossier_template
|
||||||
|
columnName: idp_by_default
|
||||||
|
columnDataType: boolean
|
||||||
|
- addNotNullConstraint:
|
||||||
|
tableName: dossier_template
|
||||||
|
columnName: rotation_correction_by_default
|
||||||
|
columnDataType: boolean
|
||||||
@ -46,22 +46,22 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
|
|||||||
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||||
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
||||||
|
|
||||||
reanalysisClient.ocrDossier(dossier.getId());
|
reanalysisClient.ocrDossier(dossier.getId(), false);
|
||||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||||
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
||||||
resetProcessingStatus(file);
|
resetProcessingStatus(file);
|
||||||
|
|
||||||
reanalysisClient.ocrDossier(dossier.getId());
|
reanalysisClient.ocrDossier(dossier.getId(), false);
|
||||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||||
resetProcessingStatus(file);
|
resetProcessingStatus(file);
|
||||||
|
|
||||||
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true);
|
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
|
||||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||||
resetProcessingStatus(file);
|
resetProcessingStatus(file);
|
||||||
|
|
||||||
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()));
|
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()), false);
|
||||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||||
resetProcessingStatus(file);
|
resetProcessingStatus(file);
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
hub.image.name.prefix=docker-dev.knecon.com/tests/
|
|
||||||
@ -68,6 +68,12 @@ public class DossierTemplateModel {
|
|||||||
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
|
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
|
||||||
private boolean ocrByDefault;
|
private boolean ocrByDefault;
|
||||||
|
|
||||||
|
@Schema(description = "Flag that specifies if rotation correction is attempted during OCR for all dossiers of this template")
|
||||||
|
private boolean rotationCorrectionByDefault;
|
||||||
|
|
||||||
|
@Schema(description = "Flag that specifies if IDP is automatically performed on upload for all dossiers of this template")
|
||||||
|
private boolean idpByDefault;
|
||||||
|
|
||||||
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
|
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
|
||||||
private boolean removeWatermark;
|
private boolean removeWatermark;
|
||||||
|
|
||||||
|
|||||||
@ -88,6 +88,12 @@ public class FileStatus {
|
|||||||
private int numberOfPagesToOCR;
|
private int numberOfPagesToOCR;
|
||||||
@Schema(description = "Number of pages already OCRed by us")
|
@Schema(description = "Number of pages already OCRed by us")
|
||||||
private int numberOfOCRedPages;
|
private int numberOfOCRedPages;
|
||||||
|
@Schema(description = "Number of pages already IDPed by us")
|
||||||
|
private int numberOfIdpPages;
|
||||||
|
@Schema(description = "Number of prompt tokens used by this file")
|
||||||
|
private int usedPromptTokens;
|
||||||
|
@Schema(description = "Number of completion tokens used by this file")
|
||||||
|
private int usedCompletionTokens;
|
||||||
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
|
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
|
||||||
private OffsetDateTime ocrEndTime;
|
private OffsetDateTime ocrEndTime;
|
||||||
@Schema(description = "Shows if this file has comments on annotations.")
|
@Schema(description = "Shows if this file has comments on annotations.")
|
||||||
|
|||||||
@ -15,7 +15,7 @@ import lombok.NoArgsConstructor;
|
|||||||
@Data
|
@Data
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class FileStatusFilter {
|
public class FileStatusFilter implements Predicate<FileModel> {
|
||||||
|
|
||||||
private List<ProcessingStatus> processingStatusList = new ArrayList<>();
|
private List<ProcessingStatus> processingStatusList = new ArrayList<>();
|
||||||
private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
|
private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
|
||||||
@ -32,7 +32,8 @@ public class FileStatusFilter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Predicate<FileModel> asPredicate() {
|
@Override
|
||||||
|
public boolean test(FileModel fileModel) {
|
||||||
|
|
||||||
if (this.getProcessingStatusList() == null) {
|
if (this.getProcessingStatusList() == null) {
|
||||||
this.setProcessingStatusList(new ArrayList<>());
|
this.setProcessingStatusList(new ArrayList<>());
|
||||||
@ -42,10 +43,12 @@ public class FileStatusFilter {
|
|||||||
this.setWorkflowStatusList(new ArrayList<>());
|
this.setWorkflowStatusList(new ArrayList<>());
|
||||||
}
|
}
|
||||||
|
|
||||||
return fileStatus -> (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileStatus.getProcessingStatus()))
|
return (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileModel.getProcessingStatus()))
|
||||||
&& (this.getWorkflowStatusList().isEmpty() || this.getWorkflowStatusList().contains(fileStatus.getWorkflowStatus()))
|
&& (this.getWorkflowStatusList().isEmpty()
|
||||||
&& (this.isIncludeSoftDeletedFiles() || fileStatus.getDeleted() == null)
|
|| this.getWorkflowStatusList()
|
||||||
&& (this.isIncludeHardDeletedFiles() || fileStatus.getHardDeletedTime() == null);
|
.contains(fileModel.getWorkflowStatus()))
|
||||||
|
&& (this.isIncludeSoftDeletedFiles() || fileModel.getDeleted() == null)
|
||||||
|
&& (this.isIncludeHardDeletedFiles() || fileModel.getHardDeletedTime() == null);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,16 +4,32 @@ import java.util.Optional;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import io.swagger.v3.oas.annotations.media.Schema;
|
import io.swagger.v3.oas.annotations.media.Schema;
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.experimental.FieldDefaults;
|
||||||
|
|
||||||
public record ReanalysisSettings(
|
@Builder
|
||||||
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]") Set<String> dossierIds,
|
@RequiredArgsConstructor
|
||||||
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]") Set<String> fileIds,
|
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||||
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false") boolean repeatStructureAnalysis,
|
public final class ReanalysisSettings {
|
||||||
@Schema(description = "If set to true, ocr will be repeated and therefore also layout parsing and named entity recognition.", defaultValue = "false") boolean runOcr,
|
|
||||||
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "{}") FileStatusFilter fileStatusFilter
|
|
||||||
) {
|
|
||||||
|
|
||||||
public FileStatusFilter fileStatusFilter() {
|
@Getter
|
||||||
|
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
|
||||||
|
Set<String> dossierIds;
|
||||||
|
@Getter
|
||||||
|
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
|
||||||
|
Set<String> fileIds;
|
||||||
|
@Getter
|
||||||
|
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
|
||||||
|
boolean repeatStructureAnalysis;
|
||||||
|
|
||||||
|
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
|
||||||
|
FileStatusFilter fileStatusFilter;
|
||||||
|
|
||||||
|
|
||||||
|
public FileStatusFilter getFileStatusFilter() {
|
||||||
|
|
||||||
return Optional.ofNullable(fileStatusFilter)
|
return Optional.ofNullable(fileStatusFilter)
|
||||||
.orElse(new FileStatusFilter());
|
.orElse(new FileStatusFilter());
|
||||||
|
|||||||
@ -36,6 +36,8 @@ public class DossierTemplate {
|
|||||||
private boolean keepOverlappingObjects;
|
private boolean keepOverlappingObjects;
|
||||||
private boolean applyDictionaryUpdatesToAllDossiersByDefault;
|
private boolean applyDictionaryUpdatesToAllDossiersByDefault;
|
||||||
private boolean ocrByDefault;
|
private boolean ocrByDefault;
|
||||||
|
private boolean rotationCorrectionByDefault;
|
||||||
|
private boolean idpByDefault;
|
||||||
private boolean removeWatermark;
|
private boolean removeWatermark;
|
||||||
private LayoutParsingType layoutParsingType;
|
private LayoutParsingType layoutParsingType;
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import java.util.Set;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
|
import io.swagger.v3.oas.annotations.media.Schema;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
@ -60,6 +61,9 @@ public class FileModel {
|
|||||||
private OffsetDateTime ocrStartTime;
|
private OffsetDateTime ocrStartTime;
|
||||||
private Integer numberOfPagesToOCR;
|
private Integer numberOfPagesToOCR;
|
||||||
private Integer numberOfOCRedPages;
|
private Integer numberOfOCRedPages;
|
||||||
|
private Integer numberOfIdpPages;
|
||||||
|
private int usedPromptTokens;
|
||||||
|
private int usedCompletionTokens;
|
||||||
private OffsetDateTime ocrEndTime;
|
private OffsetDateTime ocrEndTime;
|
||||||
private boolean hasAnnotationComments;
|
private boolean hasAnnotationComments;
|
||||||
private boolean excluded;
|
private boolean excluded;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user