RED-8670: add idp fields and llm tracking
This commit is contained in:
parent
6d0354946a
commit
d5fc737350
@ -36,6 +36,7 @@ import lombok.RequiredArgsConstructor;
|
||||
public class ReanalysisController implements ReanalysisResource {
|
||||
|
||||
private static final String DOSSIER_ID = "dossierId";
|
||||
|
||||
private final ReanalysisService reanalysisService;
|
||||
private final FileStatusManagementService fileStatusManagementService;
|
||||
private final AuditPersistenceService auditPersistenceService;
|
||||
@ -98,11 +99,11 @@ public class ReanalysisController implements ReanalysisResource {
|
||||
|
||||
@Override
|
||||
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
|
||||
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId) {
|
||||
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||
|
||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||
|
||||
reanalysisService.ocrDossier(dossierId);
|
||||
reanalysisService.ocrDossier(dossierId, idp);
|
||||
|
||||
auditPersistenceService.audit(AuditRequest.builder()
|
||||
.userId(KeycloakSecurity.getUserId())
|
||||
@ -118,11 +119,12 @@ public class ReanalysisController implements ReanalysisResource {
|
||||
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
||||
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@PathVariable(FILE_ID) String fileId,
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) {
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||
|
||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||
validateOCR(dossierId, fileId);
|
||||
reanalysisService.ocrFile(dossierId, fileId, force);
|
||||
reanalysisService.ocrFile(dossierId, fileId, force, idp);
|
||||
auditPersistenceService.audit(AuditRequest.builder()
|
||||
.userId(KeycloakSecurity.getUserId())
|
||||
.objectId(dossierId)
|
||||
@ -136,11 +138,13 @@ public class ReanalysisController implements ReanalysisResource {
|
||||
|
||||
@Override
|
||||
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
|
||||
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds) {
|
||||
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@RequestBody Set<String> fileIds,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
|
||||
|
||||
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
|
||||
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
|
||||
reanalysisService.ocrFiles(dossierId, fileIds);
|
||||
reanalysisService.ocrFiles(dossierId, fileIds, idp);
|
||||
auditPersistenceService.audit(AuditRequest.builder()
|
||||
.userId(KeycloakSecurity.getUserId())
|
||||
.objectId(dossierId)
|
||||
|
||||
@ -38,6 +38,7 @@ public interface ReanalysisResource {
|
||||
|
||||
String EXCLUDED_STATUS_PARAM = "excluded";
|
||||
String FORCE_PARAM = "force";
|
||||
String IDP_PARAM = "idp";
|
||||
|
||||
|
||||
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
||||
@ -65,7 +66,7 @@ public interface ReanalysisResource {
|
||||
/**
 * Triggers OCR and re-analysis for one dossier.
 * Mapped as POST on OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE; the documented
 * responses are 204 (OK), 404 (Not found) and 403 (Forbidden).
 *
 * NOTE(review): this view lists both the one-argument and the two-argument signature;
 * presumably the one-argument line is the pre-change revision - confirm against the real file.
 *
 * @param dossierId id of the dossier, bound from the DOSSIER_ID path variable
 * @param idp optional request parameter IDP_PARAM; defaults to FALSE when the caller omits it
 */
@Operation(summary = "Ocr and reanalyze a dossier", description = "None")
|
||||
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
||||
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId);
|
||||
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||
|
||||
|
||||
@Operation(summary = "Ocr and reanalyze a file", description = "None")
|
||||
@ -73,13 +74,16 @@ public interface ReanalysisResource {
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
|
||||
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@PathVariable(FILE_ID) String fileId,
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force);
|
||||
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||
|
||||
|
||||
/**
 * Triggers OCR and re-analysis for several files of one dossier.
 * Mapped as POST on OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH; the
 * documented responses are 204 (OK), 404 (Not found) and 403 (Forbidden).
 *
 * NOTE(review): this view lists both the two-argument and the three-argument signature;
 * presumably the two-argument line is the pre-change revision - confirm against the real file.
 *
 * @param dossierId id of the dossier, bound from the DOSSIER_ID path variable
 * @param fileIds ids of the files to OCR, read from the request body
 * @param idp optional request parameter IDP_PARAM; defaults to FALSE when the caller omits it
 */
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
|
||||
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
|
||||
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
|
||||
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds);
|
||||
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
|
||||
@RequestBody Set<String> fileIds,
|
||||
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
|
||||
|
||||
|
||||
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")
|
||||
|
||||
@ -31,13 +31,17 @@ dependencies {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:0.181.0") {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
api("com.iqser.red.service:search-service-api-v1:${rootProject.extra.get("searchServiceVersion")}") {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
api("com.knecon.fforesight:azure-ocr-service-api:0.13.0")
|
||||
implementation("com.knecon.fforesight:llm-service-api:1.20.0-RED10072.2")
|
||||
api("com.knecon.fforesight:jobs-commons:0.13.0")
|
||||
api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
|
||||
implementation("com.knecon.fforesight:llm-service-api:1.17.0")
|
||||
api("com.knecon.fforesight:jobs-commons:0.10.0")
|
||||
api("com.iqser.red.commons:storage-commons:2.50.0")
|
||||
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {
|
||||
exclude(group = "com.iqser.red.commons", module = "storage-commons")
|
||||
@ -71,6 +75,7 @@ dependencies {
|
||||
api("commons-validator:commons-validator:1.7")
|
||||
api("com.opencsv:opencsv:5.9")
|
||||
|
||||
implementation("com.google.protobuf:protobuf-java:4.27.1")
|
||||
implementation("org.mapstruct:mapstruct:1.6.2")
|
||||
annotationProcessor("org.mapstruct:mapstruct-processor:1.6.2")
|
||||
|
||||
|
||||
@ -83,6 +83,12 @@ public class DossierTemplateEntity {
|
||||
@Column(name = "ocr_by_default")
|
||||
private boolean ocrByDefault;
|
||||
|
||||
@Column(name = "rotation_correction_by_default")
|
||||
private boolean rotationCorrectionByDefault;
|
||||
|
||||
@Column(name = "idp_by_default")
|
||||
private boolean idpByDefault;
|
||||
|
||||
@Column(name = "remove_watermark")
|
||||
private boolean removeWatermark;
|
||||
|
||||
|
||||
@ -152,9 +152,16 @@ public class FileEntity {
|
||||
@Column(name = "number_of_ocred_pages")
|
||||
private Integer numberOfOCRedPages;
|
||||
|
||||
@Column(name = "number_of_idp_pages")
|
||||
private Integer numberOfIdpPages;
|
||||
|
||||
@Column(name = "ocr_end_time")
|
||||
private OffsetDateTime ocrEndTime;
|
||||
|
||||
private Integer usedPromptTokens;
|
||||
|
||||
private Integer usedCompletionTokens;
|
||||
|
||||
@Column
|
||||
private boolean hasAnnotationComments;
|
||||
|
||||
|
||||
@ -1,20 +0,0 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Progress message for the OCR of a single file.
 * Accessors, constructors and the builder are generated by Lombok.
 *
 * NOTE(review): the hunk header above this class (-1,20 +0,0) indicates the file is removed
 * in this change; presumably the OCR service API class of the same name replaces it - confirm.
 */
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
public class OCRStatusUpdateResponse {
|
||||
|
||||
// Id of the file the update refers to.
private String fileId;
|
||||
// Total number of pages scheduled for OCR.
private int numberOfPagesToOCR;
|
||||
// Number of pages OCRed so far.
private int numberOfOCRedPages;
|
||||
// Presumably true once the OCR run has completed - TODO confirm with the producer.
private boolean ocrFinished;
|
||||
// Presumably true for the message announcing the start of the run - TODO confirm.
private boolean ocrStarted;
|
||||
|
||||
}
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.persistence.management.v1.processor.service;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import org.springframework.retry.support.RetryTemplate;
|
||||
@ -15,6 +17,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.UntouchedDo
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
||||
import com.iqser.red.service.search.v1.model.IndexMessageType;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
|
||||
import jakarta.transaction.Transactional;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -114,7 +117,7 @@ public class FileStatusProcessingUpdateService {
|
||||
}
|
||||
|
||||
|
||||
public void requeueOCROrMarkFailed(String dossierId, String fileId, FileErrorInfo fileErrorInfo) {
|
||||
public void requeueOCROrMarkFailed(String dossierId, String fileId, Set<AzureOcrFeature> features, FileErrorInfo fileErrorInfo) {
|
||||
|
||||
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
|
||||
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
|
||||
@ -122,7 +125,7 @@ public class FileStatusProcessingUpdateService {
|
||||
} else {
|
||||
fileStatusService.setStatusOcrProcessing(fileId,
|
||||
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
|
||||
fileStatusService.addToOcrQueue(dossierId, fileId, 2);
|
||||
fileStatusService.addToOcrQueue(dossierId, fileId, 2, features);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -12,7 +12,6 @@ import java.util.function.BiFunction;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -21,6 +20,7 @@ import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocu
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierTemplateEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
|
||||
@ -31,7 +31,6 @@ import com.iqser.red.service.persistence.management.v1.processor.model.AnalysisT
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||
@ -80,6 +79,7 @@ import com.knecon.fforesight.llm.service.LlmNerMessage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import jakarta.transaction.Transactional;
|
||||
@ -569,6 +569,12 @@ public class FileStatusService {
|
||||
|
||||
public void setStatusOcrQueued(String dossierId, String fileId) {
|
||||
|
||||
setStatusOcrQueued(dossierId, fileId, false);
|
||||
}
|
||||
|
||||
|
||||
public void setStatusOcrQueued(String dossierId, String fileId, boolean idp) {
|
||||
|
||||
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
|
||||
|
||||
if (fileStatus.isExcluded()) {
|
||||
@ -579,7 +585,7 @@ public class FileStatusService {
|
||||
updateOCRStartTime(fileId);
|
||||
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
|
||||
addToOcrQueue(dossierId, fileId, 2);
|
||||
addToOcrQueue(dossierId, fileId, 2, idp);
|
||||
}
|
||||
|
||||
|
||||
@ -760,18 +766,29 @@ public class FileStatusService {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds the set of Azure OCR features for a file from its dossier template and hands
 * the file to the feature-set overload of addToOcrQueue.
 *
 * NOTE(review): this view interleaves two revisions of the method. The three-argument
 * signature, the removeWatermark local, the if (removeWatermark) check and the first
 * isDocuMine() check presumably belong to the pre-change revision - confirm against the real file.
 *
 * @param dossierId dossier whose template settings are read
 * @param fileId file to queue for OCR
 * @param priority passed through unchanged to the feature-set overload
 * @param useIdp when true, the IDP feature is added even if the template does not enable it by default
 */
public void addToOcrQueue(String dossierId, String fileId, int priority) {
|
||||
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean useIdp) {
|
||||
|
||||
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
|
||||
// Load the template once; all feature decisions below read from it.
DossierTemplateEntity dt = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId));
|
||||
Set<AzureOcrFeature> features = new HashSet<>();
|
||||
if (removeWatermark) {
|
||||
if (dt.isRemoveWatermark()) {
|
||||
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
|
||||
}
|
||||
if (currentApplicationTypeProvider.isDocuMine()) {
|
||||
// IDP is enabled either by the explicit request flag or by the template default.
if (useIdp || dt.isIdpByDefault()) {
|
||||
features.add(AzureOcrFeature.IDP);
|
||||
}
|
||||
// Rotation correction is driven by the template setting.
if (dt.isRotationCorrectionByDefault()) {
|
||||
features.add(AzureOcrFeature.ROTATION_CORRECTION);
|
||||
}
|
||||
// Font style detection is only requested when the application type is DocuMine.
if (currentApplicationTypeProvider.isDocuMine()) {
|
||||
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
|
||||
}
|
||||
|
||||
addToOcrQueue(dossierId, fileId, priority, features);
|
||||
}
|
||||
|
||||
|
||||
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
|
||||
|
||||
var request = DocumentRequest.builder()
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
@ -1175,4 +1192,10 @@ public class FileStatusService {
|
||||
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
|
||||
}
|
||||
|
||||
|
||||
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
|
||||
|
||||
fileStatusPersistenceService.increaseTokenUsage(fileId, promptTokens, completionTokens);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -171,18 +171,18 @@ public class ReanalysisService {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Queues OCR for every file of the dossier that passes validFilesFilter, has no OCR start
 * time yet and is currently in status PROCESSED. Files not matching are silently skipped.
 *
 * NOTE(review): this view interleaves two revisions; the one-argument signature and the
 * two-argument setStatusOcrQueued call presumably belong to the pre-change revision - confirm.
 *
 * @param dossierId id of the dossier whose files are considered
 * @param idp forwarded unchanged to setStatusOcrQueued for each selected file
 */
public void ocrDossier(String dossierId) {
|
||||
public void ocrDossier(String dossierId, boolean idp) {
|
||||
|
||||
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
|
||||
|
||||
// Only files that were never OCRed and are fully processed get queued.
relevantFiles.stream()
|
||||
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
||||
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
|
||||
}
|
||||
|
||||
|
||||
public void ocrFile(String dossierId, String fileId, boolean force) {
|
||||
public void ocrFile(String dossierId, String fileId, boolean force, boolean idp) {
|
||||
|
||||
dossierPersistenceService.getAndValidateDossier(dossierId);
|
||||
FileModel dossierFile = fileStatusService.getStatus(fileId);
|
||||
@ -202,30 +202,31 @@ public class ReanalysisService {
|
||||
}
|
||||
|
||||
if (force) {
|
||||
fileStatusService.setStatusOcrQueued(dossierId, fileId);
|
||||
fileStatusService.setStatusOcrQueued(dossierId, fileId, idp);
|
||||
} else {
|
||||
if (dossierFile.getOcrStartTime() != null) {
|
||||
throw new ConflictException("File already has been OCR processed");
|
||||
}
|
||||
|
||||
ocrFiles(dossierId, Sets.newHashSet(fileId));
|
||||
ocrFiles(dossierId, Sets.newHashSet(fileId), idp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Queues OCR for the given files of a dossier.
 * Fails for the whole request if any relevant file is in a status other than PROCESSED,
 * OCR_PROCESSING_QUEUED or OCR_PROCESSING; otherwise every relevant file without an OCR
 * start time is queued, and files that already have one are skipped.
 *
 * NOTE(review): this view interleaves two revisions; the two-argument signature, the first
 * anyMatch expression and the two-argument setStatusOcrQueued call presumably belong to the
 * pre-change revision - confirm against the real file.
 *
 * @param dossierId id of the dossier the files belong to
 * @param fileIds ids of the files to OCR
 * @param idp forwarded unchanged to setStatusOcrQueued for each queued file
 * @throws ConflictException if at least one relevant file is in a non-accepted status
 */
public void ocrFiles(String dossierId, Set<String> fileIds) {
|
||||
public void ocrFiles(String dossierId, Set<String> fileIds, boolean idp) {
|
||||
|
||||
var relevantFiles = getRelevantFiles(dossierId, fileIds);
|
||||
|
||||
// Reject the request as soon as one file is in none of the three accepted statuses.
if (relevantFiles.stream()
|
||||
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) && !fileStatus.getProcessingStatus()
|
||||
.equals(ProcessingStatus.OCR_PROCESSING_QUEUED) && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
|
||||
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) //
|
||||
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING_QUEUED) //
|
||||
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
|
||||
throw new ConflictException("File is not processed");
|
||||
}
|
||||
|
||||
// Files that already have an OCR start time are left alone.
relevantFiles.stream()
|
||||
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
|
||||
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
|
||||
}
|
||||
|
||||
|
||||
@ -275,12 +276,12 @@ public class ReanalysisService {
|
||||
|
||||
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
|
||||
|
||||
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.dossierIds(), reanalysisSettings.fileIds());
|
||||
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.getDossierIds(), reanalysisSettings.getFileIds());
|
||||
|
||||
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
|
||||
.stream()
|
||||
.filter(file -> isInList(file, reanalysisSettings))
|
||||
.filter(reanalysisSettings.fileStatusFilter().asPredicate())
|
||||
.filter(reanalysisSettings.getFileStatusFilter())
|
||||
.peek(file -> log.info("Reanalyzing file {}", file.getId()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
@ -289,7 +290,7 @@ public class ReanalysisService {
|
||||
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
|
||||
file.getId(),
|
||||
false,
|
||||
reanalysisSettings.repeatStructureAnalysis(),
|
||||
reanalysisSettings.isRepeatStructureAnalysis(),
|
||||
reanalysisSettings.runOcr()));
|
||||
|
||||
return rejectedFiles;
|
||||
@ -314,8 +315,8 @@ public class ReanalysisService {
|
||||
|
||||
/**
 * Checks whether a file is selected by the id lists of the reanalysis settings.
 * An empty file-id list matches every file and an empty dossier-id list matches every
 * dossier; both conditions must hold.
 *
 * NOTE(review): this view shows two return statements - one using record-style accessors
 * (fileIds(), dossierIds()) and one using getters; presumably the first is the pre-change
 * revision - confirm against the real file.
 *
 * @param file file to test
 * @param reAnalysisSettings settings carrying the file-id and dossier-id lists
 * @return true if the file passes both the file-id and the dossier-id restriction
 */
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
|
||||
|
||||
return (reAnalysisSettings.fileIds().isEmpty() || reAnalysisSettings.fileIds().contains(file.getId())) //
|
||||
&& (reAnalysisSettings.dossierIds().isEmpty() || reAnalysisSettings.dossierIds().contains(file.getDossierId()));
|
||||
return (reAnalysisSettings.getFileIds().isEmpty() || reAnalysisSettings.getFileIds().contains(file.getId())) //
|
||||
&& (reAnalysisSettings.getDossierIds().isEmpty() || reAnalysisSettings.getDossierIds().contains(file.getDossierId()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -10,6 +10,8 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -21,7 +23,6 @@ import com.iqser.red.service.persistence.management.v1.processor.entity.projecti
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
|
||||
@ -31,6 +32,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.component.C
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import jakarta.transaction.Transactional;
|
||||
@ -680,6 +683,7 @@ public class FileStatusPersistenceService {
|
||||
// Persist the OCR progress reported by the OCR service.
// The pages only count as IDP pages when the response lists the IDP feature. A response
// without a feature set is treated as "no IDP" instead of failing with a NullPointerException.
// ocrEndTime is only set once the response reports the run as finished.
fileRepository.updateOCRStatus(response.getFileId(),
        response.getNumberOfPagesToOCR(),
        response.getNumberOfOCRedPages(),
        (response.getFeatures() != null && response.getFeatures().contains(AzureOcrFeature.IDP)) ? response.getNumberOfOCRedPages() : 0,
        response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
        OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
|
||||
}
|
||||
@ -765,4 +769,11 @@ public class FileStatusPersistenceService {
|
||||
fileRepository.updateLastDownloadForFile(fileId, null);
|
||||
}
|
||||
|
||||
|
||||
@Transactional
|
||||
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
|
||||
|
||||
fileRepository.increaseTokenUsage(fileId, promptTokens, completionTokens);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -389,11 +389,15 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
||||
/**
 * Overwrites the OCR progress fields of one file with a single update query:
 * numberOfOCRedPages, numberOfPagesToOCR, numberOfIdpPages, ocrEndTime and lastUpdated.
 * All values are assigned, not accumulated, so the previous counters are replaced.
 * The persistence context is cleared after the query (clearAutomatically = true).
 *
 * NOTE(review): this view interleaves two revisions of the query string; the fragments
 * without numberOfIdpPages presumably belong to the pre-change revision - confirm.
 *
 * @param fileId id of the file to update
 * @param numberOfPagesToOCR new total number of pages to OCR
 * @param numberOfOCRedPages new number of pages already OCRed
 * @param numberOfIdpPages new number of IDP pages
 * @param ocrEndTime new OCR end time; the column is overwritten with whatever is passed
 * @param lastUpdated new last-updated timestamp
 */
@Transactional
|
||||
@Modifying(clearAutomatically = true)
|
||||
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
|
||||
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, f.ocrEndTime = :ocrEndTime, "
|
||||
+ "f.lastUpdated = :lastUpdated where f.id = :fileId")
|
||||
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, "
|
||||
+ "f.numberOfIdpPages = :numberOfIdpPages, "
|
||||
+ "f.ocrEndTime = :ocrEndTime, "
|
||||
+ "f.lastUpdated = :lastUpdated "
|
||||
+ "where f.id = :fileId")
|
||||
void updateOCRStatus(@Param("fileId") String fileId,
|
||||
@Param("numberOfPagesToOCR") int numberOfPagesToOCR,
|
||||
@Param("numberOfOCRedPages") int numberOfOCRedPages,
|
||||
@Param("numberOfIdpPages") int numberOfIdpPages,
|
||||
@Param("ocrEndTime") OffsetDateTime ocrEndTime,
|
||||
@Param("lastUpdated") OffsetDateTime lastUpdated);
|
||||
|
||||
@ -479,6 +483,11 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
|
||||
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
|
||||
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
|
||||
|
||||
|
||||
@Modifying
|
||||
@Query("UPDATE FileEntity f SET f.usedPromptTokens = f.usedPromptTokens + :promptTokens, f.usedCompletionTokens = f.usedCompletionTokens + :completionTokens WHERE f.id = :id")
|
||||
void increaseTokenUsage(@Param("id") String fileId, @Param("promptTokens") int promptTokens, @Param("completionTokens") int completionTokens);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -63,7 +63,7 @@ public class NerMessageReceiver {
|
||||
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
|
||||
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
|
||||
addFileIdToTrace(fileId);
|
||||
|
||||
fileStatusService.increaseTokenUsage(fileId, message.getPromptTokens(), message.getCompletionTokens());
|
||||
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
|
||||
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
|
||||
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);
|
||||
|
||||
@ -11,14 +11,13 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
@ -43,6 +42,8 @@ public class OCRProcessingMessageReceiver {
|
||||
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
|
||||
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
|
||||
|
||||
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
|
||||
|
||||
var fileModel = fileStatusService.getStatus(response.getFileId());
|
||||
|
||||
if (response.isOcrStarted()) {
|
||||
@ -57,7 +58,6 @@ public class OCRProcessingMessageReceiver {
|
||||
response.getNumberOfOCRedPages());
|
||||
}
|
||||
|
||||
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
|
||||
}
|
||||
|
||||
|
||||
@ -95,6 +95,7 @@ public class OCRProcessingMessageReceiver {
|
||||
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
|
||||
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
|
||||
ocrRequestMessage.getFileId(),
|
||||
ocrRequestMessage.getFeatures(),
|
||||
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
|
||||
}
|
||||
|
||||
|
||||
@ -257,3 +257,7 @@ databaseChangeLog:
|
||||
file: db/changelog/tenant/157-add-included-to-csv-export-field.yaml
|
||||
- include:
|
||||
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
|
||||
- include:
|
||||
file: db/changelog/tenant/151.0.0-add-usage-fields-to-file-for-idp-and-llm.yaml
|
||||
- include:
|
||||
file: db/changelog/tenant/152.0.0-add-idp-related-fields-to-dossier-template.yaml
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
# Adds three usage counters to the "file" table: used_completion_tokens,
# used_prompt_tokens and number_of_idp_pages.
# Each column is an int, NOT NULL, with a default of 0, so existing rows start at zero.
databaseChangeLog:
|
||||
- changeSet:
|
||||
id: add-llm-usage-fields-to-file
|
||||
author: kilian
|
||||
changes:
|
||||
- addColumn:
|
||||
tableName: file
|
||||
columns:
|
||||
- column:
|
||||
name: used_completion_tokens
|
||||
type: int
|
||||
defaultValueNumeric: 0
|
||||
constraints:
|
||||
nullable: false
|
||||
- column:
|
||||
name: used_prompt_tokens
|
||||
type: int
|
||||
defaultValueNumeric: 0
|
||||
constraints:
|
||||
nullable: false
|
||||
- column:
|
||||
name: number_of_idp_pages
|
||||
type: int
|
||||
defaultValueNumeric: 0
|
||||
constraints:
|
||||
nullable: false
|
||||
@ -0,0 +1,46 @@
|
||||
# Adds the IDP-related template flags to "dossier_template".
#  1. add idp_by_default (default false) and rotation_correction_by_default (no default),
#  2. backfill rotation_correction_by_default from layout_parsing_type,
#  3. make both columns NOT NULL in a separate change set.
databaseChangeLog:
  - changeSet:
      id: add-idp-related-fields-to-dossier-template
      author: kilian
      changes:
        - addColumn:
            tableName: dossier_template
            columns:
              - column:
                  name: idp_by_default
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if IDP is enabled by default"
              - column:
                  name: rotation_correction_by_default
                  type: boolean
                  remarks: "Indicates if rotation correction is enabled by default"

        # Templates using the DOCUMINE_OLD layout parser get rotation correction enabled.
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: true
            where: "layout_parsing_type = 'DOCUMINE_OLD'"

        # Every other template gets it disabled. The explicit IS NULL branch is required:
        # "!=" never matches rows whose layout_parsing_type is NULL, so those rows would keep
        # a NULL flag and the NOT NULL constraint in the next change set would fail.
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: false
            where: "layout_parsing_type IS NULL OR layout_parsing_type != 'DOCUMINE_OLD'"

  - changeSet:
      id: make-fields-non-nullable
      author: kilian
      changes:
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: idp_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: rotation_correction_by_default
            columnDataType: boolean
|
||||
@ -46,22 +46,22 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
|
||||
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
||||
|
||||
reanalysisClient.ocrDossier(dossier.getId());
|
||||
reanalysisClient.ocrDossier(dossier.getId(), false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
reanalysisClient.ocrDossier(dossier.getId());
|
||||
reanalysisClient.ocrDossier(dossier.getId(), false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true);
|
||||
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()));
|
||||
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()), false);
|
||||
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
|
||||
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
|
||||
resetProcessingStatus(file);
|
||||
|
||||
@ -1 +0,0 @@
|
||||
hub.image.name.prefix=docker-dev.knecon.com/tests/
|
||||
@ -68,6 +68,12 @@ public class DossierTemplateModel {
|
||||
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
|
||||
private boolean ocrByDefault;
|
||||
|
||||
@Schema(description = "Flag that specifies if rotation correction is attempted during OCR for all dossiers of this template")
|
||||
private boolean rotationCorrectionByDefault;
|
||||
|
||||
@Schema(description = "Flag that specifies if IDP is automatically performed on upload for all dossiers of this template")
|
||||
private boolean idpByDefault;
|
||||
|
||||
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
|
||||
private boolean removeWatermark;
|
||||
|
||||
|
||||
@ -88,6 +88,12 @@ public class FileStatus {
|
||||
private int numberOfPagesToOCR;
|
||||
@Schema(description = "Number of pages already OCRed by us")
|
||||
private int numberOfOCRedPages;
|
||||
@Schema(description = "Number of pages already IDPed by us")
|
||||
private int numberOfIdpPages;
|
||||
@Schema(description = "Number of prompt tokens used by this file")
|
||||
private int usedPromptTokens;
|
||||
@Schema(description = "Number of completion tokens used by this file")
|
||||
private int usedCompletionTokens;
|
||||
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
|
||||
private OffsetDateTime ocrEndTime;
|
||||
@Schema(description = "Shows if this file has comments on annotations.")
|
||||
|
||||
@ -15,7 +15,7 @@ import lombok.NoArgsConstructor;
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class FileStatusFilter {
|
||||
public class FileStatusFilter implements Predicate<FileModel> {
|
||||
|
||||
private List<ProcessingStatus> processingStatusList = new ArrayList<>();
|
||||
private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
|
||||
@ -32,7 +32,8 @@ public class FileStatusFilter {
|
||||
}
|
||||
|
||||
|
||||
public Predicate<FileModel> asPredicate() {
|
||||
@Override
|
||||
public boolean test(FileModel fileModel) {
|
||||
|
||||
if (this.getProcessingStatusList() == null) {
|
||||
this.setProcessingStatusList(new ArrayList<>());
|
||||
@ -42,10 +43,12 @@ public class FileStatusFilter {
|
||||
this.setWorkflowStatusList(new ArrayList<>());
|
||||
}
|
||||
|
||||
return fileStatus -> (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileStatus.getProcessingStatus()))
|
||||
&& (this.getWorkflowStatusList().isEmpty() || this.getWorkflowStatusList().contains(fileStatus.getWorkflowStatus()))
|
||||
&& (this.isIncludeSoftDeletedFiles() || fileStatus.getDeleted() == null)
|
||||
&& (this.isIncludeHardDeletedFiles() || fileStatus.getHardDeletedTime() == null);
|
||||
return (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileModel.getProcessingStatus()))
|
||||
&& (this.getWorkflowStatusList().isEmpty()
|
||||
|| this.getWorkflowStatusList()
|
||||
.contains(fileModel.getWorkflowStatus()))
|
||||
&& (this.isIncludeSoftDeletedFiles() || fileModel.getDeleted() == null)
|
||||
&& (this.isIncludeHardDeletedFiles() || fileModel.getHardDeletedTime() == null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -4,16 +4,32 @@ import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
public record ReanalysisSettings(
|
||||
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]") Set<String> dossierIds,
|
||||
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]") Set<String> fileIds,
|
||||
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false") boolean repeatStructureAnalysis,
|
||||
@Schema(description = "If set to true, ocr will be repeated and therefore also layout parsing and named entity recognition.", defaultValue = "false") boolean runOcr,
|
||||
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "{}") FileStatusFilter fileStatusFilter
|
||||
) {
|
||||
@Builder
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public final class ReanalysisSettings {
|
||||
|
||||
public FileStatusFilter fileStatusFilter() {
|
||||
@Getter
|
||||
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
|
||||
Set<String> dossierIds;
|
||||
@Getter
|
||||
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
|
||||
Set<String> fileIds;
|
||||
@Getter
|
||||
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
|
||||
boolean repeatStructureAnalysis;
|
||||
|
||||
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
|
||||
FileStatusFilter fileStatusFilter;
|
||||
|
||||
|
||||
public FileStatusFilter getFileStatusFilter() {
|
||||
|
||||
return Optional.ofNullable(fileStatusFilter)
|
||||
.orElse(new FileStatusFilter());
|
||||
|
||||
@ -36,6 +36,8 @@ public class DossierTemplate {
|
||||
private boolean keepOverlappingObjects;
|
||||
private boolean applyDictionaryUpdatesToAllDossiersByDefault;
|
||||
private boolean ocrByDefault;
|
||||
private boolean rotationCorrectionByDefault;
|
||||
private boolean idpByDefault;
|
||||
private boolean removeWatermark;
|
||||
private LayoutParsingType layoutParsingType;
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@ import java.util.Set;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -60,6 +61,9 @@ public class FileModel {
|
||||
private OffsetDateTime ocrStartTime;
|
||||
private Integer numberOfPagesToOCR;
|
||||
private Integer numberOfOCRedPages;
|
||||
private Integer numberOfIdpPages;
|
||||
private int usedPromptTokens;
|
||||
private int usedCompletionTokens;
|
||||
private OffsetDateTime ocrEndTime;
|
||||
private boolean hasAnnotationComments;
|
||||
private boolean excluded;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user