Compare commits

...

10 Commits

Author SHA1 Message Date
Kilian Schuettler
599a57b7fd RED-8670: make ocr settings configurable 2025-01-14 13:12:05 +01:00
Kilian Schuettler
fb6a16dfe9 RED-8670: make ocr settings configurable 2025-01-14 13:08:07 +01:00
Kilian Schuettler
8109e9f61b RED-8670: add idp fields and llm tracking 2025-01-14 13:08:07 +01:00
Kilian Schuettler
8e4fde6830 RED-8670: add idp fields and llm tracking 2025-01-14 13:08:07 +01:00
Kilian Schuettler
2a79f8b995 RED-8670: add idp fields and llm tracking 2025-01-14 13:08:07 +01:00
Kilian Schuettler
758aa3b165 RED-8670: add idp fields and llm tracking 2025-01-14 13:08:07 +01:00
Kilian Schuettler
6656c9eb8a RED-8670: add idp fields and llm tracking 2025-01-14 13:08:07 +01:00
Kilian Schuettler
a24cfd4b18 RED-8670: add idp fields and llm tracking 2025-01-14 13:08:07 +01:00
Kilian Schuettler
ce3da37a8b RED-8670: add idp fields and llm tracking 2025-01-14 13:08:04 +01:00
Kilian Schuettler
a03ab9f105 RED-8670: add idp fields and llm tracking 2025-01-14 13:07:32 +01:00
31 changed files with 377 additions and 111 deletions

View File

@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService; import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException; import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException; import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService; import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService; import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService; import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService; import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService;
import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource; import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel; import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
@ -42,7 +42,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest;
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity; import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
@ -61,7 +60,6 @@ public class DossierTemplateController implements DossierTemplateResource {
private final AuditPersistenceService auditPersistenceService; private final AuditPersistenceService auditPersistenceService;
private final DossierManagementService dossierManagementService; private final DossierManagementService dossierManagementService;
private final DossierACLService dossierACLService; private final DossierACLService dossierACLService;
private final UserService userService;
@Override @Override
@ -314,6 +312,10 @@ public class DossierTemplateController implements DossierTemplateResource {
.applyDictionaryUpdatesToAllDossiersByDefault(dossierTemplate.isApplyDictionaryUpdatesToAllDossiersByDefault()) .applyDictionaryUpdatesToAllDossiersByDefault(dossierTemplate.isApplyDictionaryUpdatesToAllDossiersByDefault())
.ocrByDefault(dossierTemplate.isOcrByDefault()) .ocrByDefault(dossierTemplate.isOcrByDefault())
.removeWatermark(dossierTemplate.isRemoveWatermark()) .removeWatermark(dossierTemplate.isRemoveWatermark())
.idpByDefault(dossierTemplate.isIdpByDefault())
.rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault())
.fontStyleDetection(dossierTemplate.isFontStyleDetection())
.ocrAllPages(dossierTemplate.isOcrAllPages())
.build(); .build();
} }

View File

@ -36,6 +36,7 @@ import lombok.RequiredArgsConstructor;
public class ReanalysisController implements ReanalysisResource { public class ReanalysisController implements ReanalysisResource {
private static final String DOSSIER_ID = "dossierId"; private static final String DOSSIER_ID = "dossierId";
private final ReanalysisService reanalysisService; private final ReanalysisService reanalysisService;
private final FileStatusManagementService fileStatusManagementService; private final FileStatusManagementService fileStatusManagementService;
private final AuditPersistenceService auditPersistenceService; private final AuditPersistenceService auditPersistenceService;
@ -98,11 +99,11 @@ public class ReanalysisController implements ReanalysisResource {
@Override @Override
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')") @PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId) { public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId); accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
reanalysisService.ocrDossier(dossierId); reanalysisService.ocrDossier(dossierId, idp);
auditPersistenceService.audit(AuditRequest.builder() auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId()) .userId(KeycloakSecurity.getUserId())
@ -118,11 +119,12 @@ public class ReanalysisController implements ReanalysisResource {
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')") @PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId, public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId, @PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) { @RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId); accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
validateOCR(dossierId, fileId); validateOCR(dossierId, fileId);
reanalysisService.ocrFile(dossierId, fileId, force); reanalysisService.ocrFile(dossierId, fileId, force, idp);
auditPersistenceService.audit(AuditRequest.builder() auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId()) .userId(KeycloakSecurity.getUserId())
.objectId(dossierId) .objectId(dossierId)
@ -136,11 +138,13 @@ public class ReanalysisController implements ReanalysisResource {
@Override @Override
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')") @PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds) { public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
@RequestBody Set<String> fileIds,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId); accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
fileIds.forEach(fileId -> validateOCR(dossierId, fileId)); fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
reanalysisService.ocrFiles(dossierId, fileIds); reanalysisService.ocrFiles(dossierId, fileIds, idp);
auditPersistenceService.audit(AuditRequest.builder() auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId()) .userId(KeycloakSecurity.getUserId())
.objectId(dossierId) .objectId(dossierId)

View File

@ -38,6 +38,7 @@ public interface ReanalysisResource {
String EXCLUDED_STATUS_PARAM = "excluded"; String EXCLUDED_STATUS_PARAM = "excluded";
String FORCE_PARAM = "force"; String FORCE_PARAM = "force";
String IDP_PARAM = "idp";
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE) @PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ -65,7 +66,7 @@ public interface ReanalysisResource {
@Operation(summary = "Ocr and reanalyze a dossier", description = "None") @Operation(summary = "Ocr and reanalyze a dossier", description = "None")
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE) @PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")}) @ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId); void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@Operation(summary = "Ocr and reanalyze a file", description = "None") @Operation(summary = "Ocr and reanalyze a file", description = "None")
@ -73,13 +74,16 @@ public interface ReanalysisResource {
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")}) @ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId, void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId, @PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force); @RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None") @Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH) @PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")}) @ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds); void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
@RequestBody Set<String> fileIds,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None") @Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")

View File

@ -35,8 +35,8 @@ dependencies {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
} }
api("com.knecon.fforesight:azure-ocr-service-api:0.13.0") api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
implementation("com.knecon.fforesight:llm-service-api:1.20.0-RED10072.2") implementation("com.knecon.fforesight:llm-service-api:1.35.0")
api("com.knecon.fforesight:jobs-commons:0.13.0") api("com.knecon.fforesight:jobs-commons:0.13.0")
api("com.iqser.red.commons:storage-commons:2.50.0") api("com.iqser.red.commons:storage-commons:2.50.0")
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") { api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {

View File

@ -83,6 +83,18 @@ public class DossierTemplateEntity {
@Column(name = "ocr_by_default") @Column(name = "ocr_by_default")
private boolean ocrByDefault; private boolean ocrByDefault;
@Column(name = "rotation_correction_by_default")
private boolean rotationCorrectionByDefault;
@Column(name = "idp_by_default")
private boolean idpByDefault;
@Column(name = "font_style_detection")
private boolean fontStyleDetection;
@Column(name = "ocr_all_pages")
private boolean ocrAllPages;
@Column(name = "remove_watermark") @Column(name = "remove_watermark")
private boolean removeWatermark; private boolean removeWatermark;
@ -128,6 +140,7 @@ public class DossierTemplateEntity {
@Enumerated(EnumType.STRING) @Enumerated(EnumType.STRING)
private LayoutParsingType layoutParsingType; private LayoutParsingType layoutParsingType;
public static DossierTemplateEntity copyDossierTemplateEntityWithoutChildEntities(DossierTemplateEntity dossierTemplateEntity) { public static DossierTemplateEntity copyDossierTemplateEntityWithoutChildEntities(DossierTemplateEntity dossierTemplateEntity) {
DossierTemplateEntity dossierTemplateCopy = new DossierTemplateEntity(); DossierTemplateEntity dossierTemplateCopy = new DossierTemplateEntity();
@ -148,6 +161,10 @@ public class DossierTemplateEntity {
dossierTemplateCopy.removeWatermark = dossierTemplateEntity.removeWatermark; dossierTemplateCopy.removeWatermark = dossierTemplateEntity.removeWatermark;
dossierTemplateCopy.downloadFileTypes = dossierTemplateEntity.downloadFileTypes; dossierTemplateCopy.downloadFileTypes = dossierTemplateEntity.downloadFileTypes;
dossierTemplateCopy.layoutParsingType = dossierTemplateEntity.layoutParsingType; dossierTemplateCopy.layoutParsingType = dossierTemplateEntity.layoutParsingType;
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection;
dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages;
return dossierTemplateCopy; return dossierTemplateCopy;
} }

View File

@ -152,9 +152,16 @@ public class FileEntity {
@Column(name = "number_of_ocred_pages") @Column(name = "number_of_ocred_pages")
private Integer numberOfOCRedPages; private Integer numberOfOCRedPages;
@Column(name = "number_of_idp_pages")
private Integer numberOfIdpPages;
@Column(name = "ocr_end_time") @Column(name = "ocr_end_time")
private OffsetDateTime ocrEndTime; private OffsetDateTime ocrEndTime;
private int usedPromptTokens;
private int usedCompletionTokens;
@Column @Column
private boolean hasAnnotationComments; private boolean hasAnnotationComments;

View File

@ -1,20 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class OCRStatusUpdateResponse {
private String fileId;
private int numberOfPagesToOCR;
private int numberOfOCRedPages;
private boolean ocrFinished;
private boolean ocrStarted;
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.persistence.management.v1.processor.service; package com.iqser.red.service.persistence.management.v1.processor.service;
import java.util.Set;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.retry.support.RetryTemplate; import org.springframework.retry.support.RetryTemplate;
@ -15,6 +17,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.UntouchedDo
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.search.v1.model.IndexMessageType; import com.iqser.red.service.search.v1.model.IndexMessageType;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import jakarta.transaction.Transactional; import jakarta.transaction.Transactional;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
@ -114,7 +117,7 @@ public class FileStatusProcessingUpdateService {
} }
public void requeueOCROrMarkFailed(String dossierId, String fileId, FileErrorInfo fileErrorInfo) { public void requeueOCROrMarkFailed(String dossierId, String fileId, Set<AzureOcrFeature> features, FileErrorInfo fileErrorInfo) {
var fileEntity = fileStatusPersistenceService.getStatus(fileId); var fileEntity = fileStatusPersistenceService.getStatus(fileId);
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) { if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
@ -122,7 +125,7 @@ public class FileStatusProcessingUpdateService {
} else { } else {
fileStatusService.setStatusOcrProcessing(fileId, fileStatusService.setStatusOcrProcessing(fileId,
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0); fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
fileStatusService.addToOcrQueue(dossierId, fileId, 2); fileStatusService.addToOcrQueue(dossierId, fileId, 2, features);
} }
} }

View File

@ -12,7 +12,6 @@ import java.util.function.BiFunction;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -21,6 +20,7 @@ import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocu
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration; import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity; import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity; import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierTemplateEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity; import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity; import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection; import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
@ -31,7 +31,6 @@ import com.iqser.red.service.persistence.management.v1.processor.model.AnalysisT
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier; import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions; import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest; import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus; import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
@ -80,6 +79,7 @@ import com.knecon.fforesight.llm.service.LlmNerMessage;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature; import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest; import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.knecon.fforesight.tenantcommons.TenantContext; import com.knecon.fforesight.tenantcommons.TenantContext;
import jakarta.transaction.Transactional; import jakarta.transaction.Transactional;
@ -352,7 +352,6 @@ public class FileStatusService {
return; return;
} }
boolean forceAnalysis = false; boolean forceAnalysis = false;
if (settings.isLlmNerServiceEnabled()) { if (settings.isLlmNerServiceEnabled()) {
boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES); boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES);
@ -386,7 +385,7 @@ public class FileStatusService {
boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE); boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE);
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel); MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
if(analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) { if (analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
messageType = MessageType.ANALYSE; messageType = MessageType.ANALYSE;
} }
@ -569,6 +568,12 @@ public class FileStatusService {
public void setStatusOcrQueued(String dossierId, String fileId) { public void setStatusOcrQueued(String dossierId, String fileId) {
setStatusOcrQueued(dossierId, fileId, false);
}
public void setStatusOcrQueued(String dossierId, String fileId, boolean idp) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId); FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
if (fileStatus.isExcluded()) { if (fileStatus.isExcluded()) {
@ -579,7 +584,7 @@ public class FileStatusService {
updateOCRStartTime(fileId); updateOCRStartTime(fileId);
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED); fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1); websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
addToOcrQueue(dossierId, fileId, 2); addToOcrQueue(dossierId, fileId, 2, idp);
} }
@ -760,22 +765,39 @@ public class FileStatusService {
} }
public void addToOcrQueue(String dossierId, String fileId, int priority) { public void addToOcrQueue(String dossierId, String fileId, int priority, boolean useIdp) {
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark(); DossierTemplateEntity dt = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId));
Set<AzureOcrFeature> features = new HashSet<>(); Set<AzureOcrFeature> features = new HashSet<>();
if (removeWatermark) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS); if (dt.isFontStyleDetection()) {
}
if (currentApplicationTypeProvider.isDocuMine()) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
features.add(AzureOcrFeature.FONT_STYLE_DETECTION); features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
} }
if (dt.isRemoveWatermark()) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
if (useIdp || dt.isIdpByDefault()) {
features.add(AzureOcrFeature.IDP);
}
if (dt.isRotationCorrectionByDefault()) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
}
addToOcrQueue(dossierId, fileId, priority, features);
}
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
var request = DocumentRequest.builder() var request = DocumentRequest.builder()
// needed for legacy OCR-services
.dossierId(dossierId) .dossierId(dossierId)
.fileId(fileId) .fileId(fileId)
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services .removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS))
// new api
.originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
.viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))
.idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT))
.features(features) .features(features)
.build(); .build();
@ -820,7 +842,7 @@ public class FileStatusService {
fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false); fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false);
if(oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) { if (oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
fileStatusPersistenceService.clearLastDownload(fileId); fileStatusPersistenceService.clearLastDownload(fileId);
} }
} }
@ -963,6 +985,13 @@ public class FileStatusService {
@Transactional @Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr) { public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr) {
setStatusFullReprocess(dossierId, fileId, priority, requiresStructureAnalysis, runOcr, false);
}
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr, boolean idp) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId); FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
if (fileStatus.isExcluded()) { if (fileStatus.isExcluded()) {
@ -970,14 +999,14 @@ public class FileStatusService {
return; return;
} }
if (requiresStructureAnalysis || runOcr) { if (requiresStructureAnalysis || runOcr || idp) {
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId); log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
fileManagementStorageService.deleteDocumentAndNerObjects(dossierId, fileId); fileManagementStorageService.deleteDocumentAndNerObjects(dossierId, fileId);
} }
if (runOcr) { if (runOcr || idp) {
fileStatusPersistenceService.resetOcrStartAndEndDate(fileId); fileStatusPersistenceService.resetOcrStartAndEndDate(fileId);
setStatusOcrQueued(dossierId, fileId); setStatusOcrQueued(dossierId, fileId, idp);
return; return;
} }
@ -1064,6 +1093,7 @@ public class FileStatusService {
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT); addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT);
} }
@Transactional @Transactional
public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) { public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) {
@ -1175,4 +1205,10 @@ public class FileStatusService {
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels); return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
} }
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
fileStatusPersistenceService.increaseTokenUsage(fileId, promptTokens, completionTokens);
}
} }

View File

@ -171,18 +171,18 @@ public class ReanalysisService {
} }
public void ocrDossier(String dossierId) { public void ocrDossier(String dossierId, boolean idp) {
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter); var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
relevantFiles.stream() relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null) .filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED)) .filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId())); .forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
} }
public void ocrFile(String dossierId, String fileId, boolean force) { public void ocrFile(String dossierId, String fileId, boolean force, boolean idp) {
dossierPersistenceService.getAndValidateDossier(dossierId); dossierPersistenceService.getAndValidateDossier(dossierId);
FileModel dossierFile = fileStatusService.getStatus(fileId); FileModel dossierFile = fileStatusService.getStatus(fileId);
@ -202,30 +202,31 @@ public class ReanalysisService {
} }
if (force) { if (force) {
fileStatusService.setStatusOcrQueued(dossierId, fileId); fileStatusService.setStatusOcrQueued(dossierId, fileId, idp);
} else { } else {
if (dossierFile.getOcrStartTime() != null) { if (dossierFile.getOcrStartTime() != null) {
throw new ConflictException("File already has been OCR processed"); throw new ConflictException("File already has been OCR processed");
} }
ocrFiles(dossierId, Sets.newHashSet(fileId)); ocrFiles(dossierId, Sets.newHashSet(fileId), idp);
} }
} }
public void ocrFiles(String dossierId, Set<String> fileIds) { public void ocrFiles(String dossierId, Set<String> fileIds, boolean idp) {
var relevantFiles = getRelevantFiles(dossierId, fileIds); var relevantFiles = getRelevantFiles(dossierId, fileIds);
if (relevantFiles.stream() if (relevantFiles.stream()
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) && !fileStatus.getProcessingStatus() .anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) //
.equals(ProcessingStatus.OCR_PROCESSING_QUEUED) && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) { && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING_QUEUED) //
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
throw new ConflictException("File is not processed"); throw new ConflictException("File is not processed");
} }
relevantFiles.stream() relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null) .filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId())); .forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
} }
@ -275,12 +276,12 @@ public class ReanalysisService {
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) { public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.dossierIds(), reanalysisSettings.fileIds()); requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.getDossierIds(), reanalysisSettings.getFileIds());
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId) var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
.stream() .stream()
.filter(file -> isInList(file, reanalysisSettings)) .filter(file -> isInList(file, reanalysisSettings))
.filter(reanalysisSettings.fileStatusFilter().asPredicate()) .filter(reanalysisSettings.getFileStatusFilter())
.peek(file -> log.info("Reanalyzing file {}", file.getId())) .peek(file -> log.info("Reanalyzing file {}", file.getId()))
.collect(Collectors.toList()); .collect(Collectors.toList());
@ -289,8 +290,9 @@ public class ReanalysisService {
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(), files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
file.getId(), file.getId(),
false, false,
reanalysisSettings.repeatStructureAnalysis(), reanalysisSettings.isRepeatStructureAnalysis(),
reanalysisSettings.runOcr())); reanalysisSettings.isRunOcr(),
reanalysisSettings.isRunIdp()));
return rejectedFiles; return rejectedFiles;
} }
@ -314,8 +316,8 @@ public class ReanalysisService {
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) { private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
return (reAnalysisSettings.fileIds().isEmpty() || reAnalysisSettings.fileIds().contains(file.getId())) // return (reAnalysisSettings.getFileIds().isEmpty() || reAnalysisSettings.getFileIds().contains(file.getId())) //
&& (reAnalysisSettings.dossierIds().isEmpty() || reAnalysisSettings.dossierIds().contains(file.getDossierId())); && (reAnalysisSettings.getDossierIds().isEmpty() || reAnalysisSettings.getDossierIds().contains(file.getDossierId()));
} }
} }

View File

@ -27,8 +27,7 @@ public class LayoutParsingRequestFactory {
public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) { public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) {
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate( LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType();
dossierTemplateId).getLayoutParsingType();
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) // Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty(); ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
@ -39,6 +38,9 @@ public class LayoutParsingRequestFactory {
Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) // Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty(); ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty();
Optional<String> optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty();
return LayoutParsingRequest.builder() return LayoutParsingRequest.builder()
.layoutParsingType(layoutParsingType) .layoutParsingType(layoutParsingType)
.identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority)) .identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority))
@ -55,6 +57,7 @@ public class LayoutParsingRequestFactory {
.documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId, .documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId,
fileId, fileId,
FileType.MARKDOWN)) : Optional.empty()) FileType.MARKDOWN)) : Optional.empty())
.idpResultStorageId(optionalIdpResultFileId)
.build(); .build();
} }

View File

@ -21,7 +21,6 @@ import com.iqser.red.service.persistence.management.v1.processor.entity.projecti
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException; import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException; import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier; import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus; import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository; import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository; import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
@ -31,6 +30,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.component.C
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import jakarta.persistence.EntityManager; import jakarta.persistence.EntityManager;
import jakarta.transaction.Transactional; import jakarta.transaction.Transactional;
@ -602,9 +603,9 @@ public class FileStatusPersistenceService {
public int getNumberOfAssignedFiles(String userId) { public int getNumberOfAssignedFiles(String userId) {
List<FileEntity> files = fileRepository.findFilesByAssignee(userId); List<FileEntity> files = fileRepository.findFilesByAssignee(userId);
return files.stream() return Math.toIntExact(files.stream()
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null) .filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
.collect(Collectors.toList()).size(); .count());
} }
@ -680,6 +681,7 @@ public class FileStatusPersistenceService {
fileRepository.updateOCRStatus(response.getFileId(), fileRepository.updateOCRStatus(response.getFileId(),
response.getNumberOfPagesToOCR(), response.getNumberOfPagesToOCR(),
response.getNumberOfOCRedPages(), response.getNumberOfOCRedPages(),
response.getFeatures().contains(AzureOcrFeature.IDP) ? response.getNumberOfOCRedPages() : 0,
response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null, response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS)); OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
} }
@ -765,4 +767,11 @@ public class FileStatusPersistenceService {
fileRepository.updateLastDownloadForFile(fileId, null); fileRepository.updateLastDownloadForFile(fileId, null);
} }
/**
 * Records additional token usage for a file inside a transaction.
 * <p>
 * Delegates unchanged to {@code fileRepository.increaseTokenUsage}.
 *
 * @param fileId           id of the file whose usage is increased
 * @param promptTokens     number of prompt tokens to add
 * @param completionTokens number of completion tokens to add
 */
@Transactional
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
fileRepository.increaseTokenUsage(fileId, promptTokens, completionTokens);
}
} }

View File

@ -389,11 +389,15 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Transactional @Transactional
@Modifying(clearAutomatically = true) @Modifying(clearAutomatically = true)
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, " @Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, f.ocrEndTime = :ocrEndTime, " + "f.numberOfPagesToOCR = :numberOfPagesToOCR, "
+ "f.lastUpdated = :lastUpdated where f.id = :fileId") + "f.numberOfIdpPages = :numberOfIdpPages, "
+ "f.ocrEndTime = :ocrEndTime, "
+ "f.lastUpdated = :lastUpdated "
+ "where f.id = :fileId")
void updateOCRStatus(@Param("fileId") String fileId, void updateOCRStatus(@Param("fileId") String fileId,
@Param("numberOfPagesToOCR") int numberOfPagesToOCR, @Param("numberOfPagesToOCR") int numberOfPagesToOCR,
@Param("numberOfOCRedPages") int numberOfOCRedPages, @Param("numberOfOCRedPages") int numberOfOCRedPages,
@Param("numberOfIdpPages") int numberOfIdpPages,
@Param("ocrEndTime") OffsetDateTime ocrEndTime, @Param("ocrEndTime") OffsetDateTime ocrEndTime,
@Param("lastUpdated") OffsetDateTime lastUpdated); @Param("lastUpdated") OffsetDateTime lastUpdated);
@ -409,7 +413,7 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Modifying(clearAutomatically = true) @Modifying(clearAutomatically = true)
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL where f.id = :fileId") @Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL, f.numberOfIdpPages = NULL where f.id = :fileId")
void resetOcrStartAndEndDate(@Param("fileId") String fileId); void resetOcrStartAndEndDate(@Param("fileId") String fileId);
@ -479,6 +483,11 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId") @Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds); List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
/**
 * Adds the given amounts to the prompt and completion token counters of one file.
 * <p>
 * The increment is performed by a single bulk UPDATE statement, so the current
 * counter values are not read into the application first.
 * <p>
 * {@code clearAutomatically = true} matches the other modifying queries of this
 * repository: a bulk update bypasses the persistence context, so a
 * {@code FileEntity} that was loaded before this call would otherwise keep the
 * old counter values.
 *
 * @param fileId           id of the file to update
 * @param promptTokens     number of prompt tokens to add
 * @param completionTokens number of completion tokens to add
 */
@Modifying(clearAutomatically = true)
@Query("UPDATE FileEntity f SET f.usedPromptTokens = f.usedPromptTokens + :promptTokens, "
        + "f.usedCompletionTokens = f.usedCompletionTokens + :completionTokens "
        + "WHERE f.id = :id")
void increaseTokenUsage(@Param("id") String fileId, @Param("promptTokens") int promptTokens, @Param("completionTokens") int completionTokens);
} }

View File

@ -63,7 +63,7 @@ public class NerMessageReceiver {
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier()); String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier()); String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
addFileIdToTrace(fileId); addFileIdToTrace(fileId);
fileStatusService.increaseTokenUsage(fileId, message.getPromptTokens(), message.getCompletionTokens());
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId); log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion()); fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false); fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);

View File

@ -11,14 +11,13 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration; import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus; import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService; import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService; import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService; import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest; import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows; import lombok.SneakyThrows;
@ -43,6 +42,8 @@ public class OCRProcessingMessageReceiver {
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID) @RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) { public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
var fileModel = fileStatusService.getStatus(response.getFileId()); var fileModel = fileStatusService.getStatus(response.getFileId());
if (response.isOcrStarted()) { if (response.isOcrStarted()) {
@ -57,7 +58,6 @@ public class OCRProcessingMessageReceiver {
response.getNumberOfOCRedPages()); response.getNumberOfOCRedPages());
} }
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
} }
@ -95,6 +95,7 @@ public class OCRProcessingMessageReceiver {
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS); timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(), fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
ocrRequestMessage.getFileId(), ocrRequestMessage.getFileId(),
ocrRequestMessage.getFeatures(),
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp)); new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
} }

View File

@ -259,3 +259,7 @@ databaseChangeLog:
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
- include: - include:
file: db/changelog/tenant/159-cleanup-truncated-indices.yaml file: db/changelog/tenant/159-cleanup-truncated-indices.yaml
- include:
file: db/changelog/tenant/160-add-usage-fields-to-file-for-idp-and-llm.yaml
- include:
file: db/changelog/tenant/161-add-idp-related-fields-to-dossier-template.yaml

View File

@ -0,0 +1,24 @@
# Adds three usage columns to the "file" table: two token counters and an IDP page counter.
databaseChangeLog:
- changeSet:
id: add-llm-usage-fields-to-file
author: kilian
changes:
- addColumn:
tableName: file
columns:
# Token counters: NOT NULL with default 0, so existing rows start at zero.
- column:
name: used_completion_tokens
type: int
defaultValueNumeric: "0"
constraints:
nullable: false
- column:
name: used_prompt_tokens
type: int
defaultValueNumeric: "0"
constraints:
nullable: false
# Unlike the token counters, this column has a default but no NOT NULL constraint.
# NOTE(review): presumably intentional, since the OCR reset query sets numberOfIdpPages to NULL - confirm.
- column:
name: number_of_idp_pages
type: int
defaultValueNumeric: "0"

View File

@ -0,0 +1,64 @@
databaseChangeLog:
  # Step 1: add the OCR/IDP settings columns to dossier_template and backfill
  # rotation_correction_by_default, the only new column without a default value.
  - changeSet:
      id: add-idp-related-fields-to-dossier-template
      author: kilian
      changes:
        - addColumn:
            tableName: dossier_template
            columns:
              - column:
                  name: idp_by_default
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if IDP is enabled by default"
              - column:
                  # No default here: the value for existing rows depends on
                  # layout_parsing_type and is set by the two updates below.
                  name: rotation_correction_by_default
                  type: boolean
                  remarks: "Indicates if rotation correction is enabled by default"
              - column:
                  name: font_style_detection
                  type: boolean
                  defaultValueBoolean: true
                  remarks: "Indicates if font style detection is enabled in OCR"
              - column:
                  name: ocr_all_pages
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if all pages should be processed during OCR instead of only pages with images"
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: true
            where: "layout_parsing_type = 'DOCUMINE_OLD'"
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: false
            # In SQL, "NULL != 'DOCUMINE_OLD'" is not true, so a row without a
            # layout_parsing_type would match neither update, keep a NULL value and
            # make the NOT NULL constraint in the next changeSet fail.
            # NOTE(review): harmless if layout_parsing_type is already NOT NULL - confirm.
            where: "layout_parsing_type IS NULL OR layout_parsing_type != 'DOCUMINE_OLD'"
  # Step 2: once every row has a value, forbid NULL in all four new columns.
  - changeSet:
      id: make-fields-non-nullable
      author: kilian
      changes:
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: idp_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: rotation_correction_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: font_style_detection
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: ocr_all_pages
            columnDataType: boolean

View File

@ -191,7 +191,7 @@ public class ComponentOverrideTest extends AbstractPersistenceServerServiceTest
@Test @Test
@SneakyThrows @SneakyThrows
public void testDeletedFileOverrides() throws IOException { public void testDeletedFileOverrides() {
var dossier = dossierTesterAndProvider.provideTestDossier(); var dossier = dossierTesterAndProvider.provideTestDossier();

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.peristence.v1.server.integration.tests; package com.iqser.red.service.peristence.v1.server.integration.tests;
import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
@ -149,7 +150,10 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate(); var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
TypeResponse types = dictionaryClient.getAllTypes(dossierTemplate.getId(), null, true); TypeResponse types = dictionaryClient.getAllTypes(dossierTemplate.getId(), null, true);
List<TypeValue> systemManagedTypes = types.getTypes().stream().filter(TypeValue::isSystemManaged).collect(Collectors.toList()); List<TypeValue> systemManagedTypes = types.getTypes()
.stream()
.filter(TypeValue::isSystemManaged)
.collect(Collectors.toList());
assertThat(systemManagedTypes.size()).isEqualTo(8); assertThat(systemManagedTypes.size()).isEqualTo(8);
var allTemplates = dossierTemplateClient.getAllDossierTemplates(); var allTemplates = dossierTemplateClient.getAllDossierTemplates();
@ -284,17 +288,17 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
dictionaryClient.getDictionaryForType(type2.getType(), type2.getDossierTemplateId(), dossier.getId()); dictionaryClient.getDictionaryForType(type2.getType(), type2.getDossierTemplateId(), dossier.getId());
var allTypes = dictionaryClient.getAllTypes(dossierTemplate.getId(), dossier.getId(), false).getTypes(); var allTypes = dictionaryClient.getAllTypes(dossierTemplate.getId(), dossier.getId(), false).getTypes();
assertThat(allTypes assertThat(allTypes.stream()
.stream().filter(t -> !t.isSystemManaged()).collect(Collectors.toList()) .filter(t -> !t.isSystemManaged())
.size()).isEqualTo(4); .count()).isEqualTo(4);
var typesWithRankOfType1 = allTypes.stream() var typesWithRankOfType1 = allTypes.stream()
.filter(t -> t.getRank() == type.getRank()) .filter(t -> t.getRank() == type.getRank())
.collect(Collectors.toList()); .toList();
assertThat(typesWithRankOfType1.size()).isEqualTo(2); assertThat(typesWithRankOfType1.size()).isEqualTo(2);
var typesWithRankOfType2 = allTypes.stream() var typesWithRankOfType2 = allTypes.stream()
.filter(t -> t.getRank() == type2.getRank()) .filter(t -> t.getRank() == type2.getRank())
.collect(Collectors.toList()); .toList();
assertThat(typesWithRankOfType2.size()).isEqualTo(2); assertThat(typesWithRankOfType2.size()).isEqualTo(2);
dictionaryClient.addEntry(createdType1.getType(), createdType1.getDossierTemplateId(), List.of("entry1", "entry2"), false, null, DictionaryEntryType.ENTRY); dictionaryClient.addEntry(createdType1.getType(), createdType1.getDossierTemplateId(), List.of("entry1", "entry2"), false, null, DictionaryEntryType.ENTRY);
@ -596,13 +600,13 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
.build()); .build());
// add new justifications // add new justifications
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason","technicalReason")), dossierTemplate.getId()); legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason", "technicalReason")), dossierTemplate.getId());
existingLegalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplate.getId()); existingLegalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplate.getId());
assertThat(existingLegalBasis.size()).isEqualTo(1); assertThat(existingLegalBasis.size()).isEqualTo(1);
// update dossier template metadata // update dossier template metadata
var cru = new DossierTemplateModel(); var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId()); cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru); BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update"); cru.setName("Template 1 Update");
cru.setDescription("new description"); cru.setDescription("new description");
@ -944,7 +948,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertThat(result.getName()).isEqualTo(name); assertThat(result.getName()).isEqualTo(name);
assertThat(result.isOcrByDefault()).isTrue(); assertThat(result.isOcrByDefault()).isTrue();
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId()); DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId());
assertThat(loadedTemplate).isEqualTo(result); assertThat(loadedTemplate).isEqualTo(result);
dossierTemplateModel.setName("Test Dossier Template Update"); dossierTemplateModel.setName("Test Dossier Template Update");
@ -992,7 +996,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
// update // update
var cru = new DossierTemplateModel(); var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId()); cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru); BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update"); cru.setName("Template 1 Update");
cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC)); cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC));
@ -1002,4 +1006,40 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo.")); assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo."));
} }
/**
 * Updates a dossier template with all four OCR-related flags switched on and checks
 * that the new name and the flags are present both in the update response and in the
 * template loaded again by id.
 */
@Test
public void testUpdateDossierTemplateWithOCRSettings() {

    var existingTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();

    // precondition: the provided template is the only one known to the service
    var templatesBeforeUpdate = dossierTemplateClient.getAllDossierTemplates();
    assertThat(templatesBeforeUpdate.size()).isEqualTo(1);
    assertThat(templatesBeforeUpdate.get(0)).isEqualTo(existingTemplate);

    // build the update request from the existing template, then override name and OCR flags
    String expectedName = "Template 1 Update";
    var updateRequest = new DossierTemplateModel();
    updateRequest.setId(existingTemplate.getId());
    BeanUtils.copyProperties(existingTemplate, updateRequest);
    updateRequest.setName(expectedName);
    updateRequest.setIdpByDefault(true);
    updateRequest.setRotationCorrectionByDefault(true);
    updateRequest.setOcrAllPages(true);
    updateRequest.setFontStyleDetection(true);

    // the response of the update call reflects the new values
    var updateResponse = dossierTemplateClient.createOrUpdateDossierTemplate(updateRequest);
    assertThat(updateResponse.getName()).isEqualTo(expectedName);
    assertThat(updateResponse.isIdpByDefault()).isTrue();
    assertThat(updateResponse.isRotationCorrectionByDefault()).isTrue();
    assertThat(updateResponse.isFontStyleDetection()).isTrue();
    assertThat(updateResponse.isOcrAllPages()).isTrue();

    // loading the template again by id returns the same values
    var reloadedTemplate = dossierTemplateClient.getDossierTemplate(updateResponse.getId());
    assertThat(reloadedTemplate.getName()).isEqualTo(expectedName);
    assertThat(reloadedTemplate.isIdpByDefault()).isTrue();
    assertThat(reloadedTemplate.isRotationCorrectionByDefault()).isTrue();
    assertThat(reloadedTemplate.isFontStyleDetection()).isTrue();
    assertThat(reloadedTemplate.isOcrAllPages()).isTrue();
}
} }

View File

@ -46,22 +46,22 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId()); var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED); assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
reanalysisClient.ocrDossier(dossier.getId()); reanalysisClient.ocrDossier(dossier.getId(), false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId()); loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED); assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
resetProcessingStatus(file); resetProcessingStatus(file);
reanalysisClient.ocrDossier(dossier.getId()); reanalysisClient.ocrDossier(dossier.getId(), false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId()); loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED); assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file); resetProcessingStatus(file);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true); reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId()); loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED); assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file); resetProcessingStatus(file);
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId())); reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()), false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId()); loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED); assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file); resetProcessingStatus(file);

View File

@ -201,6 +201,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(), Collections.emptySet(),
true, true,
false, false,
false,
new FileStatusFilter(null, null, true, true))); new FileStatusFilter(null, null, true, true)));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId()); loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING); assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);
@ -247,6 +248,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(), Collections.emptySet(),
true, true,
false, false,
false,
null)); null));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId()); loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING); assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);

View File

@ -10,7 +10,7 @@ dependencies {
api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){ api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
} }
api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") { api("com.knecon.fforesight:layoutparser-service-internal-api:0.196.0-RED8670.0") {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
} }

View File

@ -68,6 +68,18 @@ public class DossierTemplateModel {
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template") @Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
private boolean ocrByDefault; private boolean ocrByDefault;
@Schema(description = "Flag that specifies if rotation correction is attempted during OCR for all dossiers of this template")
private boolean rotationCorrectionByDefault;
@Schema(description = "Flag that specifies if IDP is performed instead of OCR for all dossiers of this template")
private boolean idpByDefault;
@Schema(description = "Flag that specifies if font style detection is performed during OCR")
private boolean fontStyleDetection;
@Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images")
private boolean ocrAllPages;
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing") @Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
private boolean removeWatermark; private boolean removeWatermark;

View File

@ -88,6 +88,12 @@ public class FileStatus {
private int numberOfPagesToOCR; private int numberOfPagesToOCR;
@Schema(description = "Number of pages already OCRed by us") @Schema(description = "Number of pages already OCRed by us")
private int numberOfOCRedPages; private int numberOfOCRedPages;
@Schema(description = "Number of pages already IDPed by us")
private int numberOfIdpPages;
@Schema(description = "Number of prompt tokens used by this file")
private int usedPromptTokens;
@Schema(description = "Number of completion tokens used by this file")
private int usedCompletionTokens;
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process") @Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
private OffsetDateTime ocrEndTime; private OffsetDateTime ocrEndTime;
@Schema(description = "Shows if this file has comments on annotations.") @Schema(description = "Shows if this file has comments on annotations.")

View File

@ -15,7 +15,7 @@ import lombok.NoArgsConstructor;
@Data @Data
@NoArgsConstructor @NoArgsConstructor
@AllArgsConstructor @AllArgsConstructor
public class FileStatusFilter { public class FileStatusFilter implements Predicate<FileModel> {
private List<ProcessingStatus> processingStatusList = new ArrayList<>(); private List<ProcessingStatus> processingStatusList = new ArrayList<>();
private List<WorkflowStatus> workflowStatusList = new ArrayList<>(); private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
@ -32,7 +32,8 @@ public class FileStatusFilter {
} }
public Predicate<FileModel> asPredicate() { @Override
public boolean test(FileModel fileModel) {
if (this.getProcessingStatusList() == null) { if (this.getProcessingStatusList() == null) {
this.setProcessingStatusList(new ArrayList<>()); this.setProcessingStatusList(new ArrayList<>());
@ -42,10 +43,12 @@ public class FileStatusFilter {
this.setWorkflowStatusList(new ArrayList<>()); this.setWorkflowStatusList(new ArrayList<>());
} }
return fileStatus -> (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileStatus.getProcessingStatus())) return (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileModel.getProcessingStatus()))
&& (this.getWorkflowStatusList().isEmpty() || this.getWorkflowStatusList().contains(fileStatus.getWorkflowStatus())) && (this.getWorkflowStatusList().isEmpty()
&& (this.isIncludeSoftDeletedFiles() || fileStatus.getDeleted() == null) || this.getWorkflowStatusList()
&& (this.isIncludeHardDeletedFiles() || fileStatus.getHardDeletedTime() == null); .contains(fileModel.getWorkflowStatus()))
&& (this.isIncludeSoftDeletedFiles() || fileModel.getDeleted() == null)
&& (this.isIncludeHardDeletedFiles() || fileModel.getHardDeletedTime() == null);
} }
} }

View File

@ -4,16 +4,33 @@ import java.util.Optional;
import java.util.Set; import java.util.Set;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
public record ReanalysisSettings( @Getter
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]") Set<String> dossierIds, @Builder
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]") Set<String> fileIds, @RequiredArgsConstructor
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false") boolean repeatStructureAnalysis, @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@Schema(description = "If set to true, ocr will be repeated and therefore also layout parsing and named entity recognition.", defaultValue = "false") boolean runOcr, public final class ReanalysisSettings {
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "{}") FileStatusFilter fileStatusFilter
) {
public FileStatusFilter fileStatusFilter() { @Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
Set<String> dossierIds;
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
Set<String> fileIds;
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
boolean repeatStructureAnalysis;
@Schema(description = "If set to true, OCR will be repeated.", defaultValue = "false")
boolean runOcr;
@Schema(description = "If set to true, OCR with IDP will be repeated.", defaultValue = "false")
boolean runIdp;
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
FileStatusFilter fileStatusFilter;
public FileStatusFilter getFileStatusFilter() {
return Optional.ofNullable(fileStatusFilter) return Optional.ofNullable(fileStatusFilter)
.orElse(new FileStatusFilter()); .orElse(new FileStatusFilter());

View File

@ -41,8 +41,16 @@ public class CreateOrUpdateDossierTemplateRequest {
private boolean ocrByDefault; private boolean ocrByDefault;
private boolean idpByDefault;
private boolean rotationCorrectionByDefault;
private boolean fontStyleDetection;
private boolean removeWatermark; private boolean removeWatermark;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType; private LayoutParsingType layoutParsingType;
} }

View File

@ -36,7 +36,11 @@ public class DossierTemplate {
private boolean keepOverlappingObjects; private boolean keepOverlappingObjects;
private boolean applyDictionaryUpdatesToAllDossiersByDefault; private boolean applyDictionaryUpdatesToAllDossiersByDefault;
private boolean ocrByDefault; private boolean ocrByDefault;
private boolean rotationCorrectionByDefault;
private boolean idpByDefault;
private boolean removeWatermark; private boolean removeWatermark;
private boolean fontStyleDetection;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType; private LayoutParsingType layoutParsingType;
} }

View File

@ -8,6 +8,7 @@ import java.util.Set;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Data; import lombok.Data;
@ -60,6 +61,9 @@ public class FileModel {
private OffsetDateTime ocrStartTime; private OffsetDateTime ocrStartTime;
private Integer numberOfPagesToOCR; private Integer numberOfPagesToOCR;
private Integer numberOfOCRedPages; private Integer numberOfOCRedPages;
private Integer numberOfIdpPages;
private int usedPromptTokens;
private int usedCompletionTokens;
private OffsetDateTime ocrEndTime; private OffsetDateTime ocrEndTime;
private boolean hasAnnotationComments; private boolean hasAnnotationComments;
private boolean excluded; private boolean excluded;

View File

@ -21,6 +21,7 @@ public enum FileType {
TABLES(".json"), TABLES(".json"),
VISUAL_LAYOUT(".json"), VISUAL_LAYOUT(".json"),
IDP_RESULT(".json"),
COMPONENTS(".json"), COMPONENTS(".json"),
// document is split into 4 files, all should be overridden/deleted at the same time // document is split into 4 files, all should be overridden/deleted at the same time
DOCUMENT_TEXT_OLD(".json"), DOCUMENT_TEXT_OLD(".json"),