Compare commits

...

9 Commits

Author SHA1 Message Date
Kilian Schuettler
170f340ddb RED-8670: make ocr settings configurable 2025-01-13 13:32:32 +01:00
Kilian Schuettler
d871bf0d80 RED-8670: add idp fields and llm tracking 2024-12-19 11:14:36 +01:00
Kilian Schuettler
da9924d1e6 RED-8670: add idp fields and llm tracking 2024-12-18 12:55:50 +01:00
Kilian Schuettler
b5c469c30c RED-8670: add idp fields and llm tracking 2024-12-17 17:50:34 +01:00
Kilian Schuettler
a20a97cd13 RED-8670: add idp fields and llm tracking 2024-12-17 17:49:26 +01:00
Kilian Schuettler
5ac7e66196 RED-8670: add idp fields and llm tracking 2024-12-17 17:22:58 +01:00
Kilian Schuettler
116f088d28 RED-8670: add idp fields and llm tracking 2024-12-17 16:54:45 +01:00
Kilian Schuettler
f5b16ee111 RED-8670: add idp fields and llm tracking 2024-12-17 16:53:04 +01:00
Kilian Schuettler
d5fc737350 RED-8670: add idp fields and llm tracking 2024-12-17 13:01:58 +01:00
32 changed files with 377 additions and 112 deletions

View File

@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService;
import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
@ -42,7 +42,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest;
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
@ -61,7 +60,6 @@ public class DossierTemplateController implements DossierTemplateResource {
private final AuditPersistenceService auditPersistenceService;
private final DossierManagementService dossierManagementService;
private final DossierACLService dossierACLService;
private final UserService userService;
@Override
@ -314,6 +312,10 @@ public class DossierTemplateController implements DossierTemplateResource {
.applyDictionaryUpdatesToAllDossiersByDefault(dossierTemplate.isApplyDictionaryUpdatesToAllDossiersByDefault())
.ocrByDefault(dossierTemplate.isOcrByDefault())
.removeWatermark(dossierTemplate.isRemoveWatermark())
.idpByDefault(dossierTemplate.isIdpByDefault())
.rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault())
.fontStyleDetection(dossierTemplate.isFontStyleDetection())
.ocrAllPages(dossierTemplate.isOcrAllPages())
.build();
}

View File

@ -36,6 +36,7 @@ import lombok.RequiredArgsConstructor;
public class ReanalysisController implements ReanalysisResource {
private static final String DOSSIER_ID = "dossierId";
private final ReanalysisService reanalysisService;
private final FileStatusManagementService fileStatusManagementService;
private final AuditPersistenceService auditPersistenceService;
@ -98,11 +99,11 @@ public class ReanalysisController implements ReanalysisResource {
@Override
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId) {
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
reanalysisService.ocrDossier(dossierId);
reanalysisService.ocrDossier(dossierId, idp);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
@ -118,11 +119,12 @@ public class ReanalysisController implements ReanalysisResource {
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force) {
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
validateOCR(dossierId, fileId);
reanalysisService.ocrFile(dossierId, fileId, force);
reanalysisService.ocrFile(dossierId, fileId, force, idp);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)
@ -136,11 +138,13 @@ public class ReanalysisController implements ReanalysisResource {
@Override
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds) {
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
@RequestBody Set<String> fileIds,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
reanalysisService.ocrFiles(dossierId, fileIds);
reanalysisService.ocrFiles(dossierId, fileIds, idp);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)

View File

@ -38,6 +38,7 @@ public interface ReanalysisResource {
String EXCLUDED_STATUS_PARAM = "excluded";
String FORCE_PARAM = "force";
String IDP_PARAM = "idp";
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ -65,7 +66,7 @@ public interface ReanalysisResource {
@Operation(summary = "Ocr and reanalyze a dossier", description = "None")
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId);
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@Operation(summary = "Ocr and reanalyze a file", description = "None")
@ -73,13 +74,16 @@ public interface ReanalysisResource {
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "409", description = "Conflict"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden"), @ApiResponse(responseCode = "400", description = "Cannot OCR approved file")})
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force);
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds);
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
@RequestBody Set<String> fileIds,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")

View File

@ -35,8 +35,8 @@ dependencies {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
}
api("com.knecon.fforesight:azure-ocr-service-api:0.13.0")
implementation("com.knecon.fforesight:llm-service-api:1.20.0-RED10072.2")
api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
implementation("com.knecon.fforesight:llm-service-api:1.35.0")
api("com.knecon.fforesight:jobs-commons:0.13.0")
api("com.iqser.red.commons:storage-commons:2.50.0")
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {

View File

@ -83,6 +83,18 @@ public class DossierTemplateEntity {
@Column(name = "ocr_by_default")
private boolean ocrByDefault;
@Column(name = "rotation_correction_by_default")
private boolean rotationCorrectionByDefault;
@Column(name = "idp_by_default")
private boolean idpByDefault;
@Column(name = "font_style_detection")
private boolean fontStyleDetection;
@Column(name = "ocr_all_pages")
private boolean ocrAllPages;
@Column(name = "remove_watermark")
private boolean removeWatermark;
@ -128,6 +140,7 @@ public class DossierTemplateEntity {
@Enumerated(EnumType.STRING)
private LayoutParsingType layoutParsingType;
public static DossierTemplateEntity copyDossierTemplateEntityWithoutChildEntities(DossierTemplateEntity dossierTemplateEntity) {
DossierTemplateEntity dossierTemplateCopy = new DossierTemplateEntity();
@ -148,6 +161,10 @@ public class DossierTemplateEntity {
dossierTemplateCopy.removeWatermark = dossierTemplateEntity.removeWatermark;
dossierTemplateCopy.downloadFileTypes = dossierTemplateEntity.downloadFileTypes;
dossierTemplateCopy.layoutParsingType = dossierTemplateEntity.layoutParsingType;
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection;
dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages;
return dossierTemplateCopy;
}

View File

@ -152,9 +152,16 @@ public class FileEntity {
@Column(name = "number_of_ocred_pages")
private Integer numberOfOCRedPages;
@Column(name = "number_of_idp_pages")
private Integer numberOfIdpPages;
@Column(name = "ocr_end_time")
private OffsetDateTime ocrEndTime;
private int usedPromptTokens;
private int usedCompletionTokens;
@Column
private boolean hasAnnotationComments;

View File

@ -1,20 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Status payload describing the OCR state of a single file.
 * <p>
 * Lombok generates the accessors ({@code @Data}), a no-args and an all-args constructor,
 * and a builder. Other hunks of this change import a class with the same simple name from
 * {@code com.knecon.fforesight.service.ocr.v1.api.model} instead of this one.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class OCRStatusUpdateResponse {
// Identifier of the file this status update belongs to.
private String fileId;
// presumably the total number of pages scheduled for OCR — TODO confirm with the producing service
private int numberOfPagesToOCR;
// presumably the number of pages already processed by OCR — TODO confirm with the producing service
private int numberOfOCRedPages;
// Flag signalling that OCR has finished for the file.
private boolean ocrFinished;
// Flag signalling that OCR has started for the file.
private boolean ocrStarted;
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.springframework.retry.support.RetryTemplate;
@ -15,6 +17,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.UntouchedDo
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.search.v1.model.IndexMessageType;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import jakarta.transaction.Transactional;
import lombok.RequiredArgsConstructor;
@ -114,7 +117,7 @@ public class FileStatusProcessingUpdateService {
}
public void requeueOCROrMarkFailed(String dossierId, String fileId, FileErrorInfo fileErrorInfo) {
public void requeueOCROrMarkFailed(String dossierId, String fileId, Set<AzureOcrFeature> features, FileErrorInfo fileErrorInfo) {
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
@ -122,7 +125,7 @@ public class FileStatusProcessingUpdateService {
} else {
fileStatusService.setStatusOcrProcessing(fileId,
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
fileStatusService.addToOcrQueue(dossierId, fileId, 2);
fileStatusService.addToOcrQueue(dossierId, fileId, 2, features);
}
}

View File

@ -12,7 +12,6 @@ import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
@ -21,6 +20,7 @@ import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocu
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierTemplateEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
@ -31,7 +31,6 @@ import com.iqser.red.service.persistence.management.v1.processor.model.AnalysisT
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
@ -80,6 +79,7 @@ import com.knecon.fforesight.llm.service.LlmNerMessage;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.knecon.fforesight.tenantcommons.TenantContext;
import jakarta.transaction.Transactional;
@ -352,7 +352,6 @@ public class FileStatusService {
return;
}
boolean forceAnalysis = false;
if (settings.isLlmNerServiceEnabled()) {
boolean objectExists = fileManagementStorageService.objectExists(dossierId, fileId, FileType.LLM_NER_ENTITIES);
@ -386,7 +385,7 @@ public class FileStatusService {
boolean reanalyse = fileModel.isReanalysisRequired() || analysisType.equals(AnalysisType.MANUAL_REDACTION_REANALYZE);
MessageType messageType = calculateMessageType(reanalyse, fileModel.getProcessingStatus(), fileModel);
if(analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
if (analysisType == AnalysisType.FORCE_ANALYSE || forceAnalysis) {
messageType = MessageType.ANALYSE;
}
@ -569,6 +568,12 @@ public class FileStatusService {
/**
 * Convenience overload of {@code setStatusOcrQueued(String, String, boolean)} that passes
 * {@code false} for the {@code idp} flag.
 *
 * @param dossierId id of the dossier the file belongs to
 * @param fileId    id of the file to queue for OCR
 */
public void setStatusOcrQueued(String dossierId, String fileId) {
setStatusOcrQueued(dossierId, fileId, false);
}
public void setStatusOcrQueued(String dossierId, String fileId, boolean idp) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
if (fileStatus.isExcluded()) {
@ -579,7 +584,7 @@ public class FileStatusService {
updateOCRStartTime(fileId);
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
addToOcrQueue(dossierId, fileId, 2);
addToOcrQueue(dossierId, fileId, 2, idp);
}
@ -760,22 +765,39 @@ public class FileStatusService {
}
public void addToOcrQueue(String dossierId, String fileId, int priority) {
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean useIdp) {
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
DossierTemplateEntity dt = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId));
Set<AzureOcrFeature> features = new HashSet<>();
if (removeWatermark) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
if (currentApplicationTypeProvider.isDocuMine()) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
if (dt.isFontStyleDetection()) {
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
}
if (dt.isRemoveWatermark()) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
if (useIdp || dt.isIdpByDefault()) {
features.add(AzureOcrFeature.IDP);
}
if (dt.isRotationCorrectionByDefault()) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
}
addToOcrQueue(dossierId, fileId, priority, features);
}
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
var request = DocumentRequest.builder()
// needed for legacy OCR-services
.dossierId(dossierId)
.fileId(fileId)
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS))
// new api
.originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
.viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))
.idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT))
.features(features)
.build();
@ -820,7 +842,7 @@ public class FileStatusService {
fileStatusPersistenceService.updateWorkflowStatus(fileId, newWorkflowStatus, false);
if(oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
if (oldWorkflowStatus == WorkflowStatus.APPROVED && newWorkflowStatus != WorkflowStatus.APPROVED) {
fileStatusPersistenceService.clearLastDownload(fileId);
}
}
@ -963,6 +985,13 @@ public class FileStatusService {
/**
 * Overload without the {@code idp} flag: delegates to the six-argument
 * {@code setStatusFullReprocess} with {@code idp = false}; all other arguments are
 * forwarded unchanged.
 *
 * @param dossierId                 id of the dossier the file belongs to
 * @param fileId                    id of the file to reprocess
 * @param priority                  forwarded unchanged to the six-argument overload
 * @param requiresStructureAnalysis forwarded unchanged to the six-argument overload
 * @param runOcr                    forwarded unchanged to the six-argument overload
 */
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr) {
setStatusFullReprocess(dossierId, fileId, priority, requiresStructureAnalysis, runOcr, false);
}
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr, boolean idp) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
if (fileStatus.isExcluded()) {
@ -970,14 +999,14 @@ public class FileStatusService {
return;
}
if (requiresStructureAnalysis || runOcr) {
if (requiresStructureAnalysis || runOcr || idp) {
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
fileManagementStorageService.deleteDocumentAndNerObjects(dossierId, fileId);
}
if (runOcr) {
if (runOcr || idp) {
fileStatusPersistenceService.resetOcrStartAndEndDate(fileId);
setStatusOcrQueued(dossierId, fileId);
setStatusOcrQueued(dossierId, fileId, idp);
return;
}
@ -1064,6 +1093,7 @@ public class FileStatusService {
addToAnalysisQueue(dossierId, fileId, priority, Sets.newHashSet(), AnalysisType.DEFAULT);
}
@Transactional
public void setStatusForceAnalyse(String dossierId, String fileId, boolean priority) {
@ -1175,4 +1205,10 @@ public class FileStatusService {
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
}
/**
 * Forwards the token counts for a file to
 * {@code fileStatusPersistenceService.increaseTokenUsage}; this method adds no logic of its own.
 *
 * @param fileId           id of the file the tokens are accounted to
 * @param promptTokens     number of prompt tokens to record
 * @param completionTokens number of completion tokens to record
 */
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
fileStatusPersistenceService.increaseTokenUsage(fileId, promptTokens, completionTokens);
}
}

View File

@ -171,18 +171,18 @@ public class ReanalysisService {
}
public void ocrDossier(String dossierId) {
public void ocrDossier(String dossierId, boolean idp) {
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
}
public void ocrFile(String dossierId, String fileId, boolean force) {
public void ocrFile(String dossierId, String fileId, boolean force, boolean idp) {
dossierPersistenceService.getAndValidateDossier(dossierId);
FileModel dossierFile = fileStatusService.getStatus(fileId);
@ -202,30 +202,31 @@ public class ReanalysisService {
}
if (force) {
fileStatusService.setStatusOcrQueued(dossierId, fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId, idp);
} else {
if (dossierFile.getOcrStartTime() != null) {
throw new ConflictException("File already has been OCR processed");
}
ocrFiles(dossierId, Sets.newHashSet(fileId));
ocrFiles(dossierId, Sets.newHashSet(fileId), idp);
}
}
public void ocrFiles(String dossierId, Set<String> fileIds) {
public void ocrFiles(String dossierId, Set<String> fileIds, boolean idp) {
var relevantFiles = getRelevantFiles(dossierId, fileIds);
if (relevantFiles.stream()
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) && !fileStatus.getProcessingStatus()
.equals(ProcessingStatus.OCR_PROCESSING_QUEUED) && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) //
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING_QUEUED) //
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
throw new ConflictException("File is not processed");
}
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId()));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
}
@ -275,12 +276,12 @@ public class ReanalysisService {
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.dossierIds(), reanalysisSettings.fileIds());
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.getDossierIds(), reanalysisSettings.getFileIds());
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
.stream()
.filter(file -> isInList(file, reanalysisSettings))
.filter(reanalysisSettings.fileStatusFilter().asPredicate())
.filter(reanalysisSettings.getFileStatusFilter())
.peek(file -> log.info("Reanalyzing file {}", file.getId()))
.collect(Collectors.toList());
@ -289,8 +290,9 @@ public class ReanalysisService {
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
file.getId(),
false,
reanalysisSettings.repeatStructureAnalysis(),
reanalysisSettings.runOcr()));
reanalysisSettings.isRepeatStructureAnalysis(),
reanalysisSettings.isRunOcr(),
reanalysisSettings.isRunIdp()));
return rejectedFiles;
}
@ -314,8 +316,8 @@ public class ReanalysisService {
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
return (reAnalysisSettings.fileIds().isEmpty() || reAnalysisSettings.fileIds().contains(file.getId())) //
&& (reAnalysisSettings.dossierIds().isEmpty() || reAnalysisSettings.dossierIds().contains(file.getDossierId()));
return (reAnalysisSettings.getFileIds().isEmpty() || reAnalysisSettings.getFileIds().contains(file.getId())) //
&& (reAnalysisSettings.getDossierIds().isEmpty() || reAnalysisSettings.getDossierIds().contains(file.getDossierId()));
}
}

View File

@ -27,8 +27,7 @@ public class LayoutParsingRequestFactory {
public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) {
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(
dossierTemplateId).getLayoutParsingType();
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType();
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
@ -39,6 +38,9 @@ public class LayoutParsingRequestFactory {
Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty();
Optional<String> optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty();
return LayoutParsingRequest.builder()
.layoutParsingType(layoutParsingType)
.identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority))
@ -55,6 +57,7 @@ public class LayoutParsingRequestFactory {
.documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId,
fileId,
FileType.MARKDOWN)) : Optional.empty())
.idpResultStorageId(optionalIdpResultFileId)
.build();
}

View File

@ -21,7 +21,6 @@ import com.iqser.red.service.persistence.management.v1.processor.entity.projecti
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
@ -31,6 +30,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.component.C
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import jakarta.persistence.EntityManager;
import jakarta.transaction.Transactional;
@ -602,9 +603,9 @@ public class FileStatusPersistenceService {
public int getNumberOfAssignedFiles(String userId) {
List<FileEntity> files = fileRepository.findFilesByAssignee(userId);
return files.stream()
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
.collect(Collectors.toList()).size();
return Math.toIntExact(files.stream()
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
.count());
}
@ -680,6 +681,7 @@ public class FileStatusPersistenceService {
fileRepository.updateOCRStatus(response.getFileId(),
response.getNumberOfPagesToOCR(),
response.getNumberOfOCRedPages(),
response.getFeatures().contains(AzureOcrFeature.IDP) ? response.getNumberOfOCRedPages() : 0,
response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
}
@ -765,4 +767,11 @@ public class FileStatusPersistenceService {
fileRepository.updateLastDownloadForFile(fileId, null);
}
/**
 * Forwards the token counts for a file to {@code fileRepository.increaseTokenUsage},
 * executed inside a transaction opened by {@code @Transactional}.
 *
 * @param fileId           id of the file whose usage is updated
 * @param promptTokens     number of prompt tokens passed on to the repository
 * @param completionTokens number of completion tokens passed on to the repository
 */
@Transactional
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
fileRepository.increaseTokenUsage(fileId, promptTokens, completionTokens);
}
}

View File

@ -389,11 +389,15 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Transactional
@Modifying(clearAutomatically = true)
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, f.ocrEndTime = :ocrEndTime, "
+ "f.lastUpdated = :lastUpdated where f.id = :fileId")
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, "
+ "f.numberOfIdpPages = :numberOfIdpPages, "
+ "f.ocrEndTime = :ocrEndTime, "
+ "f.lastUpdated = :lastUpdated "
+ "where f.id = :fileId")
void updateOCRStatus(@Param("fileId") String fileId,
@Param("numberOfPagesToOCR") int numberOfPagesToOCR,
@Param("numberOfOCRedPages") int numberOfOCRedPages,
@Param("numberOfIdpPages") int numberOfIdpPages,
@Param("ocrEndTime") OffsetDateTime ocrEndTime,
@Param("lastUpdated") OffsetDateTime lastUpdated);
@ -409,7 +413,7 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Modifying(clearAutomatically = true)
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL where f.id = :fileId")
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL, f.numberOfIdpPages = NULL where f.id = :fileId")
void resetOcrStartAndEndDate(@Param("fileId") String fileId);
@ -479,6 +483,11 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
/**
 * Adds {@code promptTokens} to {@code usedPromptTokens} and {@code completionTokens} to
 * {@code usedCompletionTokens} of the file with the given id, using one update statement
 * (the increment is computed in the query, not read and written back in Java).
 * <p>
 * NOTE(review): unlike {@code updateOCRStatus} and {@code resetOcrStartAndEndDate} in this
 * repository, this query is declared with a plain {@code @Modifying} (no
 * {@code clearAutomatically = true}) and carries no {@code @Transactional} of its own.
 * Confirm that callers always provide a transaction and never read the counters from an
 * already-loaded {@code FileEntity} in the same persistence context.
 *
 * @param fileId           id of the file to update (bound to {@code :id})
 * @param promptTokens     amount added to {@code usedPromptTokens}
 * @param completionTokens amount added to {@code usedCompletionTokens}
 */
@Modifying
@Query("UPDATE FileEntity f SET f.usedPromptTokens = f.usedPromptTokens + :promptTokens, f.usedCompletionTokens = f.usedCompletionTokens + :completionTokens WHERE f.id = :id")
void increaseTokenUsage(@Param("id") String fileId, @Param("promptTokens") int promptTokens, @Param("completionTokens") int completionTokens);
}

View File

@ -63,7 +63,7 @@ public class NerMessageReceiver {
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
addFileIdToTrace(fileId);
fileStatusService.increaseTokenUsage(fileId, message.getPromptTokens(), message.getCompletionTokens());
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);

View File

@ -11,14 +11,13 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -43,6 +42,8 @@ public class OCRProcessingMessageReceiver {
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
var fileModel = fileStatusService.getStatus(response.getFileId());
if (response.isOcrStarted()) {
@ -57,7 +58,6 @@ public class OCRProcessingMessageReceiver {
response.getNumberOfOCRedPages());
}
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
}
@ -95,6 +95,7 @@ public class OCRProcessingMessageReceiver {
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
ocrRequestMessage.getFileId(),
ocrRequestMessage.getFeatures(),
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
}

View File

@ -257,3 +257,7 @@ databaseChangeLog:
file: db/changelog/tenant/157-add-included-to-csv-export-field.yaml
- include:
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
- include:
file: db/changelog/tenant/159.0.0-add-usage-fields-to-file-for-idp-and-llm.yaml
- include:
file: db/changelog/tenant/160.0.0-add-idp-related-fields-to-dossier-template.yaml

View File

@ -0,0 +1,24 @@
# Liquibase changelog: adds the LLM token usage counters and the IDP page counter to the file table.
databaseChangeLog:
- changeSet:
id: add-llm-usage-fields-to-file
author: kilian
changes:
- addColumn:
tableName: file
columns:
# Number of completion tokens used by the file; NOT NULL, existing rows start at 0.
- column:
name: used_completion_tokens
type: int
defaultValueNumeric: "0"
constraints:
nullable: false
# Number of prompt tokens used by the file; NOT NULL, existing rows start at 0.
- column:
name: used_prompt_tokens
type: int
defaultValueNumeric: "0"
constraints:
nullable: false
# Number of IDP pages; defaults to 0 but declares no NOT NULL constraint, so the column stays
# nullable (the repository's resetOcrStartAndEndDate query sets it back to NULL).
- column:
name: number_of_idp_pages
type: int
defaultValueNumeric: "0"

View File

@ -0,0 +1,64 @@
# Liquibase changelog: adds the IDP/OCR related flags to dossier_template.
# Change set 1 adds the columns and backfills rotation_correction_by_default from the layout
# parsing type; change set 2 then makes all four flags NOT NULL.
databaseChangeLog:
  - changeSet:
      id: add-idp-related-fields-to-dossier-template
      author: kilian
      changes:
        - addColumn:
            tableName: dossier_template
            columns:
              - column:
                  name: idp_by_default
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if IDP is enabled by default"
              # No default here on purpose: the value is derived per row by the updates below.
              - column:
                  name: rotation_correction_by_default
                  type: boolean
                  remarks: "Indicates if rotation correction is enabled by default"
              - column:
                  name: font_style_detection
                  type: boolean
                  defaultValueBoolean: true
                  remarks: "Indicates if font style detection is enabled in OCR"
              - column:
                  name: ocr_all_pages
                  type: boolean
                  defaultValueBoolean: false
                  remarks: "Indicates if all pages should be processed during OCR instead of only pages with images"
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: true
            where: "layout_parsing_type = 'DOCUMINE_OLD'"
        # NULL-safe complement of the update above: in SQL "NULL != 'DOCUMINE_OLD'" is not true,
        # so rows without a layout parsing type would otherwise keep NULL and break the
        # NOT NULL constraint added in the next change set.
        - update:
            tableName: dossier_template
            columns:
              - column:
                  name: rotation_correction_by_default
                  valueBoolean: false
            where: "layout_parsing_type IS NULL OR layout_parsing_type != 'DOCUMINE_OLD'"
  - changeSet:
      id: make-fields-non-nullable
      author: kilian
      changes:
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: idp_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: rotation_correction_by_default
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: font_style_detection
            columnDataType: boolean
        - addNotNullConstraint:
            tableName: dossier_template
            columnName: ocr_all_pages
            columnDataType: boolean

View File

@ -186,7 +186,7 @@ public class ComponentOverrideTest extends AbstractPersistenceServerServiceTest
@Test
@SneakyThrows
public void testDeletedFileOverrides() throws IOException {
public void testDeletedFileOverrides() {
var dossier = dossierTesterAndProvider.provideTestDossier();

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -149,7 +150,10 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
TypeResponse types = dictionaryClient.getAllTypes(dossierTemplate.getId(), null, true);
List<TypeValue> systemManagedTypes = types.getTypes().stream().filter(TypeValue::isSystemManaged).collect(Collectors.toList());
List<TypeValue> systemManagedTypes = types.getTypes()
.stream()
.filter(TypeValue::isSystemManaged)
.collect(Collectors.toList());
assertThat(systemManagedTypes.size()).isEqualTo(8);
var allTemplates = dossierTemplateClient.getAllDossierTemplates();
@ -284,17 +288,17 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
dictionaryClient.getDictionaryForType(type2.getType(), type2.getDossierTemplateId(), dossier.getId());
var allTypes = dictionaryClient.getAllTypes(dossierTemplate.getId(), dossier.getId(), false).getTypes();
assertThat(allTypes
.stream().filter(t -> !t.isSystemManaged()).collect(Collectors.toList())
.size()).isEqualTo(4);
assertThat(allTypes.stream()
.filter(t -> !t.isSystemManaged())
.count()).isEqualTo(4);
var typesWithRankOfType1 = allTypes.stream()
.filter(t -> t.getRank() == type.getRank())
.collect(Collectors.toList());
.toList();
assertThat(typesWithRankOfType1.size()).isEqualTo(2);
var typesWithRankOfType2 = allTypes.stream()
.filter(t -> t.getRank() == type2.getRank())
.collect(Collectors.toList());
.toList();
assertThat(typesWithRankOfType2.size()).isEqualTo(2);
dictionaryClient.addEntry(createdType1.getType(), createdType1.getDossierTemplateId(), List.of("entry1", "entry2"), false, null, DictionaryEntryType.ENTRY);
@ -596,13 +600,13 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
.build());
// add new justifications
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason","technicalReason")), dossierTemplate.getId());
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason", "technicalReason")), dossierTemplate.getId());
existingLegalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplate.getId());
assertThat(existingLegalBasis.size()).isEqualTo(1);
// update dossier template metadata
var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId());
cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update");
cru.setDescription("new description");
@ -944,7 +948,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertThat(result.getName()).isEqualTo(name);
assertThat(result.isOcrByDefault()).isTrue();
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId());
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId());
assertThat(loadedTemplate).isEqualTo(result);
dossierTemplateModel.setName("Test Dossier Template Update");
@ -992,7 +996,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
// update
var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId());
cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update");
cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC));
@ -1002,4 +1006,40 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo."));
}
/**
 * Switches on all four OCR-related flags of a freshly provided dossier template and checks that
 * both the update response and a subsequent reload report the new name and flag values.
 */
@Test
public void testUpdateDossierTemplateWithOCRSettings() {

    var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();

    // Precondition: the provided template is the only one returned by the client.
    var existingTemplates = dossierTemplateClient.getAllDossierTemplates();
    assertThat(existingTemplates.size()).isEqualTo(1);
    assertThat(existingTemplates.get(0)).isEqualTo(dossierTemplate);

    // Build the update request from the current template, then change the name and the OCR flags.
    String expectedName = "Template 1 Update";
    var updateRequest = new DossierTemplateModel();
    updateRequest.setId(dossierTemplate.getId());
    BeanUtils.copyProperties(dossierTemplate, updateRequest);
    updateRequest.setName(expectedName);
    updateRequest.setIdpByDefault(true);
    updateRequest.setRotationCorrectionByDefault(true);
    updateRequest.setOcrAllPages(true);
    updateRequest.setFontStyleDetection(true);

    // The response of the update call must already carry the new values.
    var updateResponse = dossierTemplateClient.createOrUpdateDossierTemplate(updateRequest);
    assertEquals(expectedName, updateResponse.getName());
    assertTrue(updateResponse.isIdpByDefault());
    assertTrue(updateResponse.isRotationCorrectionByDefault());
    assertTrue(updateResponse.isFontStyleDetection());
    assertTrue(updateResponse.isOcrAllPages());

    // Fetching the template again must return the same values.
    var reloadedTemplate = dossierTemplateClient.getDossierTemplate(updateResponse.getId());
    assertEquals(expectedName, reloadedTemplate.getName());
    assertTrue(reloadedTemplate.isIdpByDefault());
    assertTrue(reloadedTemplate.isRotationCorrectionByDefault());
    assertTrue(reloadedTemplate.isFontStyleDetection());
    assertTrue(reloadedTemplate.isOcrAllPages());
}
}

View File

@ -46,22 +46,22 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
reanalysisClient.ocrDossier(dossier.getId());
reanalysisClient.ocrDossier(dossier.getId(), false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
resetProcessingStatus(file);
reanalysisClient.ocrDossier(dossier.getId());
reanalysisClient.ocrDossier(dossier.getId(), false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true);
reanalysisClient.ocrFile(dossier.getId(), file.getId(), true, false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()));
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()), false);
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);

View File

@ -201,6 +201,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(),
true,
false,
false,
new FileStatusFilter(null, null, true, true)));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);
@ -247,6 +248,7 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(),
true,
false,
false,
null));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);

View File

@ -1 +0,0 @@
hub.image.name.prefix=docker-dev.knecon.com/tests/

View File

@ -10,7 +10,7 @@ dependencies {
api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
}
api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") {
api("com.knecon.fforesight:layoutparser-service-internal-api:0.196.0-RED8670.0") {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
}

View File

@ -68,6 +68,18 @@ public class DossierTemplateModel {
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
private boolean ocrByDefault;
@Schema(description = "Flag that specifies if rotation correction is attempted during OCR for all dossiers of this template")
private boolean rotationCorrectionByDefault;
@Schema(description = "Flag that specifies if IDP is performed instead of OCR for all dossiers of this template")
private boolean idpByDefault;
@Schema(description = "Flag that specifies if font style detection is performed during OCR")
private boolean fontStyleDetection;
@Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images")
private boolean ocrAllPages;
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
private boolean removeWatermark;

View File

@ -88,6 +88,12 @@ public class FileStatus {
private int numberOfPagesToOCR;
@Schema(description = "Number of pages already OCRed by us")
private int numberOfOCRedPages;
@Schema(description = "Number of pages already IDPed by us")
private int numberOfIdpPages;
@Schema(description = "Number of prompt tokens used by this file")
private int usedPromptTokens;
@Schema(description = "Number of completion tokens used by this file")
private int usedCompletionTokens;
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
private OffsetDateTime ocrEndTime;
@Schema(description = "Shows if this file has comments on annotations.")

View File

@ -15,7 +15,7 @@ import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class FileStatusFilter {
public class FileStatusFilter implements Predicate<FileModel> {
private List<ProcessingStatus> processingStatusList = new ArrayList<>();
private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
@ -32,7 +32,8 @@ public class FileStatusFilter {
}
public Predicate<FileModel> asPredicate() {
@Override
public boolean test(FileModel fileModel) {
if (this.getProcessingStatusList() == null) {
this.setProcessingStatusList(new ArrayList<>());
@ -42,10 +43,12 @@ public class FileStatusFilter {
this.setWorkflowStatusList(new ArrayList<>());
}
return fileStatus -> (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileStatus.getProcessingStatus()))
&& (this.getWorkflowStatusList().isEmpty() || this.getWorkflowStatusList().contains(fileStatus.getWorkflowStatus()))
&& (this.isIncludeSoftDeletedFiles() || fileStatus.getDeleted() == null)
&& (this.isIncludeHardDeletedFiles() || fileStatus.getHardDeletedTime() == null);
return (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileModel.getProcessingStatus()))
&& (this.getWorkflowStatusList().isEmpty()
|| this.getWorkflowStatusList()
.contains(fileModel.getWorkflowStatus()))
&& (this.isIncludeSoftDeletedFiles() || fileModel.getDeleted() == null)
&& (this.isIncludeHardDeletedFiles() || fileModel.getHardDeletedTime() == null);
}
}

View File

@ -4,16 +4,33 @@ import java.util.Optional;
import java.util.Set;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
public record ReanalysisSettings(
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]") Set<String> dossierIds,
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]") Set<String> fileIds,
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false") boolean repeatStructureAnalysis,
@Schema(description = "If set to true, ocr will be repeated and therefore also layout parsing and named entity recognition.", defaultValue = "false") boolean runOcr,
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "{}") FileStatusFilter fileStatusFilter
) {
@Getter
@Builder
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class ReanalysisSettings {
public FileStatusFilter fileStatusFilter() {
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
Set<String> dossierIds;
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
Set<String> fileIds;
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
boolean repeatStructureAnalysis;
@Schema(description = "If set to true, OCR will be repeated.", defaultValue = "false")
boolean runOcr;
@Schema(description = "If set to true, OCR with IDP will be repeated.", defaultValue = "false")
boolean runIdp;
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
FileStatusFilter fileStatusFilter;
public FileStatusFilter getFileStatusFilter() {
return Optional.ofNullable(fileStatusFilter)
.orElse(new FileStatusFilter());

View File

@ -41,8 +41,16 @@ public class CreateOrUpdateDossierTemplateRequest {
private boolean ocrByDefault;
private boolean idpByDefault;
private boolean rotationCorrectionByDefault;
private boolean fontStyleDetection;
private boolean removeWatermark;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType;
}

View File

@ -36,7 +36,11 @@ public class DossierTemplate {
private boolean keepOverlappingObjects;
private boolean applyDictionaryUpdatesToAllDossiersByDefault;
private boolean ocrByDefault;
private boolean rotationCorrectionByDefault;
private boolean idpByDefault;
private boolean removeWatermark;
private boolean fontStyleDetection;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType;
}

View File

@ -8,6 +8,7 @@ import java.util.Set;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -60,6 +61,9 @@ public class FileModel {
private OffsetDateTime ocrStartTime;
private Integer numberOfPagesToOCR;
private Integer numberOfOCRedPages;
private Integer numberOfIdpPages;
private int usedPromptTokens;
private int usedCompletionTokens;
private OffsetDateTime ocrEndTime;
private boolean hasAnnotationComments;
private boolean excluded;

View File

@ -21,6 +21,7 @@ public enum FileType {
TABLES(".json"),
VISUAL_LAYOUT(".json"),
IDP_RESULT(".json"),
COMPONENTS(".json"),
// document is split into 4 files, all should be overridden/deleted at the same time
DOCUMENT_TEXT_OLD(".json"),