Compare commits

..

12 Commits

Author SHA1 Message Date
Maverick Studer
a441909408 Merge branch 'RE-10691-fp' into 'master'
RED-10691: 500 when approving file in a dossier where dossier owner lost manager role

Closes RE-10691

See merge request redactmanager/persistence-service!930
2025-02-04 09:37:04 +01:00
maverickstuder
a7bdbb5495 RED-10691: 500 when approving file in a dossier where dossier owner lost manager role 2025-01-31 13:35:31 +01:00
Dominique Eifländer
3c0b9d36d4 Merge branch 'OPS-284' into 'master'
OPS-284: add prometheus endpoint

Closes OPS-284

See merge request redactmanager/persistence-service!928
2025-01-24 11:01:07 +01:00
Christoph Schabert
9ff3035730 OPS-284: add prometheus endpoint 2025-01-23 13:34:26 +01:00
Maverick Studer
c933793721 Merge branch 'RED-10731' into 'master'
RED-10731: Zip Bombing (~10.000 files in one zip within file limit) should be detected and aborted

Closes RED-10731

See merge request redactmanager/persistence-service!926
2025-01-21 12:21:11 +01:00
maverickstuder
4746d50627 RED-10731: Zip Bombing (~10.000 files in one zip within file limit) should be detected and aborted 2025-01-21 11:53:57 +01:00
Dominique Eifländer
fdfdba550e Merge branch 'RED-10715-masterA' into 'master'
RED-10715: Remove Saas migration fix

Closes RED-10715

See merge request redactmanager/persistence-service!925
2025-01-17 10:01:29 +01:00
Dominique Eifländer
09d53b7743 RED-10715: Remove Saas migration fix 2025-01-17 09:35:07 +01:00
Maverick Studer
d691164af6 Merge branch 'RED-10728' into 'master'
RED-10728: Endpoint to execute full OCR on specific file

Closes RED-10728

See merge request redactmanager/persistence-service!922
2025-01-16 15:54:23 +01:00
maverickstuder
43f9db59d4 RED-10728: Endpoint to execute full OCR on specific file 2025-01-16 14:38:17 +01:00
Dominique Eifländer
7669d27e7b Merge branch 'RED-10715-master' into 'master'
RED-10715: Remove Saas migration

Closes RED-10715

See merge request redactmanager/persistence-service!921
2025-01-15 13:46:38 +01:00
Dominique Eifländer
d7d46d5429 RED-10715: Remove Saas migration 2025-01-15 13:29:29 +01:00
52 changed files with 419 additions and 1564 deletions

View File

@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService;
import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
@ -42,6 +42,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest;
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
@ -60,6 +61,7 @@ public class DossierTemplateController implements DossierTemplateResource {
private final AuditPersistenceService auditPersistenceService;
private final DossierManagementService dossierManagementService;
private final DossierACLService dossierACLService;
private final UserService userService;
@Override
@ -312,10 +314,6 @@ public class DossierTemplateController implements DossierTemplateResource {
.applyDictionaryUpdatesToAllDossiersByDefault(dossierTemplate.isApplyDictionaryUpdatesToAllDossiersByDefault())
.ocrByDefault(dossierTemplate.isOcrByDefault())
.removeWatermark(dossierTemplate.isRemoveWatermark())
.idpByDefault(dossierTemplate.isIdpByDefault())
.rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault())
.fontStyleDetection(dossierTemplate.isFontStyleDetection())
.ocrAllPages(dossierTemplate.isOcrAllPages())
.build();
}

View File

@ -1,111 +0,0 @@
package com.iqser.red.persistence.service.v1.external.api.impl.controller;
import com.iqser.red.service.persistence.management.v1.processor.entity.migration.SaasMigrationStatusEntity;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.migration.SaasMigrationService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.SaasMigrationStatusPersistenceService;
import com.iqser.red.service.persistence.service.v1.api.external.resource.MigrationStatusResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.SaasMigrationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.saas.migration.MigrationStatusResponse;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.RestController;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import static com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.SaasMigrationStatus.*;
@RestController
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@RequiredArgsConstructor
public class MigrationStatusController implements MigrationStatusResource {
SaasMigrationService saasMigrationService;
SaasMigrationStatusPersistenceService saasMigrationStatusPersistenceService;
FileStatusService fileStatusService;
public MigrationStatusResponse migrationStatus() {
int numberOfFilesToMigrate = saasMigrationStatusPersistenceService.countAll();
Map<SaasMigrationStatus, Integer> filesInStatus = new HashMap<>();
filesInStatus.put(MIGRATION_REQUIRED, saasMigrationStatusPersistenceService.countByStatus(MIGRATION_REQUIRED));
filesInStatus.put(DOCUMENT_FILES_MIGRATED, saasMigrationStatusPersistenceService.countByStatus(DOCUMENT_FILES_MIGRATED));
filesInStatus.put(REDACTION_LOGS_MIGRATED, saasMigrationStatusPersistenceService.countByStatus(REDACTION_LOGS_MIGRATED));
filesInStatus.put(ANNOTATION_IDS_MIGRATED, saasMigrationStatusPersistenceService.countByStatus(ANNOTATION_IDS_MIGRATED));
filesInStatus.put(FINISHED, saasMigrationStatusPersistenceService.countByStatus(FINISHED));
filesInStatus.put(ERROR, saasMigrationStatusPersistenceService.countByStatus(ERROR));
var filesInErrorState = saasMigrationStatusPersistenceService.findAllByStatus(ERROR);
var errorCauses = filesInErrorState.stream()
.collect(Collectors.toMap(errorFile -> errorFile.getDossierId() + "/" + errorFile.getFileId(), SaasMigrationStatusEntity::getErrorCause));
return MigrationStatusResponse.builder().numberOfFilesToMigrate(numberOfFilesToMigrate).filesInStatus(filesInStatus).errorCauses(errorCauses).build();
}
@Override
public ResponseEntity<?> startMigrationForFile(String dossierId, String fileId) {
if (!fileStatusService.fileExists(fileId)) {
throw new NotFoundException(String.format("File with id %s does not exist", fileId));
}
saasMigrationService.startMigrationForFile(dossierId, fileId);
return ResponseEntity.ok().build();
}
@Override
public ResponseEntity<?> revertMigrationForFile(String dossierId, String fileId) {
if (!fileStatusService.fileExists(fileId)) {
throw new NotFoundException(String.format("File with id %s does not exist", fileId));
}
if (!saasMigrationStatusPersistenceService.findById(fileId).getStatus().equals(FINISHED)) {
throw new BadRequestException(String.format("File with id %s is not migrated yet, can't revert.", fileId));
}
saasMigrationService.revertMigrationForFile(dossierId, fileId);
return ResponseEntity.ok().build();
}
@Override
public ResponseEntity<?> requeueErrorFiles() {
MigrationStatusResponse migrationStatus = migrationStatus();
if (!migrationIsFinished(migrationStatus)) {
throw new BadRequestException("There are still files processing, please wait until migration has finished to retry!");
}
saasMigrationService.requeueErrorFiles();
return ResponseEntity.ok().build();
}
private static boolean migrationIsFinished(MigrationStatusResponse migrationStatus) {
return migrationStatus.getFilesInStatus().entrySet()
.stream()
.filter(e -> e.getValue() > 0)
.allMatch(e -> e.getKey().equals(FINISHED) || e.getKey().equals(ERROR));
}
}

View File

@ -36,7 +36,6 @@ import lombok.RequiredArgsConstructor;
public class ReanalysisController implements ReanalysisResource {
private static final String DOSSIER_ID = "dossierId";
private final ReanalysisService reanalysisService;
private final FileStatusManagementService fileStatusManagementService;
private final AuditPersistenceService auditPersistenceService;
@ -99,11 +98,11 @@ public class ReanalysisController implements ReanalysisResource {
@Override
@PreAuthorize("hasAuthority('" + REANALYZE_DOSSIER + "')")
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
public void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
reanalysisService.ocrDossier(dossierId, idp);
reanalysisService.ocrDossier(dossierId);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
@ -120,11 +119,11 @@ public class ReanalysisController implements ReanalysisResource {
public void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
@RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
validateOCR(dossierId, fileId);
reanalysisService.ocrFile(dossierId, fileId, force, idp);
reanalysisService.ocrFile(dossierId, fileId, force, allPages);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)
@ -138,13 +137,11 @@ public class ReanalysisController implements ReanalysisResource {
@Override
@PreAuthorize("hasAuthority('" + REANALYZE_FILE + "')")
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
@RequestBody Set<String> fileIds,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp) {
public void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds) {
accessControlService.checkDossierExistenceAndAccessPermissionsToDossier(dossierId);
fileIds.forEach(fileId -> validateOCR(dossierId, fileId));
reanalysisService.ocrFiles(dossierId, fileIds, idp);
reanalysisService.ocrFiles(dossierId, fileIds, false);
auditPersistenceService.audit(AuditRequest.builder()
.userId(KeycloakSecurity.getUserId())
.objectId(dossierId)

View File

@ -28,6 +28,7 @@ import org.springframework.web.bind.annotation.RestController;
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotAllowedException;
import com.iqser.red.service.persistence.management.v1.processor.roles.ApplicationRoles;
import com.iqser.red.service.persistence.management.v1.processor.service.AccessControlService;
@ -101,8 +102,13 @@ public class StatusController implements StatusResource {
var accessibleDossierIds = filterByPermissionsService.onlyViewableDossierIds(new ArrayList<>(filesByDossier.getValue().keySet()));
var response = new HashMap<String, List<FileStatus>>();
for (var dossierId : accessibleDossierIds) {
var allFoundFiles = fileStatusManagementService.findAllDossierIdAndIds(dossierId, filesByDossier.getValue().get(dossierId));
response.put(dossierId, allFoundFiles.stream().map(FileStatusMapper::toFileStatus).collect(Collectors.toList()));
var allFoundFiles = fileStatusManagementService.findAllDossierIdAndIds(dossierId,
filesByDossier.getValue()
.get(dossierId));
response.put(dossierId,
allFoundFiles.stream()
.map(FileStatusMapper::toFileStatus)
.collect(Collectors.toList()));
}
return new JSONPrimitive<>(response);
@ -351,6 +357,10 @@ public class StatusController implements StatusResource {
.build());
var dossier = dossierACLService.enhanceDossierWithACLData(dossierManagementService.getDossierById(dossierId, false, false));
if (dossier.getOwnerId() == null) {
throw new ConflictException("Dossier has no owner!");
}
if (!dossier.getOwnerId().equals(KeycloakSecurity.getUserId())) {
var fileStatus = fileStatusManagementService.getFileStatus(fileId);

View File

@ -23,12 +23,11 @@ import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.iqser.red.service.persistence.management.v1.processor.service.FileFormatValidationService;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotAllowedException;
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
import com.iqser.red.service.pdftron.redaction.v1.api.model.ByteContentDocument;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotAllowedException;
import com.iqser.red.service.persistence.management.v1.processor.service.AccessControlService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileFormatValidationService;
import com.iqser.red.service.persistence.management.v1.processor.service.ReanalysisService;
import com.iqser.red.service.persistence.management.v1.processor.service.UploadService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
@ -37,6 +36,7 @@ import com.iqser.red.service.persistence.service.v1.api.external.resource.Upload
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileUploadResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.audit.AuditRequest;
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
import com.knecon.fforesight.tenantcommons.TenantContext;
import feign.FeignException;
@ -53,9 +53,9 @@ import lombok.extern.slf4j.Slf4j;
@SuppressWarnings("PMD")
public class UploadController implements UploadResource {
private static final int THRESHOLD_ENTRIES = 10000;
private static final int THRESHOLD_SIZE = 1000000000; // 1 GB
private static final double THRESHOLD_RATIO = 10;
private static final int THRESHOLD_ENTRIES = 10000; // Maximum number of files allowed
private static final int THRESHOLD_SIZE = 1000000000; // 1 GB total unzipped data
private static final double THRESHOLD_RATIO = 10; // Max allowed compression ratio
private final UploadService uploadService;
private final ReanalysisService reanalysisService;
@ -72,31 +72,25 @@ public class UploadController implements UploadResource {
@Parameter(name = DISABLE_AUTOMATIC_ANALYSIS_PARAM, description = "Disables automatic redaction for the uploaded file, imports only imported redactions") @RequestParam(value = DISABLE_AUTOMATIC_ANALYSIS_PARAM, required = false, defaultValue = "false") boolean disableAutomaticAnalysis) {
accessControlService.checkAccessPermissionsToDossier(dossierId);
if (file.getOriginalFilename() == null) {
String originalFilename = file.getOriginalFilename();
if (originalFilename == null) {
throw new BadRequestException("Could not upload file, no filename provided.");
}
var extension = getExtension(file.getOriginalFilename());
String extension = getExtension(originalFilename);
try {
switch (extension) {
case "zip":
return handleZip(dossierId, file.getBytes(), keepManualRedactions, disableAutomaticAnalysis);
case "csv":
return uploadService.importCsv(dossierId, file.getBytes());
default:
if (!fileFormatValidationService.getAllFileFormats().contains(extension)) {
throw new BadRequestException("Invalid file uploaded");
}
if (!fileFormatValidationService.getValidFileFormatsForTenant(TenantContext.getTenantId()).contains(extension)) {
throw new NotAllowedException("Insufficient permissions");
}
return uploadService.processSingleFile(dossierId, file.getOriginalFilename(), file.getBytes(), keepManualRedactions, disableAutomaticAnalysis);
}
return switch (extension) {
case "zip" -> handleZip(dossierId, file.getBytes(), keepManualRedactions, disableAutomaticAnalysis);
case "csv" -> uploadService.importCsv(dossierId, file.getBytes());
default -> {
validateExtensionOrThrow(extension);
yield uploadService.processSingleFile(dossierId, originalFilename, file.getBytes(), keepManualRedactions, disableAutomaticAnalysis);
}
};
} catch (IOException e) {
throw new BadRequestException(e.getMessage(), e);
throw new BadRequestException("Failed to process file: " + e.getMessage(), e);
}
}
@ -111,7 +105,6 @@ public class UploadController implements UploadResource {
accessControlService.verifyUserIsReviewerOrApprover(dossierId, fileId);
try {
reanalysisService.importRedactions(ByteContentDocument.builder().dossierId(dossierId).fileId(fileId).document(file.getBytes()).pages(pageInclusionRequest).build());
auditPersistenceService.audit(AuditRequest.builder()
@ -122,84 +115,116 @@ public class UploadController implements UploadResource {
.details(Map.of("dossierId", dossierId))
.build());
} catch (IOException e) {
throw new BadRequestException(e.getMessage(), e);
throw new BadRequestException("Failed to import redactions: " + e.getMessage(), e);
} catch (FeignException e) {
throw processFeignException(e);
}
}
private String getExtension(String fileName) {
private void validateExtensionOrThrow(String extension) {
return fileName.substring(fileName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT);
if (!fileFormatValidationService.getAllFileFormats().contains(extension)) {
throw new BadRequestException("Invalid file uploaded (unrecognized extension).");
}
if (!fileFormatValidationService.getValidFileFormatsForTenant(TenantContext.getTenantId()).contains(extension)) {
throw new NotAllowedException("Insufficient permissions for this file type.");
}
}
/**
* 1. Write the uploaded content to a temp ZIP file
* 2. Check the number of entries and reject if too big or if symlinks found
* 3. Unzip and process each file, while checking size and ratio.
*/
private FileUploadResult handleZip(String dossierId, byte[] fileContent, boolean keepManualRedactions, boolean disableAutomaticAnalysis) throws IOException {
File tempFile = FileUtils.createTempFile(UUID.randomUUID().toString(), ".zip");
try (var fileOutputStream = new FileOutputStream(tempFile)) {
IOUtils.write(fileContent, fileOutputStream);
File tempZip = FileUtils.createTempFile(UUID.randomUUID().toString(), ".zip");
try (FileOutputStream fos = new FileOutputStream(tempZip)) {
IOUtils.write(fileContent, fos);
}
try {
checkForSymlinks(tempFile);
validateZipEntries(tempZip);
var zipData = unzip(tempFile, dossierId, keepManualRedactions, disableAutomaticAnalysis);
try {
ZipData zipData = processZipContents(tempZip, dossierId, keepManualRedactions, disableAutomaticAnalysis);
if (zipData.csvBytes != null) {
try {
var importResult = uploadService.importCsv(dossierId, zipData.csvBytes);
zipData.fileUploadResult.getProcessedAttributes().addAll(importResult.getProcessedAttributes());
zipData.fileUploadResult.getProcessedFileIds().addAll(importResult.getProcessedFileIds());
FileUploadResult csvResult = uploadService.importCsv(dossierId, zipData.csvBytes);
zipData.fileUploadResult.getProcessedAttributes().addAll(csvResult.getProcessedAttributes());
zipData.fileUploadResult.getProcessedFileIds().addAll(csvResult.getProcessedFileIds());
} catch (Exception e) {
log.debug("CSV file inside ZIP failed", e);
// TODO return un-processed files to client
log.debug("CSV file inside ZIP failed to import", e);
}
} else if (zipData.fileUploadResult.getFileIds().isEmpty()) {
if (zipData.containedUnpermittedFiles) {
throw new NotAllowedException("Zip file contains unpermitted files");
throw new NotAllowedException("Zip file contains unpermitted files.");
} else {
throw new BadRequestException("Only unsupported files in zip file");
throw new BadRequestException("Only unsupported files in the ZIP.");
}
}
return zipData.fileUploadResult;
} finally {
boolean isDeleted = tempFile.delete();
if (!isDeleted) {
log.warn("tempFile could not be deleted");
if (!tempZip.delete()) {
log.warn("Could not delete temporary ZIP file: {}", tempZip);
}
}
}
private void checkForSymlinks(File tempFile) throws IOException {
private void validateZipEntries(File tempZip) throws IOException {
try (FileInputStream fis = new FileInputStream(tempZip); ZipFile zipFile = new ZipFile(fis.getChannel())) {
int count = 0;
var entries = zipFile.getEntries();
while (entries.hasMoreElements()) {
ZipArchiveEntry ze = entries.nextElement();
try (var fis = new FileInputStream(tempFile); var zipFile = new ZipFile(fis.getChannel())) {
for (var entryEnum = zipFile.getEntries(); entryEnum.hasMoreElements(); ) {
var ze = entryEnum.nextElement();
if (ze.isUnixSymlink()) {
throw new BadRequestException("ZIP-files with symlinks are not allowed");
throw new BadRequestException("ZIP-files with symlinks are not allowed.");
}
if (!ze.isDirectory() && !ze.getName().startsWith(".")) {
count++;
if (count > THRESHOLD_ENTRIES) {
throw new BadRequestException("ZIP-Bomb detected: too many entries.");
}
}
}
}
}
private ZipData unzip(File tempFile, String dossierId, boolean keepManualRedactions, boolean disableAutomaticAnalysis) throws IOException {
private ZipData processZipContents(File tempZip, String dossierId, boolean keepManualRedactions, boolean disableAutomaticAnalysis) throws IOException {
var zipData = new ZipData();
ZipData zipData = new ZipData();
try (var fis = new FileInputStream(tempFile); var zipFile = new ZipFile(fis.getChannel())) {
try (FileInputStream fis = new FileInputStream(tempZip); ZipFile zipFile = new ZipFile(fis.getChannel())) {
for (var entryEnum = zipFile.getEntries(); entryEnum.hasMoreElements(); ) {
var ze = entryEnum.nextElement();
zipData.totalEntryArchive++;
var entries = zipFile.getEntries();
while (entries.hasMoreElements()) {
ZipArchiveEntry entry = entries.nextElement();
if (!ze.isDirectory()) {
processFileZipEntry(ze, zipFile, dossierId, keepManualRedactions, zipData, disableAutomaticAnalysis);
if (entry.isDirectory() || entry.getName().startsWith(".")) {
continue;
}
byte[] entryBytes = readEntryWithRatioCheck(entry, zipFile);
zipData.totalSizeArchive += entryBytes.length;
if (zipData.totalSizeArchive > THRESHOLD_SIZE) {
throw new BadRequestException("ZIP-Bomb detected (exceeds total size limit).");
}
String extension = getExtension(entry.getName());
if ("csv".equalsIgnoreCase(extension)) {
zipData.csvBytes = entryBytes;
} else {
handleRegularFile(dossierId, entryBytes, extension, extractFileName(entry.getName()), zipData, keepManualRedactions, disableAutomaticAnalysis);
}
}
}
@ -207,73 +232,70 @@ public class UploadController implements UploadResource {
}
private void processFileZipEntry(ZipArchiveEntry ze, ZipFile zipFile, String dossierId, boolean keepManualRedactions, ZipData zipData, boolean disableAutomaticAnalysis) throws IOException {
private byte[] readEntryWithRatioCheck(ZipArchiveEntry entry, ZipFile zipFile) throws IOException {
var extension = getExtension(ze.getName());
long compressedSize = entry.getCompressedSize() > 0 ? entry.getCompressedSize() : 1;
try (var is = zipFile.getInputStream(entry); var bos = new ByteArrayOutputStream()) {
final String fileName;
if (ze.getName().lastIndexOf("/") >= 0) {
fileName = ze.getName().substring(ze.getName().lastIndexOf("/") + 1);
} else {
fileName = ze.getName();
}
byte[] buffer = new byte[4096];
int bytesRead;
int totalUncompressed = 0;
if (fileName.startsWith(".")) {
return;
}
while ((bytesRead = is.read(buffer)) != -1) {
bos.write(buffer, 0, bytesRead);
totalUncompressed += bytesRead;
var entryAsBytes = readCurrentZipEntry(ze, zipFile);
zipData.totalSizeArchive += entryAsBytes.length;
// 1. the uncompressed data size is too much for the application resource capacity
// 2. too many entries in the archive can lead to inode exhaustion of the file-system
if (zipData.totalSizeArchive > THRESHOLD_SIZE || zipData.totalEntryArchive > THRESHOLD_ENTRIES) {
throw new BadRequestException("ZIP-Bomb detected.");
}
if ("csv".equals(extension)) {
zipData.csvBytes = entryAsBytes;
} else if (fileFormatValidationService.getAllFileFormats().contains(extension)) {
if (!fileFormatValidationService.getValidFileFormatsForTenant(TenantContext.getTenantId()).contains(extension)) {
zipData.containedUnpermittedFiles = true;
return;
}
zipData.containedUnpermittedFiles = false;
try {
var result = uploadService.processSingleFile(dossierId, fileName, entryAsBytes, keepManualRedactions, disableAutomaticAnalysis);
zipData.fileUploadResult.getFileIds().addAll(result.getFileIds());
} catch (Exception e) {
log.debug("PDF File inside ZIP failed", e);
// TODO return un-processed files to client
double ratio = (double) totalUncompressed / compressedSize;
if (ratio > THRESHOLD_RATIO) {
throw new BadRequestException("ZIP-Bomb detected (compression ratio too high).");
}
}
return bos.toByteArray();
}
}
private byte[] readCurrentZipEntry(ZipArchiveEntry ze, ZipFile zipFile) throws IOException {
private void handleRegularFile(String dossierId,
byte[] fileBytes,
String extension,
String fileName,
ZipData zipData,
boolean keepManualRedactions,
boolean disableAutomaticAnalysis) {
var bos = new ByteArrayOutputStream();
try (var entryStream = zipFile.getInputStream(ze)) {
var buffer = new byte[2048];
var nBytes = 0;
int totalSizeEntry = 0;
while ((nBytes = entryStream.read(buffer)) > 0) {
bos.write(buffer, 0, nBytes);
totalSizeEntry += nBytes;
double compressionRatio = (float) totalSizeEntry / ze.getCompressedSize();
if (compressionRatio > THRESHOLD_RATIO) {
// ratio between compressed and uncompressed data is highly suspicious, looks like a Zip Bomb Attack
throw new BadRequestException("ZIP-Bomb detected.");
}
}
if (!fileFormatValidationService.getAllFileFormats().contains(extension)) {
zipData.containedUnpermittedFiles = false;
return;
}
return bos.toByteArray();
if (!fileFormatValidationService.getValidFileFormatsForTenant(TenantContext.getTenantId()).contains(extension)) {
zipData.containedUnpermittedFiles = true;
return;
}
try {
FileUploadResult result = uploadService.processSingleFile(dossierId, fileName, fileBytes, keepManualRedactions, disableAutomaticAnalysis);
zipData.fileUploadResult.getFileIds().addAll(result.getFileIds());
} catch (Exception e) {
log.debug("Failed to process file '{}' in ZIP: {}", fileName, e.getMessage(), e);
}
}
private String extractFileName(String path) {
int idx = path.lastIndexOf('/');
return (idx >= 0) ? path.substring(idx + 1) : path;
}
private String getExtension(String fileName) {
int idx = fileName.lastIndexOf('.');
if (idx < 0) {
return "";
}
return fileName.substring(idx + 1).toLowerCase(Locale.ROOT);
}
@ -282,7 +304,6 @@ public class UploadController implements UploadResource {
byte[] csvBytes;
int totalSizeArchive;
int totalEntryArchive;
FileUploadResult fileUploadResult = new FileUploadResult();
boolean containedUnpermittedFiles;

View File

@ -1,56 +0,0 @@
package com.iqser.red.service.persistence.service.v1.api.external.resource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.saas.migration.MigrationStatusResponse;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
public interface MigrationStatusResource {
String MIGRATION_STATUS_REST_PATH = ExternalApi.BASE_PATH + "/migration-status";
String START_MIGRATION_REST_PATH = ExternalApi.BASE_PATH + "/start_migration";
String REVERT_MIGRATION_REST_PATH = ExternalApi.BASE_PATH + "/revert_migration";
String RETRY_MIGRATION_REST_PATH = ExternalApi.BASE_PATH + "/retry_migration";
String FILE_ID = "fileId";
String FILE_ID_PATH_VARIABLE = "/{" + FILE_ID + "}";
String DOSSIER_ID = "dossierId";
String DOSSIER_ID_PATH_VARIABLE = "/{" + DOSSIER_ID + "}";
@ResponseBody
@PostMapping(value = MIGRATION_STATUS_REST_PATH, produces = MediaType.APPLICATION_JSON_VALUE)
@Operation(summary = "Show the status of the migration", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "Success.")})
MigrationStatusResponse migrationStatus();
@ResponseBody
@PostMapping(value = START_MIGRATION_REST_PATH + FILE_ID_PATH_VARIABLE + DOSSIER_ID_PATH_VARIABLE)
@Operation(summary = "Start SAAS migration for specific file", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "Success.")})
ResponseEntity<?> startMigrationForFile(@RequestParam(value = DOSSIER_ID) String dossierId, @RequestParam(value = FILE_ID) String fileId);
@ResponseBody
@PostMapping(value = REVERT_MIGRATION_REST_PATH + FILE_ID_PATH_VARIABLE + DOSSIER_ID_PATH_VARIABLE)
@Operation(summary = "Start SAAS migration for specific file", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "Success.")})
ResponseEntity<?> revertMigrationForFile(@RequestParam(value = DOSSIER_ID) String dossierId, @RequestParam(value = FILE_ID) String fileId);
@ResponseBody
@PostMapping(value = RETRY_MIGRATION_REST_PATH)
@Operation(summary = "Restart SAAS migration for all files in error state", description = "None")
@ApiResponses(value = {@ApiResponse(responseCode = "200", description = "Success.")})
ResponseEntity<?> requeueErrorFiles();
}

View File

@ -38,7 +38,7 @@ public interface ReanalysisResource {
String EXCLUDED_STATUS_PARAM = "excluded";
String FORCE_PARAM = "force";
String IDP_PARAM = "idp";
String ALL_PAGES = "allPages";
@PostMapping(value = REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ -66,7 +66,7 @@ public interface ReanalysisResource {
@Operation(summary = "Ocr and reanalyze a dossier", description = "None")
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE)
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId, @RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
void ocrDossier(@PathVariable(DOSSIER_ID) String dossierId);
@Operation(summary = "Ocr and reanalyze a file", description = "None")
@ -75,15 +75,13 @@ public interface ReanalysisResource {
void ocrFile(@PathVariable(DOSSIER_ID) String dossierId,
@PathVariable(FILE_ID) String fileId,
@RequestParam(value = FORCE_PARAM, required = false, defaultValue = FALSE) boolean force,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
@RequestParam(value = ALL_PAGES, required = false, defaultValue = FALSE) boolean allPages);
@Operation(summary = "Ocr and reanalyze multiple files for a dossier", description = "None")
@PostMapping(value = OCR_REANALYSIS_REST_PATH + DOSSIER_ID_PATH_VARIABLE + BULK_REST_PATH)
@ApiResponses(value = {@ApiResponse(responseCode = "204", description = "OK"), @ApiResponse(responseCode = "404", description = "Not found"), @ApiResponse(responseCode = "403", description = "Forbidden")})
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId,
@RequestBody Set<String> fileIds,
@RequestParam(value = IDP_PARAM, required = false, defaultValue = FALSE) boolean idp);
void ocrFiles(@PathVariable(DOSSIER_ID) String dossierId, @RequestBody Set<String> fileIds);
@Operation(summary = "Exclude or re-include a file to the automatic analysis", description = "None")

View File

@ -57,7 +57,7 @@ public class AdminInterfaceController {
fileStatusService.validateFileIsNotDeletedAndNotApproved(fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId);
fileStatusService.setStatusOcrQueued(dossierId, fileId, false);
}
@ -91,7 +91,7 @@ public class AdminInterfaceController {
if (!dryRun) {
fileStatusService.validateFileIsNotDeletedAndNotApproved(file.getId());
fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId());
fileStatusService.setStatusOcrQueued(file.getDossierId(), file.getId(), false);
}
}

View File

@ -35,8 +35,8 @@ dependencies {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
}
api("com.knecon.fforesight:azure-ocr-service-api:0.24.0")
implementation("com.knecon.fforesight:llm-service-api:1.35.0")
api("com.knecon.fforesight:azure-ocr-service-api:0.13.0")
implementation("com.knecon.fforesight:llm-service-api:1.20.0-RED10072.2")
api("com.knecon.fforesight:jobs-commons:0.13.0")
api("com.iqser.red.commons:storage-commons:2.50.0")
api("com.knecon.fforesight:tenant-commons:0.31.0-RED10196.0") {

View File

@ -83,18 +83,6 @@ public class DossierTemplateEntity {
@Column(name = "ocr_by_default")
private boolean ocrByDefault;
@Column(name = "rotation_correction_by_default")
private boolean rotationCorrectionByDefault;
@Column(name = "idp_by_default")
private boolean idpByDefault;
@Column(name = "font_style_detection")
private boolean fontStyleDetection;
@Column(name = "ocr_all_pages")
private boolean ocrAllPages;
@Column(name = "remove_watermark")
private boolean removeWatermark;
@ -140,7 +128,6 @@ public class DossierTemplateEntity {
@Enumerated(EnumType.STRING)
private LayoutParsingType layoutParsingType;
public static DossierTemplateEntity copyDossierTemplateEntityWithoutChildEntities(DossierTemplateEntity dossierTemplateEntity) {
DossierTemplateEntity dossierTemplateCopy = new DossierTemplateEntity();
@ -161,10 +148,6 @@ public class DossierTemplateEntity {
dossierTemplateCopy.removeWatermark = dossierTemplateEntity.removeWatermark;
dossierTemplateCopy.downloadFileTypes = dossierTemplateEntity.downloadFileTypes;
dossierTemplateCopy.layoutParsingType = dossierTemplateEntity.layoutParsingType;
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection;
dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages;
return dossierTemplateCopy;
}

View File

@ -152,16 +152,9 @@ public class FileEntity {
@Column(name = "number_of_ocred_pages")
private Integer numberOfOCRedPages;
@Column(name = "number_of_idp_pages")
private Integer numberOfIdpPages;
@Column(name = "ocr_end_time")
private OffsetDateTime ocrEndTime;
private int usedPromptTokens;
private int usedCompletionTokens;
@Column
private boolean hasAnnotationComments;

View File

@ -1,35 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.entity.migration;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.SaasMigrationStatus;
import jakarta.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
@Entity
@Table(name = "saas_migration_status")
public class SaasMigrationStatusEntity {
@Id
private String fileId;
@Column
private String dossierId;
@Column
@Enumerated(EnumType.STRING)
private SaasMigrationStatus status;
@Column
private Integer processingErrorCounter;
@Column
private String errorCause;
}

View File

@ -1,170 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.migration;
import com.iqser.red.service.persistence.management.v1.processor.entity.annotations.*;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.CommentRepository;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.annotationentity.*;
import com.knecon.fforesight.databasetenantcommons.providers.utils.MagicConverter;
import jakarta.transaction.Transactional;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
@Slf4j
@Service
@Transactional
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class SaasAnnotationIdMigrationService {
ManualRedactionRepository manualRedactionRepository;
RemoveRedactionRepository removeRedactionRepository;
ForceRedactionRepository forceRedactionRepository;
ResizeRedactionRepository resizeRedactionRepository;
RecategorizationRepository recategorizationRepository;
LegalBasisChangeRepository legalBasisChangeRepository;
CommentRepository commentRepository;
FileRepository fileRepository;
public int updateManualAddRedaction(AnnotationEntityId oldAnnotationEntityId, AnnotationEntityId newAnnotationEntityId) {
if (oldAnnotationEntityId.equals(newAnnotationEntityId)) {
return 0;
}
var oldEntry = manualRedactionRepository.findById(oldAnnotationEntityId);
if (oldEntry.isPresent()) {
var newEntry = MagicConverter.convert(oldEntry.get(), ManualRedactionEntryEntity.class);
newEntry.setPositions(MagicConverter.convert(oldEntry.get().getPositions(), RectangleEntity.class));
newEntry.setFileStatus(fileRepository.findById(oldAnnotationEntityId.getFileId())
.get());
newEntry.setId(newAnnotationEntityId);
manualRedactionRepository.deleteById(oldAnnotationEntityId);
manualRedactionRepository.save(newEntry);
return 1;
}
return 0;
}
public int updateRemoveRedaction(AnnotationEntityId oldAnnotationEntityId, AnnotationEntityId newAnnotationEntityId) {
if (oldAnnotationEntityId.equals(newAnnotationEntityId)) {
return 0;
}
var oldEntry = removeRedactionRepository.findById(oldAnnotationEntityId);
if (oldEntry.isPresent()) {
var newEntry = MagicConverter.convert(oldEntry.get(), IdRemovalEntity.class);
newEntry.setFileStatus(fileRepository.findById(oldAnnotationEntityId.getFileId())
.get());
newEntry.setId(newAnnotationEntityId);
removeRedactionRepository.deleteById(oldAnnotationEntityId);
removeRedactionRepository.save(newEntry);
return 1;
}
return 0;
}
public int updateForceRedaction(AnnotationEntityId oldAnnotationEntityId, AnnotationEntityId newAnnotationEntityId) {
if (oldAnnotationEntityId.equals(newAnnotationEntityId)) {
return 0;
}
var oldEntry = forceRedactionRepository.findById(oldAnnotationEntityId);
if (oldEntry.isPresent()) {
var newEntry = MagicConverter.convert(oldEntry.get(), ManualForceRedactionEntity.class);
newEntry.setFileStatus(fileRepository.findById(oldAnnotationEntityId.getFileId())
.get());
newEntry.setId(newAnnotationEntityId);
forceRedactionRepository.deleteById(oldAnnotationEntityId);
forceRedactionRepository.save(newEntry);
return 1;
}
return 0;
}
public int updateResizeRedaction(AnnotationEntityId oldAnnotationEntityId, AnnotationEntityId newAnnotationEntityId) {
if (oldAnnotationEntityId.equals(newAnnotationEntityId)) {
return 0;
}
var oldEntry = resizeRedactionRepository.findById(oldAnnotationEntityId);
if (oldEntry.isPresent()) {
var newEntry = MagicConverter.convert(oldEntry.get(), ManualResizeRedactionEntity.class);
newEntry.setId(newAnnotationEntityId);
newEntry.setPositions(MagicConverter.convert(oldEntry.get().getPositions(), RectangleEntity.class));
newEntry.setFileStatus(fileRepository.findById(oldAnnotationEntityId.getFileId())
.get());
resizeRedactionRepository.deleteById(oldAnnotationEntityId);
resizeRedactionRepository.save(newEntry);
return 1;
}
return 0;
}
public int updateRecategorizationRedaction(AnnotationEntityId oldAnnotationEntityId, AnnotationEntityId newAnnotationEntityId) {
if (oldAnnotationEntityId.equals(newAnnotationEntityId)) {
return 0;
}
var oldEntry = recategorizationRepository.findById(oldAnnotationEntityId);
if (oldEntry.isPresent()) {
var newEntry = MagicConverter.convert(oldEntry.get(), ManualRecategorizationEntity.class);
newEntry.setId(newAnnotationEntityId);
newEntry.setFileStatus(fileRepository.findById(oldAnnotationEntityId.getFileId())
.get());
recategorizationRepository.deleteById(oldAnnotationEntityId);
recategorizationRepository.save(newEntry);
return 1;
}
return 0;
}
public int updateLegalBasisChangeRedaction(AnnotationEntityId oldAnnotationEntityId, AnnotationEntityId newAnnotationEntityId) {
if (oldAnnotationEntityId.equals(newAnnotationEntityId)) {
return 0;
}
var oldEntry = legalBasisChangeRepository.findById(oldAnnotationEntityId);
if (oldEntry.isPresent()) {
var newEntry = MagicConverter.convert(oldEntry.get(), ManualLegalBasisChangeEntity.class);
newEntry.setId(newAnnotationEntityId);
newEntry.setFileStatus(fileRepository.findById(oldAnnotationEntityId.getFileId())
.get());
legalBasisChangeRepository.deleteById(oldAnnotationEntityId);
legalBasisChangeRepository.save(newEntry);
return 1;
}
return 0;
}
public int updateCommentIds(String fileId, String key, String value) {
if (key.equals(value)) {
return 0;
}
return commentRepository.saasMigrationUpdateAnnotationIds(fileId, key, value);
}
}

View File

@ -1,83 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.migration;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import org.springframework.stereotype.Service;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.iqser.red.service.persistence.management.v1.processor.entity.annotations.AnnotationEntityId;
import com.iqser.red.service.persistence.management.v1.processor.entity.annotations.ManualRedactionEntryEntity;
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.annotations.AddRedactionPersistenceService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntryType;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
public class SaasMigrationManualChangesUpdateService {
private final AddRedactionPersistenceService addRedactionPersistenceService;
private final HashFunction hashFunction = Hashing.murmur3_128();
public void convertUnprocessedAddToDictionariesToLocalChanges(String fileId) {
var unprocessedManualAdds = addRedactionPersistenceService.findEntriesByFileIdAndOptions(fileId, ManualChangesQueryOptions.unprocessedOnly());
for (var unprocessedManualAdd : unprocessedManualAdds) {
if (!unprocessedManualAdd.getDictionaryEntryType().equals(DictionaryEntryType.ENTRY)) {
continue;
}
if (unprocessedManualAdd.isAddToDictionary() || unprocessedManualAdd.isAddToAllDossiers()) {
// copy pending dict change to a new one with a different id. Can't reuse the same one, as it's the primary key of the table.
// It has no functionality, its only there, such that the UI can show a pending change.
ManualRedactionEntryEntity pendingDictAdd = new ManualRedactionEntryEntity(buildSecondaryId(unprocessedManualAdd.getId(), fileId),
unprocessedManualAdd.getUser(),
unprocessedManualAdd.getTypeId(),
unprocessedManualAdd.getValue(),
unprocessedManualAdd.getReason(),
unprocessedManualAdd.getLegalBasis(),
unprocessedManualAdd.getSection(),
unprocessedManualAdd.isRectangle(),
unprocessedManualAdd.isAddToDictionary(),
unprocessedManualAdd.isAddToAllDossiers(),
unprocessedManualAdd.isAddToDossierDictionary(),
DictionaryEntryType.ENTRY,
unprocessedManualAdd.getRequestDate(),
null,
null,
new ArrayList<>(unprocessedManualAdd.getPositions()),
unprocessedManualAdd.getFileStatus(),
unprocessedManualAdd.getTextBefore(),
unprocessedManualAdd.getTextAfter(),
unprocessedManualAdd.getSourceId(),
new HashSet<>(unprocessedManualAdd.getTypeIdsOfModifiedDictionaries()));
addRedactionPersistenceService.update(pendingDictAdd);
// change existing dict add to unprocessed manual add. ID must match with prior entry, such that other unprocessed manual changes may be applied to it.
unprocessedManualAdd.setAddToDictionary(false);
unprocessedManualAdd.setAddToAllDossiers(false);
unprocessedManualAdd.setLegalBasis("");
unprocessedManualAdd.setTypeIdsOfModifiedDictionaries(Collections.emptySet());
unprocessedManualAdd.setDictionaryEntryType(null);
addRedactionPersistenceService.update(unprocessedManualAdd);
}
}
}
private AnnotationEntityId buildSecondaryId(AnnotationEntityId annotationEntityId, String fileId) {
return new AnnotationEntityId(hashFunction.hashString(annotationEntityId.getAnnotationId(), StandardCharsets.UTF_8).toString(), fileId);
}
}

View File

@ -1,396 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.migration;
import static com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration.MIGRATION_REQUEST_QUEUE;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.management.v1.processor.entity.annotations.AnnotationEntityId;
import com.iqser.red.service.persistence.management.v1.processor.exception.InternalServerErrorException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
import com.iqser.red.service.persistence.management.v1.processor.service.CommentService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierService;
import com.iqser.red.service.persistence.management.v1.processor.service.IndexingService;
import com.iqser.red.service.persistence.management.v1.processor.service.job.AutomaticAnalysisJob;
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestFactory;
import com.iqser.red.service.persistence.management.v1.processor.service.manualredactions.ManualRedactionProviderService;
import com.iqser.red.service.persistence.management.v1.processor.service.manualredactions.ManualRedactionService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.FileStatusPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.SaasMigrationStatusPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings;
import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.SaasMigrationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
import com.iqser.red.service.redaction.v1.model.MigrationRequest;
import com.iqser.red.storage.commons.exception.StorageException;
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.databasetenantcommons.providers.TenantSyncService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
import com.knecon.fforesight.tenantcommons.TenantContext;
import com.knecon.fforesight.tenantcommons.TenantProvider;
import com.knecon.fforesight.tenantcommons.model.TenantSyncEvent;
import com.knecon.fforesight.tenantcommons.model.UpdateDetailsRequest;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class SaasMigrationService implements TenantSyncService {
AutomaticAnalysisJob automaticAnalysisJob;
FileStatusPersistenceService fileStatusPersistenceService;
SaasMigrationStatusPersistenceService saasMigrationStatusPersistenceService;
DossierService dossierService;
ManualRedactionProviderService manualRedactionProviderService;
TenantProvider tenantProvider;
IndexingService indexingService;
LayoutParsingRequestFactory layoutParsingRequestFactory;
RabbitTemplate rabbitTemplate;
FileManagementServiceSettings settings;
StorageService storageService;
SaasAnnotationIdMigrationService saasAnnotationIdMigrationService;
UncompressedFilesMigrationService uncompressedFilesMigrationService;
ManualRedactionService manualRedactionService;
CommentService commentService;
RankDeDuplicationService rankDeDuplicationService;
SaasMigrationManualChangesUpdateService saasMigrationManualChangesUpdateService;
@Override
public synchronized void syncTenant(TenantSyncEvent tenantSyncEvent) {
startMigrationForTenant(tenantSyncEvent.getTenantId());
}
// Persistence-Service needs to be scaled to 1.
public void startMigrationForTenant(String tenantId) {
// TODO migrate rules.
automaticAnalysisJob.stopForTenant(tenantId);
log.info("Starting uncompressed files migration ...");
uncompressedFilesMigrationService.migrateUncompressedFiles(tenantId);
log.info("Finished uncompressed files migration ...");
rankDeDuplicationService.deduplicate();
int numberOfFiles = 0;
var files = saasMigrationStatusPersistenceService.findAll();
for (var file : files) {
var dossier = dossierService.getDossierById(file.getDossierId());
if (dossier.getHardDeletedTime() != null) {
if (fileStatusPersistenceService.getStatus(file.getFileId()).getHardDeletedTime() != null) {
saasMigrationStatusPersistenceService.updateStatus(file.getFileId(), SaasMigrationStatus.FINISHED);
continue;
} else {
fileStatusPersistenceService.hardDelete(file.getFileId(), dossier.getHardDeletedTime());
saasMigrationStatusPersistenceService.updateStatus(file.getFileId(), SaasMigrationStatus.FINISHED);
continue;
}
}
if (fileStatusPersistenceService.getStatus(file.getFileId()).getHardDeletedTime() != null) {
saasMigrationStatusPersistenceService.updateStatus(file.getFileId(), SaasMigrationStatus.FINISHED);
continue;
}
if (!file.getStatus().equals(SaasMigrationStatus.MIGRATION_REQUIRED)) {
log.info("Skipping {} for tenant {} since migration status is {}", file.getFileId(), TenantContext.getTenantId(), file.getStatus());
continue;
}
// delete NER_ENTITIES since offsets depend on old document structure.
storageService.deleteObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(file.getDossierId(), file.getFileId(), FileType.NER_ENTITIES));
var layoutParsingRequest = layoutParsingRequestFactory.build(dossier.getDossierTemplate().getId(), file.getDossierId(), file.getFileId(), false);
rabbitTemplate.convertAndSend(LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_EXCHANGE, TenantContext.getTenantId(), layoutParsingRequest);
numberOfFiles++;
}
log.info("Added {} documents for tenant {} to Layout-Parsing queue for saas migration", numberOfFiles, TenantContext.getTenantId());
if (numberOfFiles == 0) {
finalizeMigration();
}
}
public void startMigrationForFile(String dossierId, String fileId) {
var dossier = dossierService.getDossierById(dossierId);
if (dossier.getHardDeletedTime() != null) {
if (fileStatusPersistenceService.getStatus(fileId).getHardDeletedTime() != null) {
saasMigrationStatusPersistenceService.updateStatus(fileId, SaasMigrationStatus.FINISHED);
return;
} else {
fileStatusPersistenceService.hardDelete(fileId, dossier.getHardDeletedTime());
saasMigrationStatusPersistenceService.updateStatus(fileId, SaasMigrationStatus.FINISHED);
return;
}
}
if (fileStatusPersistenceService.getStatus(fileId).getHardDeletedTime() != null) {
saasMigrationStatusPersistenceService.updateStatus(fileId, SaasMigrationStatus.FINISHED);
return;
}
log.info("Starting Migration for dossierId {} and fileId {}", dossierId, fileId);
saasMigrationStatusPersistenceService.createMigrationRequiredStatus(dossierId, fileId);
var layoutParsingRequest = layoutParsingRequestFactory.build(dossier.getDossierTemplate().getId(), dossierId, fileId, false);
rabbitTemplate.convertAndSend(LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_EXCHANGE, TenantContext.getTenantId(), layoutParsingRequest);
}
public void handleLayoutParsingFinished(String dossierId, String fileId) {
if (!layoutParsingFilesExist(dossierId, fileId)) {
saasMigrationStatusPersistenceService.updateErrorStatus(fileId, "Layout parsing files not written!");
return;
}
log.info("Layout Parsing finished for saas migration for tenant {} dossier {} and file {}", TenantContext.getTenantId(), dossierId, fileId);
saasMigrationStatusPersistenceService.updateStatus(fileId, SaasMigrationStatus.DOCUMENT_FILES_MIGRATED);
if (fileStatusPersistenceService.getStatus(fileId).getWorkflowStatus().equals(WorkflowStatus.APPROVED)) {
saasMigrationManualChangesUpdateService.convertUnprocessedAddToDictionariesToLocalChanges(fileId);
}
try {
indexingService.reindex(dossierId, Set.of(fileId), false);
String dossierTemplateId = dossierService.getDossierById(dossierId).getDossierTemplateId();
rabbitTemplate.convertAndSend(MIGRATION_REQUEST_QUEUE,
MigrationRequest.builder()
.dossierTemplateId(dossierTemplateId)
.dossierId(dossierId)
.fileId(fileId)
.fileIsApproved(fileStatusPersistenceService.getStatus(fileId).getWorkflowStatus().equals(WorkflowStatus.APPROVED))
.manualRedactions(manualRedactionProviderService.getManualRedactions(fileId, ManualChangesQueryOptions.allWithoutDeleted()))
.entitiesWithComments(commentService.getCommentCounts(fileId).keySet())
.build());
} catch (Exception e) {
log.error("Queuing of entityLog migration failed with {}", e.getMessage());
saasMigrationStatusPersistenceService.updateErrorStatus(fileId, String.format("Queuing of entityLog migration failed with %s", e.getMessage()));
}
}
private boolean layoutParsingFilesExist(String dossierId, String fileId) {
return storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE)) //
&& storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT)) //
&& storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES)) //
&& storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION));
}
public void handleEntityLogMigrationFinished(String dossierId, String fileId) {
if (!entityLogMigrationFilesExist(dossierId, fileId)) {
saasMigrationStatusPersistenceService.updateErrorStatus(fileId, "Migration Files not written!");
return;
}
saasMigrationStatusPersistenceService.updateStatus(fileId, SaasMigrationStatus.REDACTION_LOGS_MIGRATED);
log.info("EntityLog migration finished for saas migration for tenant {} dossier {} and file {}", TenantContext.getTenantId(), dossierId, fileId);
migrateAnnotationIdsAndAddManualAddRedactionsAndDeleteSectionGrid(dossierId, fileId);
}
private boolean entityLogMigrationFilesExist(String dossierId, String fileId) {
return storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG)) && storageService.objectExists(
TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.MIGRATED_IDS));
}
public void handleError(String dossierId, String fileId, String errorCause, String retryExchange) {
var migrationEntry = saasMigrationStatusPersistenceService.findById(fileId);
Integer numErrors = migrationEntry.getProcessingErrorCounter();
if (numErrors != null && numErrors <= settings.getMaxErrorRetries()) {
saasMigrationStatusPersistenceService.updateErrorCounter(fileId, numErrors + 1, errorCause);
rabbitTemplate.convertAndSend(retryExchange, TenantContext.getTenantId(), MigrationRequest.builder().dossierId(dossierId).fileId(fileId).build());
log.error("Retrying error during saas migration for tenant {} dossier {} and file {}, cause {}", TenantContext.getTenantId(), dossierId, fileId, errorCause);
} else {
saasMigrationStatusPersistenceService.updateErrorStatus(fileId, errorCause);
log.error("Error during saas migration for tenant {} dossier {} and file {}, cause {}", TenantContext.getTenantId(), dossierId, fileId, errorCause);
}
}
public void requeueErrorFiles() {
automaticAnalysisJob.stopForTenant(TenantContext.getTenantId());
saasMigrationStatusPersistenceService.findAllByStatus(SaasMigrationStatus.ERROR)
.forEach(migrationStatus -> startMigrationForFile(migrationStatus.getDossierId(), migrationStatus.getFileId()));
}
private void migrateAnnotationIdsAndAddManualAddRedactionsAndDeleteSectionGrid(String dossierId, String fileId) {
MigratedIds migratedIds = getMigratedIds(dossierId, fileId);
Map<String, String> oldToNewMapping = migratedIds.buildOldToNewMapping();
updateAnnotationIds(dossierId, fileId, oldToNewMapping);
List<String> forceRedactionIdsToDelete = migratedIds.getForceRedactionIdsToDelete();
softDeleteForceRedactions(fileId, forceRedactionIdsToDelete);
log.info("Soft-deleted force redactions.");
List<ManualRedactionEntry> manualRedactionEntriesToAdd = migratedIds.getManualRedactionEntriesToAdd();
int count = addManualRedactionEntries(manualRedactionEntriesToAdd);
log.info("Added {} additional manual entries.", count);
deleteSectionGridAndNerEntitiesFiles(dossierId, fileId);
saasMigrationStatusPersistenceService.updateStatus(fileId, SaasMigrationStatus.FINISHED);
log.info("AnnotationIds migration finished for saas migration for tenant {} dossier {} and file {}", TenantContext.getTenantId(), dossierId, fileId);
finalizeMigration(); // AutomaticAnalysisJob should be re-enabled by re-starting the persistence service pod after a rule change
}
private void deleteSectionGridAndNerEntitiesFiles(String dossierId, String fileId) {
try {
storageService.deleteObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID));
} catch (StorageObjectDoesNotExist e) {
log.info("No sectiongrid found for {}, {}, ignoring....", dossierId, fileId);
}
try {
storageService.deleteObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
} catch (StorageObjectDoesNotExist e) {
log.info("No ner entities file found for {}, {}, ignoring....", dossierId, fileId);
}
}
private void softDeleteForceRedactions(String fileId, List<String> forceRedactionIdsToDelete) {
manualRedactionService.softDeleteForceRedactions(fileId, forceRedactionIdsToDelete);
}
private int addManualRedactionEntries(List<ManualRedactionEntry> manualRedactionEntriesToAdd) {
manualRedactionEntriesToAdd.forEach(add -> {
if (add.getSection() != null && add.getSection().length() > 254) {
add.setSection(add.getSection().substring(0, 254));
}
});
return manualRedactionService.addManualRedactionEntries(manualRedactionEntriesToAdd, true);
}
public void revertMigrationForFile(String dossierId, String fileId) {
log.info("Reverting Migration for dossierId {} and fileId {}", dossierId, fileId);
MigratedIds migratedIds = getMigratedIds(dossierId, fileId);
Map<String, String> newToOldMapping = migratedIds.buildNewToOldMapping();
updateAnnotationIds(dossierId, fileId, newToOldMapping);
deleteManualRedactionEntries(migratedIds.getManualRedactionEntriesToAdd());
undeleteForceRedactions(fileId, migratedIds.getForceRedactionIdsToDelete());
saasMigrationStatusPersistenceService.createMigrationRequiredStatus(dossierId, fileId);
}
private void undeleteForceRedactions(String fileId, List<String> forceRedactionIdsToDelete) {
manualRedactionService.undeleteForceRedactions(fileId, forceRedactionIdsToDelete);
}
private void deleteManualRedactionEntries(List<ManualRedactionEntry> manualRedactionEntriesToAdd) {
manualRedactionService.deleteManualRedactionEntries(manualRedactionEntriesToAdd);
}
private void updateAnnotationIds(String dossierId, String fileId, Map<String, String> idMapping) {
try {
updateAnnotationIds(fileId, idMapping);
} catch (Exception e) {
String message = String.format("Error during annotation id migration for tenant %s dossier %s and file %s, cause %s",
TenantContext.getTenantId(),
dossierId,
fileId,
e.getMessage());
saasMigrationStatusPersistenceService.updateErrorStatus(fileId, message);
log.error(message);
throw e;
}
}
private void finalizeMigration() {
if (saasMigrationStatusPersistenceService.countByStatus(SaasMigrationStatus.FINISHED) == saasMigrationStatusPersistenceService.countAll()) {
// automaticAnalysisJob.startForTenant(TenantContext.getTenantId()); // AutomaticAnalysisJob should be re-enabled by re-starting the persistence service pod after a rule change
tenantProvider.updateDetails(TenantContext.getTenantId(), UpdateDetailsRequest.builder().key("persistence-service-ready").value(true).build());
log.info("Saas migration finished for tenantId {}, re-enabled scheduler", TenantContext.getTenantId());
}
}
public void updateAnnotationIds(String fileId, Map<String, String> idMapping) {
AtomicInteger numUpdates = new AtomicInteger(0);
AtomicInteger numCommentUpdates = new AtomicInteger(0);
idMapping.forEach((key, value) -> {
AnnotationEntityId oldAnnotationEntityId = buildAnnotationId(fileId, key);
AnnotationEntityId newAnnotationEntityId = buildAnnotationId(fileId, value);
numUpdates.getAndAdd(saasAnnotationIdMigrationService.updateManualAddRedaction(oldAnnotationEntityId, newAnnotationEntityId));
numUpdates.getAndAdd(saasAnnotationIdMigrationService.updateRemoveRedaction(oldAnnotationEntityId, newAnnotationEntityId));
numUpdates.getAndAdd(saasAnnotationIdMigrationService.updateForceRedaction(oldAnnotationEntityId, newAnnotationEntityId));
numUpdates.getAndAdd(saasAnnotationIdMigrationService.updateResizeRedaction(oldAnnotationEntityId, newAnnotationEntityId));
numUpdates.getAndAdd(saasAnnotationIdMigrationService.updateRecategorizationRedaction(oldAnnotationEntityId, newAnnotationEntityId));
numUpdates.getAndAdd(saasAnnotationIdMigrationService.updateLegalBasisChangeRedaction(oldAnnotationEntityId, newAnnotationEntityId));
numCommentUpdates.getAndAdd(saasAnnotationIdMigrationService.updateCommentIds(fileId, key, value));
});
log.info("Migrated {} annotationIds and {} comments for file {}", numUpdates.get(), numCommentUpdates, fileId);
}
private AnnotationEntityId buildAnnotationId(String fileId, String annotationId) {
return new AnnotationEntityId(annotationId, fileId);
}
private MigratedIds getMigratedIds(String dossierId, String fileId) {
try {
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.MIGRATED_IDS), MigratedIds.class);
} catch (StorageObjectDoesNotExist e) {
throw new NotFoundException(String.format("MigratedIds does not exist for Dossier ID \"%s\" and File ID \"%s\"!", dossierId, fileId));
} catch (StorageException e) {
throw new InternalServerErrorException(e.getMessage());
}
}
}

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.persistence.management.v1.processor.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class OCRStatusUpdateResponse {
private String fileId;
private int numberOfPagesToOCR;
private int numberOfOCRedPages;
private boolean ocrFinished;
private boolean ocrStarted;
}

View File

@ -1,7 +1,5 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.springframework.retry.support.RetryTemplate;
@ -17,7 +15,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.UntouchedDo
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.search.v1.model.IndexMessageType;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import jakarta.transaction.Transactional;
import lombok.RequiredArgsConstructor;
@ -117,7 +114,7 @@ public class FileStatusProcessingUpdateService {
}
public void requeueOCROrMarkFailed(String dossierId, String fileId, Set<AzureOcrFeature> features, FileErrorInfo fileErrorInfo) {
public void requeueOCROrMarkFailed(String dossierId, String fileId, FileErrorInfo fileErrorInfo) {
var fileEntity = fileStatusPersistenceService.getStatus(fileId);
if (fileEntity.getProcessingErrorCounter() > settings.getMaxErrorRetries()) {
@ -125,7 +122,7 @@ public class FileStatusProcessingUpdateService {
} else {
fileStatusService.setStatusOcrProcessing(fileId,
fileEntity.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING) ? fileEntity.getProcessingErrorCounter() + 1 : 0);
fileStatusService.addToOcrQueue(dossierId, fileId, 2, features);
fileStatusService.addToOcrQueue(dossierId, fileId, 2, false);
}
}

View File

@ -12,6 +12,7 @@ import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
@ -20,7 +21,6 @@ import com.iqser.red.service.pdftron.redaction.v1.api.model.ProcessUntouchedDocu
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.TypeEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.ComponentDefinitionEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.DossierTemplateEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileAttributeEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.dossier.FileEntity;
import com.iqser.red.service.persistence.management.v1.processor.entity.projection.DossierStatsFileProjection;
@ -31,6 +31,7 @@ import com.iqser.red.service.persistence.management.v1.processor.model.AnalysisT
import com.iqser.red.service.persistence.management.v1.processor.model.CvAnalysisServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.ManualChangesQueryOptions;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.VisualLayoutParsingServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.image.ImageServiceRequest;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
@ -79,7 +80,6 @@ import com.knecon.fforesight.llm.service.LlmNerMessage;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingQueueNames;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.knecon.fforesight.tenantcommons.TenantContext;
import jakarta.transaction.Transactional;
@ -316,7 +316,7 @@ public class FileStatusService {
}
log.info("Add file: {} from dossier {} to OCR queue", fileId, dossierId);
setStatusOcrQueued(dossierId, fileId);
setStatusOcrQueued(dossierId, fileId, false);
sendReadOnlyAnalysisEvent(dossierId, fileId, fileEntity);
return;
}
@ -566,13 +566,7 @@ public class FileStatusService {
}
public void setStatusOcrQueued(String dossierId, String fileId) {
setStatusOcrQueued(dossierId, fileId, false);
}
public void setStatusOcrQueued(String dossierId, String fileId, boolean idp) {
public void setStatusOcrQueued(String dossierId, String fileId, boolean allPages) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
@ -584,7 +578,7 @@ public class FileStatusService {
updateOCRStartTime(fileId);
fileStatusPersistenceService.updateProcessingStatus(fileId, ProcessingStatus.OCR_PROCESSING_QUEUED);
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.OCR_PROCESSING, fileStatus.getNumberOfAnalyses() + 1);
addToOcrQueue(dossierId, fileId, 2, idp);
addToOcrQueue(dossierId, fileId, 2, allPages);
}
@ -765,39 +759,25 @@ public class FileStatusService {
}
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean useIdp) {
public void addToOcrQueue(String dossierId, String fileId, int priority, boolean allPages) {
DossierTemplateEntity dt = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId));
var removeWatermark = dossierTemplatePersistenceService.getDossierTemplate(dossierPersistenceService.getDossierTemplateId(dossierId)).isRemoveWatermark();
Set<AzureOcrFeature> features = new HashSet<>();
if (dt.isFontStyleDetection()) {
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
}
if (dt.isRemoveWatermark()) {
if (removeWatermark) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
if (useIdp || dt.isIdpByDefault()) {
features.add(AzureOcrFeature.IDP);
if (allPages) {
features.add(AzureOcrFeature.ALL_PAGES);
}
if (dt.isRotationCorrectionByDefault()) {
if (currentApplicationTypeProvider.isDocuMine()) {
features.add(AzureOcrFeature.ROTATION_CORRECTION);
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
}
addToOcrQueue(dossierId, fileId, priority, features);
}
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
var request = DocumentRequest.builder()
// needed for legacy OCR-services
.dossierId(dossierId)
.fileId(fileId)
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS))
// new api
.originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
.viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))
.idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT))
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services
.features(features)
.build();
@ -985,13 +965,6 @@ public class FileStatusService {
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr) {
setStatusFullReprocess(dossierId, fileId, priority, requiresStructureAnalysis, runOcr, false);
}
@Transactional
public void setStatusFullReprocess(String dossierId, String fileId, boolean priority, boolean requiresStructureAnalysis, boolean runOcr, boolean idp) {
FileEntity fileStatus = fileStatusPersistenceService.getStatus(fileId);
if (fileStatus.isExcluded()) {
@ -999,14 +972,14 @@ public class FileStatusService {
return;
}
if (requiresStructureAnalysis || runOcr || idp) {
if (requiresStructureAnalysis || runOcr) {
log.info("Delete text and NER entities from file {} in dossier {}", fileId, dossierId);
fileManagementStorageService.deleteDocumentAndNerObjects(dossierId, fileId);
}
if (runOcr || idp) {
if (runOcr) {
fileStatusPersistenceService.resetOcrStartAndEndDate(fileId);
setStatusOcrQueued(dossierId, fileId, idp);
setStatusOcrQueued(dossierId, fileId, false);
return;
}
@ -1205,10 +1178,4 @@ public class FileStatusService {
return reanalysisRequiredStatusService.enhanceFileStatusWithAnalysisRequirements(fileModels);
}
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
fileStatusPersistenceService.increaseTokenUsage(fileId, promptTokens, completionTokens);
}
}

View File

@ -10,7 +10,6 @@ import com.iqser.red.service.persistence.management.v1.processor.exception.Confl
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.DossierPersistenceService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.ReanalysisSettings;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.DeleteImportedRedactionsRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileModel;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -171,18 +170,18 @@ public class ReanalysisService {
}
public void ocrDossier(String dossierId, boolean idp) {
public void ocrDossier(String dossierId) {
var relevantFiles = getAllFilesForDossier(dossierId, validFilesFilter);
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.filter(fileStatus -> fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED))
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), false));
}
public void ocrFile(String dossierId, String fileId, boolean force, boolean idp) {
public void ocrFile(String dossierId, String fileId, boolean force, boolean allPages) {
dossierPersistenceService.getAndValidateDossier(dossierId);
FileModel dossierFile = fileStatusService.getStatus(fileId);
@ -202,31 +201,30 @@ public class ReanalysisService {
}
if (force) {
fileStatusService.setStatusOcrQueued(dossierId, fileId, idp);
fileStatusService.setStatusOcrQueued(dossierId, fileId, allPages);
} else {
if (dossierFile.getOcrStartTime() != null) {
throw new ConflictException("File already has been OCR processed");
}
ocrFiles(dossierId, Sets.newHashSet(fileId), idp);
ocrFiles(dossierId, Sets.newHashSet(fileId), allPages);
}
}
public void ocrFiles(String dossierId, Set<String> fileIds, boolean idp) {
public void ocrFiles(String dossierId, Set<String> fileIds, boolean allPages) {
var relevantFiles = getRelevantFiles(dossierId, fileIds);
if (relevantFiles.stream()
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) //
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING_QUEUED) //
&& !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
.anyMatch(fileStatus -> !fileStatus.getProcessingStatus().equals(ProcessingStatus.PROCESSED) && !fileStatus.getProcessingStatus()
.equals(ProcessingStatus.OCR_PROCESSING_QUEUED) && !fileStatus.getProcessingStatus().equals(ProcessingStatus.OCR_PROCESSING))) {
throw new ConflictException("File is not processed");
}
relevantFiles.stream()
.filter(fileStatus -> fileStatus.getOcrStartTime() == null)
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), idp));
.forEach(fileStatus -> fileStatusService.setStatusOcrQueued(dossierId, fileStatus.getId(), allPages));
}
@ -276,12 +274,12 @@ public class ReanalysisService {
public List<FileModel> reanalyzeTemplate(String dossierTemplateId, ReanalysisSettings reanalysisSettings) {
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.getDossierIds(), reanalysisSettings.getFileIds());
requestValidator.validateRequestOrThrow404(dossierTemplateId, reanalysisSettings.dossierIds(), reanalysisSettings.fileIds());
var files = fileStatusService.getDossierTemplateStatus(dossierTemplateId)
.stream()
.filter(file -> isInList(file, reanalysisSettings))
.filter(reanalysisSettings.getFileStatusFilter())
.filter(reanalysisSettings.fileStatusFilter().asPredicate())
.peek(file -> log.info("Reanalyzing file {}", file.getId()))
.collect(Collectors.toList());
@ -290,9 +288,8 @@ public class ReanalysisService {
files.forEach(file -> fileStatusService.setStatusFullReprocess(file.getDossierId(),
file.getId(),
false,
reanalysisSettings.isRepeatStructureAnalysis(),
reanalysisSettings.isRunOcr(),
reanalysisSettings.isRunIdp()));
reanalysisSettings.repeatStructureAnalysis(),
reanalysisSettings.runOcr()));
return rejectedFiles;
}
@ -316,8 +313,8 @@ public class ReanalysisService {
private boolean isInList(FileModel file, ReanalysisSettings reAnalysisSettings) {
return (reAnalysisSettings.getFileIds().isEmpty() || reAnalysisSettings.getFileIds().contains(file.getId())) //
&& (reAnalysisSettings.getDossierIds().isEmpty() || reAnalysisSettings.getDossierIds().contains(file.getDossierId()));
return (reAnalysisSettings.fileIds().isEmpty() || reAnalysisSettings.fileIds().contains(file.getId())) //
&& (reAnalysisSettings.dossierIds().isEmpty() || reAnalysisSettings.dossierIds().contains(file.getDossierId()));
}
}

View File

@ -14,7 +14,6 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.SaasMigrationStatusPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings;
import com.iqser.red.service.persistence.management.v1.processor.utils.TenantUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileModel;
@ -38,7 +37,6 @@ public class AutomaticAnalysisJob implements Job {
private final FileStatusService fileStatusService;
private final TenantProvider tenantProvider;
private final ObservationRegistry observationRegistry;
private final SaasMigrationStatusPersistenceService saasMigrationStatusPersistenceService;
@Setter
private boolean schedulingStopped;
@ -69,11 +67,6 @@ public class AutomaticAnalysisJob implements Job {
TenantContext.setTenantId(tenant.getTenantId());
if (!saasMigrationStatusPersistenceService.migrationFinishedForTenant()) {
log.info("[Tenant:{}] Skipping scheduling as there are files that require migration.", tenant.getTenantId());
return;
}
String queueName = MessagingConfiguration.REDACTION_REQUEST_QUEUE_PREFIX + "_" + tenant.getTenantId();
var redactionQueueInfo = amqpAdmin.getQueueInfo(queueName);
if (redactionQueueInfo != null) {

View File

@ -27,7 +27,8 @@ public class LayoutParsingRequestFactory {
public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) {
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType();
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(
dossierTemplateId).getLayoutParsingType();
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
@ -38,9 +39,6 @@ public class LayoutParsingRequestFactory {
Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty();
Optional<String> optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty();
return LayoutParsingRequest.builder()
.layoutParsingType(layoutParsingType)
.identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority))
@ -57,7 +55,6 @@ public class LayoutParsingRequestFactory {
.documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId,
fileId,
FileType.MARKDOWN)) : Optional.empty())
.idpResultStorageId(optionalIdpResultFileId)
.build();
}

View File

@ -21,6 +21,7 @@ import com.iqser.red.service.persistence.management.v1.processor.entity.projecti
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.model.FileIdentifier;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileAttributesRepository;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.FileRepository;
@ -30,8 +31,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.component.C
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.ProcessingStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.WorkflowStatus;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import jakarta.persistence.EntityManager;
import jakarta.transaction.Transactional;
@ -603,9 +602,9 @@ public class FileStatusPersistenceService {
public int getNumberOfAssignedFiles(String userId) {
List<FileEntity> files = fileRepository.findFilesByAssignee(userId);
return Math.toIntExact(files.stream()
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
.count());
return files.stream()
.filter(fileEntity -> fileEntity.getHardDeletedTime() == null)
.collect(Collectors.toList()).size();
}
@ -681,7 +680,6 @@ public class FileStatusPersistenceService {
fileRepository.updateOCRStatus(response.getFileId(),
response.getNumberOfPagesToOCR(),
response.getNumberOfOCRedPages(),
response.getFeatures().contains(AzureOcrFeature.IDP) ? response.getNumberOfOCRedPages() : 0,
response.isOcrFinished() ? OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS) : null,
OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
}
@ -767,11 +765,4 @@ public class FileStatusPersistenceService {
fileRepository.updateLastDownloadForFile(fileId, null);
}
@Transactional
public void increaseTokenUsage(String fileId, int promptTokens, int completionTokens) {
fileRepository.increaseTokenUsage(fileId, promptTokens, completionTokens);
}
}

View File

@ -1,96 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.service.persistence;
import com.iqser.red.service.persistence.management.v1.processor.entity.annotations.AnnotationEntityId;
import com.iqser.red.service.persistence.management.v1.processor.entity.migration.SaasMigrationStatusEntity;
import com.iqser.red.service.persistence.management.v1.processor.exception.NotFoundException;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository.SaasMigrationStatusRepository;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.SaasMigrationStatus;
import jakarta.transaction.Transactional;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import java.util.List;
@Service
@RequiredArgsConstructor
public class SaasMigrationStatusPersistenceService {
private final SaasMigrationStatusRepository saasMigrationStatusRepository;
public List<SaasMigrationStatusEntity> findAllByStatus(SaasMigrationStatus status) {
return saasMigrationStatusRepository.findAllByStatus(status);
}
public SaasMigrationStatusEntity findById(String fileId) {
var migrationStatusOptional = saasMigrationStatusRepository.findById(fileId);
if (migrationStatusOptional.isPresent()) {
return migrationStatusOptional.get();
}
throw new NotFoundException("No migration entry found for fileId" + fileId);
}
public boolean isMigrating(String fileId) {
var migrationStatusOptional = saasMigrationStatusRepository.findById(fileId);
return migrationStatusOptional.isPresent() && migrationStatusOptional.get().getStatus() != SaasMigrationStatus.FINISHED;
}
public boolean migrationFinishedForTenant() {
return saasMigrationStatusRepository.findAllWhereStatusNotFinishedAndNotError() == 0;
}
@Transactional
public void createMigrationRequiredStatus(String dossierId, String fileId) {
saasMigrationStatusRepository.save(SaasMigrationStatusEntity.builder().fileId(fileId).dossierId(dossierId).status(SaasMigrationStatus.MIGRATION_REQUIRED).build());
}
@Transactional
public void updateStatus(String fileId, SaasMigrationStatus status) {
saasMigrationStatusRepository.updateStatus(fileId, status);
}
@Transactional
public void updateErrorStatus(String fileId, String errorCause) {
saasMigrationStatusRepository.updateErrorStatus(fileId, SaasMigrationStatus.ERROR, errorCause);
}
@Transactional
public void updateErrorCounter(String fileId, Integer processingErrorCounter, String errorCause) {
saasMigrationStatusRepository.updateErrorCounter(fileId, processingErrorCounter, errorCause);
}
public int countByStatus(SaasMigrationStatus status) {
return saasMigrationStatusRepository.countByStatus(status);
}
public int countAll() {
return saasMigrationStatusRepository.countAll();
}
public List<SaasMigrationStatusEntity> findAll() {
return saasMigrationStatusRepository.findAll();
}
}

View File

@ -389,15 +389,11 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Transactional
@Modifying(clearAutomatically = true)
@Query(value = "update FileEntity f set f.numberOfOCRedPages = :numberOfOCRedPages, "
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, "
+ "f.numberOfIdpPages = :numberOfIdpPages, "
+ "f.ocrEndTime = :ocrEndTime, "
+ "f.lastUpdated = :lastUpdated "
+ "where f.id = :fileId")
+ "f.numberOfPagesToOCR = :numberOfPagesToOCR, f.ocrEndTime = :ocrEndTime, "
+ "f.lastUpdated = :lastUpdated where f.id = :fileId")
void updateOCRStatus(@Param("fileId") String fileId,
@Param("numberOfPagesToOCR") int numberOfPagesToOCR,
@Param("numberOfOCRedPages") int numberOfOCRedPages,
@Param("numberOfIdpPages") int numberOfIdpPages,
@Param("ocrEndTime") OffsetDateTime ocrEndTime,
@Param("lastUpdated") OffsetDateTime lastUpdated);
@ -413,7 +409,7 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Modifying(clearAutomatically = true)
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL, f.numberOfIdpPages = NULL where f.id = :fileId")
@Query("update FileEntity f set f.ocrStartTime = NULL, f.ocrEndTime = NULL, f.numberOfPagesToOCR = NULL, f.numberOfOCRedPages = NULL where f.id = :fileId")
void resetOcrStartAndEndDate(@Param("fileId") String fileId);
@ -483,11 +479,6 @@ public interface FileRepository extends JpaRepository<FileEntity, String> {
@Query("SELECT f FROM FileEntity f WHERE f.id in :fileIds AND f.dossierId = :dossierId")
List<FileEntity> findAllDossierIdAndIds(@Param("dossierId") String dossierId, @Param("fileIds") Set<String> fileIds);
@Modifying
@Query("UPDATE FileEntity f SET f.usedPromptTokens = f.usedPromptTokens + :promptTokens, f.usedCompletionTokens = f.usedCompletionTokens + :completionTokens WHERE f.id = :id")
void increaseTokenUsage(@Param("id") String fileId, @Param("promptTokens") int promptTokens, @Param("completionTokens") int completionTokens);
}

View File

@ -1,44 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.service.persistence.repository;
import com.iqser.red.service.persistence.management.v1.processor.entity.migration.SaasMigrationStatusEntity;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.SaasMigrationStatus;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import java.util.List;
public interface SaasMigrationStatusRepository extends JpaRepository<SaasMigrationStatusEntity, String> {
List<SaasMigrationStatusEntity> findAllByStatus(SaasMigrationStatus status);
@Modifying
@Query("update SaasMigrationStatusEntity e set e.status = :status where e.fileId = :fileId")
void updateStatus(@Param("fileId") String fileId, @Param("status") SaasMigrationStatus status);
@Modifying
@Query("update SaasMigrationStatusEntity e set e.status = :status, e.errorCause = :errorCause where e.fileId = :fileId")
void updateErrorStatus(@Param("fileId") String fileId, @Param("status") SaasMigrationStatus status, @Param("errorCause") String errorCause);
@Modifying
@Query("update SaasMigrationStatusEntity e set e.processingErrorCounter = :processingErrorCounter, e.errorCause = :errorCause where e.fileId = :fileId")
void updateErrorCounter(@Param("fileId") String fileId, @Param("processingErrorCounter") Integer processingErrorCounter, @Param("errorCause") String errorCause);
@Query("select count(*) from SaasMigrationStatusEntity e where e.status = :status")
int countByStatus(@Param("status") SaasMigrationStatus status);
@Query("select count(*) from SaasMigrationStatusEntity")
int countAll();
@Query("select count(*) from SaasMigrationStatusEntity e where e.status != 'FINISHED' and e.status != 'ERROR'")
int findAllWhereStatusNotFinishedAndNotError();
}

View File

@ -11,14 +11,12 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.migration.SaasMigrationService;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.ImageSimilarityService;
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.QueueMessageIdentifierService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.SaasMigrationStatusPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -40,8 +38,6 @@ public class LayoutParsingFinishedMessageReceiver {
private final FileStatusService fileStatusService;
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
private final ObjectMapper objectMapper;
private final SaasMigrationStatusPersistenceService saasMigrationStatusPersistenceService;
private final SaasMigrationService saasMigrationService;
private final ImageSimilarityService imageSimilarityService;
private final WebsocketService websocketService;
@ -53,11 +49,7 @@ public class LayoutParsingFinishedMessageReceiver {
var dossierId = QueueMessageIdentifierService.parseDossierId(response.identifier());
var fileId = QueueMessageIdentifierService.parseFileId(response.identifier());
log.info("Layout parsing has finished for {}/{} in {}", dossierId, fileId, LayoutParsingQueueNames.LAYOUT_PARSING_RESPONSE_EXCHANGE);
if (saasMigrationStatusPersistenceService.isMigrating(QueueMessageIdentifierService.parseFileId(response.identifier()))) {
saasMigrationService.handleLayoutParsingFinished(QueueMessageIdentifierService.parseDossierId(response.identifier()),
QueueMessageIdentifierService.parseFileId(response.identifier()));
return;
}
fileStatusService.setStatusAnalyse(QueueMessageIdentifierService.parseDossierId(response.identifier()),
QueueMessageIdentifierService.parseFileId(response.identifier()),
@ -88,13 +80,6 @@ public class LayoutParsingFinishedMessageReceiver {
if (errorCause == null) {
errorCause = "Error occured during layout parsing!";
}
if (saasMigrationStatusPersistenceService.isMigrating(QueueMessageIdentifierService.parseFileId(analyzeRequest.identifier()))) {
saasMigrationService.handleError(QueueMessageIdentifierService.parseDossierId(analyzeRequest.identifier()),
QueueMessageIdentifierService.parseFileId(analyzeRequest.identifier()),
errorCause,
LayoutParsingQueueNames.LAYOUT_PARSING_REQUEST_EXCHANGE);
return;
}
OffsetDateTime timestamp = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER);
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);

View File

@ -63,7 +63,7 @@ public class NerMessageReceiver {
String dossierId = QueueMessageIdentifierService.parseDossierId(message.getIdentifier());
String fileId = QueueMessageIdentifierService.parseFileId(message.getIdentifier());
addFileIdToTrace(fileId);
fileStatusService.increaseTokenUsage(fileId, message.getPromptTokens(), message.getCompletionTokens());
log.info("Received message from {} for dossierId {} and fileId {}", LLM_ENTITY_RESPONSE_LISTENER_ID, dossierId, fileId);
fileStatusPersistenceService.setAiCreationVersion(fileId, message.getAiCreationVersion());
fileStatusService.setStatusForceAnalyse(dossierId, fileId, false);

View File

@ -11,13 +11,14 @@ import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.model.OCRStatusUpdateResponse;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.websocket.WebsocketService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -42,8 +43,6 @@ public class OCRProcessingMessageReceiver {
@RabbitListener(id = OCR_STATUS_UPDATE_LISTENER_ID)
public void handleOCRStatusUpdateMessage(OCRStatusUpdateResponse response) {
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
var fileModel = fileStatusService.getStatus(response.getFileId());
if (response.isOcrStarted()) {
@ -58,6 +57,7 @@ public class OCRProcessingMessageReceiver {
response.getNumberOfOCRedPages());
}
log.debug("Received message {} in {}", response, MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE);
}
@ -95,7 +95,6 @@ public class OCRProcessingMessageReceiver {
timestamp = timestamp != null ? timestamp : OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS);
fileStatusProcessingUpdateService.requeueOCROrMarkFailed(ocrRequestMessage.getDossierId(),
ocrRequestMessage.getFileId(),
ocrRequestMessage.getFeatures(),
new FileErrorInfo(errorMessage, MessagingConfiguration.OCR_DLQ, "ocr-service", timestamp));
}

View File

@ -1,53 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import static com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration.MIGRATION_DLQ;
import static com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration.MIGRATION_REQUEST_QUEUE;
import static com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration.MIGRATION_RESPONSE_QUEUE;
import static com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration.X_ERROR_INFO_HEADER;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.migration.SaasMigrationService;
import com.iqser.red.service.redaction.v1.model.MigrationRequest;
import com.iqser.red.service.redaction.v1.model.MigrationResponse;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class RedactionServiceSaasMigrationMessageReceiver {
private final SaasMigrationService saasMigrationService;
private final ObjectMapper objectMapper;
@SneakyThrows
@RabbitListener(queues = MIGRATION_RESPONSE_QUEUE)
public void receive(MigrationResponse response) {
saasMigrationService.handleEntityLogMigrationFinished(response.getDossierId(), response.getFileId());
log.info("Received message {} in {}", response, MIGRATION_RESPONSE_QUEUE);
}
@SneakyThrows
@RabbitListener(queues = MIGRATION_DLQ)
public void handleDLQMessage(Message failedMessage) {
var migrationRequest = objectMapper.readValue(failedMessage.getBody(), MigrationRequest.class);
String errorCause = failedMessage.getMessageProperties().getHeader(X_ERROR_INFO_HEADER);
if (errorCause == null) {
errorCause = "Error occured during entityLog migration!";
}
saasMigrationService.handleError(migrationRequest.getDossierId(), migrationRequest.getFileId(), errorCause, MIGRATION_REQUEST_QUEUE);
}
}

View File

@ -245,6 +245,8 @@ databaseChangeLog:
file: db/changelog/tenant/149-add-indexes-across-tables-for-performance.yaml
- include:
file: db/changelog/tenant/150-add-component-mapping-indexes.yaml
- include:
file: db/changelog/tenant/151-drop-saas-migration-table.changelog.yaml
- include:
file: db/changelog/tenant/152-add-ai-fields-to-entity.yaml
- include:
@ -259,7 +261,3 @@ databaseChangeLog:
file: db/changelog/tenant/158-add-app-version-history-table-and-layout-parser-version-field-to-file.yaml
- include:
file: db/changelog/tenant/159-cleanup-truncated-indices.yaml
- include:
file: db/changelog/tenant/160-add-usage-fields-to-file-for-idp-and-llm.yaml
- include:
file: db/changelog/tenant/161-add-idp-related-fields-to-dossier-template.yaml

View File

@ -0,0 +1,12 @@
databaseChangeLog:
- changeSet:
id: drop-saas_migration_status-if-exists
author: dom
comment: drop saas_migration_status if exists
preConditions:
- onFail: MARK_RAN
- tableExists:
tableName: saas_migration_status
changes:
- dropTable:
tableName: saas_migration_status

View File

@ -1,24 +0,0 @@
databaseChangeLog:
- changeSet:
id: add-llm-usage-fields-to-file
author: kilian
changes:
- addColumn:
tableName: file
columns:
- column:
name: used_completion_tokens
type: int
defaultValueNumeric: "0"
constraints:
nullable: false
- column:
name: used_prompt_tokens
type: int
defaultValueNumeric: "0"
constraints:
nullable: false
- column:
name: number_of_idp_pages
type: int
defaultValueNumeric: "0"

View File

@ -1,64 +0,0 @@
databaseChangeLog:
- changeSet:
id: add-idp-related-fields-to-dossier-template
author: kilian
changes:
- addColumn:
tableName: dossier_template
columns:
- column:
name: idp_by_default
type: boolean
defaultValueBoolean: false
remarks: "Indicates if IDP is enabled by default"
- column:
name: rotation_correction_by_default
type: boolean
remarks: "Indicates if rotation correction is enabled by default"
- column:
name: font_style_detection
type: boolean
defaultValueBoolean: true
remarks: "Indicates if font style detection is enabled in OCR"
- column:
name: ocr_all_pages
type: boolean
defaultValueBoolean: false
remarks: "Indicates if all pages should be processed during OCR instead of only pages with images"
- update:
tableName: dossier_template
columns:
- column:
name: rotation_correction_by_default
valueBoolean: true
where: "layout_parsing_type = 'DOCUMINE_OLD'"
- update:
tableName: dossier_template
columns:
- column:
name: rotation_correction_by_default
valueBoolean: false
where: "layout_parsing_type != 'DOCUMINE_OLD'"
- changeSet:
id: make-fields-non-nullable
author: kilian
changes:
- addNotNullConstraint:
tableName: dossier_template
columnName: idp_by_default
columnDataType: boolean
- addNotNullConstraint:
tableName: dossier_template
columnName: rotation_correction_by_default
columnDataType: boolean
- addNotNullConstraint:
tableName: dossier_template
columnName: font_style_detection
columnDataType: boolean
- addNotNullConstraint:
tableName: dossier_template
columnName: ocr_all_pages
columnDataType: boolean

View File

@ -142,7 +142,8 @@ fforesight:
ignored-endpoints: [ '/redaction-gateway-v1', '/actuator/health/**',"/redaction-gateway-v1/websocket","/redaction-gateway-v1/websocket/**", '/redaction-gateway-v1/async/download/with-ott/**',
'/internal-api/**', '/redaction-gateway-v1/docs/swagger-ui',
'/redaction-gateway-v1/docs/**','/redaction-gateway-v1/docs',
'/api', '/api/','/api/docs/**','/api/docs','/api/docs/swagger-ui' ]
'/api', '/api/','/api/docs/**','/api/docs','/api/docs/swagger-ui',
'/actuator/prometheus']
enabled: true
springdoc:
base-path: '/api'

View File

@ -1,13 +1,18 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static org.junit.Assert.assertThrows;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.when;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
@ -18,8 +23,8 @@ import com.iqser.red.service.peristence.v1.server.integration.client.FileClient;
import com.iqser.red.service.peristence.v1.server.integration.service.DossierTemplateTesterAndProvider;
import com.iqser.red.service.peristence.v1.server.integration.service.DossierTesterAndProvider;
import com.iqser.red.service.peristence.v1.server.integration.service.FileTesterAndProvider;
import com.iqser.red.service.peristence.v1.server.integration.service.TypeProvider;
import com.iqser.red.service.peristence.v1.server.integration.utils.AbstractPersistenceServerServiceTest;
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
import com.iqser.red.service.persistence.management.v1.processor.entity.configuration.LegalBasisEntity;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.LegalBasisMappingPersistenceService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
@ -33,6 +38,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.warning.ApproveResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.warning.WarningType;
import feign.FeignException;
public class ApprovalTest extends AbstractPersistenceServerServiceTest {
@Autowired
@ -44,15 +51,15 @@ public class ApprovalTest extends AbstractPersistenceServerServiceTest {
@Autowired
private DossierTesterAndProvider dossierTesterAndProvider;
@Autowired
private TypeProvider typeProvider;
@Autowired
private FileClient fileClient;
@SpyBean
private LegalBasisMappingPersistenceService legalBasisMappingPersistenceService;
@SpyBean
private DossierACLService dossierACLService;
@Test
public void testApprovalNoWarnings() {
@ -181,4 +188,54 @@ public class ApprovalTest extends AbstractPersistenceServerServiceTest {
assertTrue(approveResponse.getFileWarnings().isEmpty());
}
@Test
void testApprovalWhenDossierHasNoOwner() {
DossierTemplateModel dossierTemplateModel = dossierTemplateTesterAndProvider.provideTestTemplate();
Dossier dossier = dossierTesterAndProvider.provideTestDossier(dossierTemplateModel);
FileStatus file = fileTesterAndProvider.testAndProvideFile(dossier, "some-file");
fileTesterAndProvider.markFileAsProcessed(dossier.getId(), file.getFileId());
EntityLog entityLog = new EntityLog();
when(entityLogService.getEntityLog(anyString(), anyString(), anyBoolean())).thenReturn(entityLog);
List<com.iqser.red.service.persistence.management.v1.processor.service.users.model.User> allUsers = new ArrayList<>();
allUsers.add(com.iqser.red.service.persistence.management.v1.processor.service.users.model.User.builder()
.userId("manageradmin1@test.com")
.email("manageradmin1@test.com")
.isActive(true)
.roles(Set.of(getAllRoles()))
.build());
allUsers.add(com.iqser.red.service.persistence.management.v1.processor.service.users.model.User.builder()
.userId("manageradmin2@test.com")
.email("manageradmin2@test.com")
.isActive(true)
.roles(Set.of("RED_USER"))
.build());
when(usersClient.getAllUsers(false)).thenReturn(allUsers);
when(usersClient.getAllUsers(true)).thenReturn(allUsers);
doAnswer(invocation -> {
Dossier arg = invocation.getArgument(0);
if (dossier.getId().equals(arg.getId())) {
Dossier emptyDossier = new Dossier();
emptyDossier.setId(arg.getId());
return emptyDossier;
} else {
return invocation.callRealMethod();
}
}).when(dossierACLService).enhanceDossierWithACLData(any(Dossier.class));
FeignException ex = assertThrows(FeignException.Conflict.class, () -> {
fileClient.setStatusApproved(dossier.getId(), file.getFileId(), false);
});
assertTrue(ex.getMessage().contains("Dossier has no owner!"));
}
}

View File

@ -191,7 +191,7 @@ public class ComponentOverrideTest extends AbstractPersistenceServerServiceTest
@Test
@SneakyThrows
public void testDeletedFileOverrides() {
public void testDeletedFileOverrides() throws IOException {
var dossier = dossierTesterAndProvider.provideTestDossier();

View File

@ -1,7 +1,6 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -150,10 +149,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
TypeResponse types = dictionaryClient.getAllTypes(dossierTemplate.getId(), null, true);
List<TypeValue> systemManagedTypes = types.getTypes()
.stream()
.filter(TypeValue::isSystemManaged)
.collect(Collectors.toList());
List<TypeValue> systemManagedTypes = types.getTypes().stream().filter(TypeValue::isSystemManaged).collect(Collectors.toList());
assertThat(systemManagedTypes.size()).isEqualTo(8);
var allTemplates = dossierTemplateClient.getAllDossierTemplates();
@ -288,17 +284,17 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
dictionaryClient.getDictionaryForType(type2.getType(), type2.getDossierTemplateId(), dossier.getId());
var allTypes = dictionaryClient.getAllTypes(dossierTemplate.getId(), dossier.getId(), false).getTypes();
assertThat(allTypes.stream()
.filter(t -> !t.isSystemManaged())
.count()).isEqualTo(4);
assertThat(allTypes
.stream().filter(t -> !t.isSystemManaged()).collect(Collectors.toList())
.size()).isEqualTo(4);
var typesWithRankOfType1 = allTypes.stream()
.filter(t -> t.getRank() == type.getRank())
.toList();
.collect(Collectors.toList());
assertThat(typesWithRankOfType1.size()).isEqualTo(2);
var typesWithRankOfType2 = allTypes.stream()
.filter(t -> t.getRank() == type2.getRank())
.toList();
.collect(Collectors.toList());
assertThat(typesWithRankOfType2.size()).isEqualTo(2);
dictionaryClient.addEntry(createdType1.getType(), createdType1.getDossierTemplateId(), List.of("entry1", "entry2"), false, null, DictionaryEntryType.ENTRY);
@ -600,13 +596,13 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
.build());
// add new justifications
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason", "technicalReason")), dossierTemplate.getId());
legalBasisClient.setLegalBasisMapping(List.of(new LegalBasis("nameAgain", "description", "reason","technicalReason")), dossierTemplate.getId());
existingLegalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplate.getId());
assertThat(existingLegalBasis.size()).isEqualTo(1);
// update dossier template metadata
var cru = new DossierTemplateModel();
cru.setId(dossierTemplate.getId());
cru.setDossierTemplateId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update");
cru.setDescription("new description");
@ -948,7 +944,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertThat(result.getName()).isEqualTo(name);
assertThat(result.isOcrByDefault()).isTrue();
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId());
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId());
assertThat(loadedTemplate).isEqualTo(result);
dossierTemplateModel.setName("Test Dossier Template Update");
@ -996,7 +992,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
// update
var cru = new DossierTemplateModel();
cru.setId(dossierTemplate.getId());
cru.setDossierTemplateId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update");
cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC));
@ -1006,40 +1002,4 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo."));
}
@Test
public void testUpdateDossierTemplateWithOCRSettings() {
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
var allTemplates = dossierTemplateClient.getAllDossierTemplates();
assertThat(allTemplates.size()).isEqualTo(1);
assertThat(allTemplates.get(0)).isEqualTo(dossierTemplate);
// update
var cru = new DossierTemplateModel();
cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
String updatedName = "Template 1 Update";
cru.setName(updatedName);
cru.setIdpByDefault(true);
cru.setRotationCorrectionByDefault(true);
cru.setOcrAllPages(true);
cru.setFontStyleDetection(true);
var updatedDT = dossierTemplateClient.createOrUpdateDossierTemplate(cru);
assertEquals(updatedName, updatedDT.getName());
assertTrue(updatedDT.isIdpByDefault());
assertTrue(updatedDT.isRotationCorrectionByDefault());
assertTrue(updatedDT.isFontStyleDetection());
assertTrue(updatedDT.isOcrAllPages());
var loadedDT = dossierTemplateClient.getDossierTemplate(updatedDT.getId());
assertEquals(updatedName, loadedDT.getName());
assertTrue(loadedDT.isIdpByDefault());
assertTrue(loadedDT.isRotationCorrectionByDefault());
assertTrue(loadedDT.isFontStyleDetection());
assertTrue(loadedDT.isOcrAllPages());
}
}

View File

@ -46,12 +46,12 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
var loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
reanalysisClient.ocrDossier(dossier.getId(), false);
reanalysisClient.ocrDossier(dossier.getId());
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isNotEqualTo(ProcessingStatus.PROCESSED);
resetProcessingStatus(file);
reanalysisClient.ocrDossier(dossier.getId(), false);
reanalysisClient.ocrDossier(dossier.getId());
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);
@ -61,7 +61,7 @@ public class ReanalysisTest extends AbstractPersistenceServerServiceTest {
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()), false);
reanalysisClient.ocrFiles(dossier.getId(), Set.of(file.getId()));
loadedFile = fileClient.getFileStatus(dossier.getId(), file.getId());
assertThat(loadedFile.getProcessingStatus()).isEqualTo(ProcessingStatus.OCR_PROCESSING_QUEUED);
resetProcessingStatus(file);

View File

@ -201,7 +201,6 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(),
true,
false,
false,
new FileStatusFilter(null, null, true, true)));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);
@ -248,7 +247,6 @@ public class SupportControllerTest extends AbstractPersistenceServerServiceTest
Collections.emptySet(),
true,
false,
false,
null));
loadedFile2 = fileClient.getFileStatus(dossier2.getId(), file2.getId());
assertThat(loadedFile2.getProcessingStatus()).isEqualTo(ProcessingStatus.FULL_PROCESSING);

View File

@ -0,0 +1,79 @@
package com.iqser.red.service.peristence.v1.server.integration.tests;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.TestPropertySource;
import com.iqser.red.service.peristence.v1.server.integration.client.FileClient;
import com.iqser.red.service.peristence.v1.server.integration.client.UploadClient;
import com.iqser.red.service.peristence.v1.server.integration.service.DossierTesterAndProvider;
import com.iqser.red.service.peristence.v1.server.integration.utils.AbstractPersistenceServerServiceTest;
import feign.FeignException;
import lombok.SneakyThrows;
@TestPropertySource(properties = {
"spring.servlet.multipart.max-file-size=50MB",
"spring.servlet.multipart.max-request-size=50MB"
})
public class ZipFileUploadTest extends AbstractPersistenceServerServiceTest {
@Autowired
private DossierTesterAndProvider dossierTesterAndProvider;
@Autowired
private UploadClient uploadClient;
@Autowired
private FileClient fileClient;
@SneakyThrows
@Test
void testZipUploadWithEntryCountCheck() {
var dossier = dossierTesterAndProvider.provideTestDossier();
var smallZipResource = new ClassPathResource("files/zip/ArchiveWithManyFiles.zip");
var smallZip = new MockMultipartFile(
"ArchiveWithManyFiles.zip",
"ArchiveWithManyFiles.zip",
"application/zip",
IOUtils.toByteArray(smallZipResource.getInputStream())
);
var uploadResult = uploadClient.upload(smallZip, dossier.getId(), false, false);
assertNotNull(uploadResult, "Upload result for small zip should not be null.");
assertEquals(9993, uploadResult.getFileIds().size());
var largeZipResource = new ClassPathResource("files/zip/ArchiveWithTooManyFiles.zip");
var largeZip = new MockMultipartFile(
"ArchiveWithTooManyFiles.zip",
"ArchiveWithTooManyFiles.zip",
"application/zip",
IOUtils.toByteArray(largeZipResource.getInputStream())
);
FeignException ex = assertThrows(
FeignException.class,
() -> uploadClient.upload(largeZip, dossier.getId(), false, false),
"Uploading a zip with more than 10000 entries should throw a FeignException."
);
assertEquals(400, ex.status(), "Expected HTTP 400 (Bad Request) for a ZIP bomb scenario");
assertTrue(ex.getMessage().contains("ZIP-Bomb detected"),
"Exception message should contain 'ZIP-Bomb detected' or similar.");
var filesInDossier = fileClient.getDossierStatus(dossier.getId());
assertEquals(9993, filesInDossier.size());
}
}

View File

@ -261,7 +261,7 @@ public abstract class AbstractPersistenceServerServiceTest {
@MockBean
protected TenantsClient tenantsClient;
@MockBean
private UsersClient usersClient;
protected UsersClient usersClient;
@Autowired
protected EncryptionDecryptionService encryptionDecryptionService;
@Autowired
@ -286,7 +286,7 @@ public abstract class AbstractPersistenceServerServiceTest {
private CurrentApplicationTypeProvider currentApplicationTypeProvider;
private static String[] getAllRoles() {
protected static String[] getAllRoles() {
var allRoles = ApplicationRoles.ROLE_DATA.entrySet()
.stream()

View File

@ -10,7 +10,7 @@ dependencies {
api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
}
api("com.knecon.fforesight:layoutparser-service-internal-api:0.196.0-RED8670.0") {
api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
}

View File

@ -68,18 +68,6 @@ public class DossierTemplateModel {
@Schema(description = "Flag that specifies if OCR is automatically performed on upload for all dossiers of this template")
private boolean ocrByDefault;
@Schema(description = "Flag that specifies if rotation correction is attempted during OCR for all dossiers of this template")
private boolean rotationCorrectionByDefault;
@Schema(description = "Flag that specifies if IDP is performed instead of OCR for all dossiers of this template")
private boolean idpByDefault;
@Schema(description = "Flag that specifies if font style detection is performed during OCR")
private boolean fontStyleDetection;
@Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images")
private boolean ocrAllPages;
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
private boolean removeWatermark;

View File

@ -88,12 +88,6 @@ public class FileStatus {
private int numberOfPagesToOCR;
@Schema(description = "Number of pages already OCRed by us")
private int numberOfOCRedPages;
@Schema(description = "Number of pages already IDPed by us")
private int numberOfIdpPages;
@Schema(description = "Number of prompt tokens used by this file")
private int usedPromptTokens;
@Schema(description = "Number of completion tokens used by this file")
private int usedCompletionTokens;
@Schema(description = "Shows if this file has been OCRed by us. End time of OCR Process")
private OffsetDateTime ocrEndTime;
@Schema(description = "Shows if this file has comments on annotations.")

View File

@ -15,7 +15,7 @@ import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class FileStatusFilter implements Predicate<FileModel> {
public class FileStatusFilter {
private List<ProcessingStatus> processingStatusList = new ArrayList<>();
private List<WorkflowStatus> workflowStatusList = new ArrayList<>();
@ -32,8 +32,7 @@ public class FileStatusFilter implements Predicate<FileModel> {
}
@Override
public boolean test(FileModel fileModel) {
public Predicate<FileModel> asPredicate() {
if (this.getProcessingStatusList() == null) {
this.setProcessingStatusList(new ArrayList<>());
@ -43,12 +42,10 @@ public class FileStatusFilter implements Predicate<FileModel> {
this.setWorkflowStatusList(new ArrayList<>());
}
return (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileModel.getProcessingStatus()))
&& (this.getWorkflowStatusList().isEmpty()
|| this.getWorkflowStatusList()
.contains(fileModel.getWorkflowStatus()))
&& (this.isIncludeSoftDeletedFiles() || fileModel.getDeleted() == null)
&& (this.isIncludeHardDeletedFiles() || fileModel.getHardDeletedTime() == null);
return fileStatus -> (this.getProcessingStatusList().isEmpty() || this.getProcessingStatusList().contains(fileStatus.getProcessingStatus()))
&& (this.getWorkflowStatusList().isEmpty() || this.getWorkflowStatusList().contains(fileStatus.getWorkflowStatus()))
&& (this.isIncludeSoftDeletedFiles() || fileStatus.getDeleted() == null)
&& (this.isIncludeHardDeletedFiles() || fileStatus.getHardDeletedTime() == null);
}
}

View File

@ -4,33 +4,16 @@ import java.util.Optional;
import java.util.Set;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
@Getter
@Builder
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class ReanalysisSettings {
public record ReanalysisSettings(
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]") Set<String> dossierIds,
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]") Set<String> fileIds,
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false") boolean repeatStructureAnalysis,
@Schema(description = "If set to true, ocr will be repeated and therefore also layout parsing and named entity recognition.", defaultValue = "false") boolean runOcr,
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "{}") FileStatusFilter fileStatusFilter
) {
@Schema(description = "Provide a list of dossierIds to filter for. If the list is empty, every dossier is selected for reanalysis.", defaultValue = "[]")
Set<String> dossierIds;
@Schema(description = "Provide a list of fileIds to filter for. If the list is empty, every file is selected for reanalysis.", defaultValue = "[]")
Set<String> fileIds;
@Schema(description = "If set to true, layout parsing and named entity recognition will be repeated.", defaultValue = "false")
boolean repeatStructureAnalysis;
@Schema(description = "If set to true, OCR will be repeated.", defaultValue = "false")
boolean runOcr;
@Schema(description = "If set to true, OCR with IDP will be repeated.", defaultValue = "false")
boolean runIdp;
@Schema(description = "Use this to create a filter for files to reanalyse. Matches anything if set to null.", defaultValue = "null")
FileStatusFilter fileStatusFilter;
public FileStatusFilter getFileStatusFilter() {
public FileStatusFilter fileStatusFilter() {
return Optional.ofNullable(fileStatusFilter)
.orElse(new FileStatusFilter());

View File

@ -41,16 +41,8 @@ public class CreateOrUpdateDossierTemplateRequest {
private boolean ocrByDefault;
private boolean idpByDefault;
private boolean rotationCorrectionByDefault;
private boolean fontStyleDetection;
private boolean removeWatermark;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType;
}

View File

@ -36,11 +36,7 @@ public class DossierTemplate {
private boolean keepOverlappingObjects;
private boolean applyDictionaryUpdatesToAllDossiersByDefault;
private boolean ocrByDefault;
private boolean rotationCorrectionByDefault;
private boolean idpByDefault;
private boolean removeWatermark;
private boolean fontStyleDetection;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType;
}

View File

@ -8,7 +8,6 @@ import java.util.Set;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -61,9 +60,6 @@ public class FileModel {
private OffsetDateTime ocrStartTime;
private Integer numberOfPagesToOCR;
private Integer numberOfOCRedPages;
private Integer numberOfIdpPages;
private int usedPromptTokens;
private int usedCompletionTokens;
private OffsetDateTime ocrEndTime;
private boolean hasAnnotationComments;
private boolean excluded;

View File

@ -21,7 +21,6 @@ public enum FileType {
TABLES(".json"),
VISUAL_LAYOUT(".json"),
IDP_RESULT(".json"),
COMPONENTS(".json"),
// document is split into 4 files, all should be overridden/deleted at the same time
DOCUMENT_TEXT_OLD(".json"),