From d871bf0d8071f231573960e96c0d5295dcf2e2ae Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 19 Dec 2024 11:14:36 +0100 Subject: [PATCH] RED-8670: add idp fields and llm tracking --- .../controller/DossierTemplateController.java | 5 ++--- .../entity/dossier/DossierTemplateEntity.java | 4 ++++ .../v1/processor/service/FileStatusService.java | 8 ++++++-- .../LayoutParsingRequestFactory.java | 7 +++++-- ...-idp-related-fields-to-dossier-template.yaml | 11 ++++++++++- .../integration/tests/DossierTemplateTest.java | 17 ++++++++++++----- .../build.gradle.kts | 2 +- .../api/shared/model/DossierTemplateModel.java | 3 +++ .../CreateOrUpdateDossierTemplateRequest.java | 2 ++ .../model/dossiertemplate/DossierTemplate.java | 1 + .../dossiertemplate/dossier/file/FileType.java | 1 + 11 files changed, 47 insertions(+), 14 deletions(-) diff --git a/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/DossierTemplateController.java b/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/DossierTemplateController.java index 91f1a6ab6..f5d6bb9c5 100644 --- a/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/DossierTemplateController.java +++ b/persistence-service-v1/persistence-service-external-api-impl-v1/src/main/java/com/iqser/red/persistence/service/v1/external/api/impl/controller/DossierTemplateController.java @@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService; +import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest; import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException; import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException; import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService; import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService; import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService; import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService; -import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService; import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource; import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory; import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel; @@ -42,7 +42,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier; -import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest; import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity; @@ -61,7 +60,6 @@ public class DossierTemplateController implements DossierTemplateResource { private final AuditPersistenceService auditPersistenceService; private final DossierManagementService dossierManagementService; private final DossierACLService dossierACLService; - private final UserService userService; @Override @@ -317,6 +315,7 @@ public class DossierTemplateController implements DossierTemplateResource { .idpByDefault(dossierTemplate.isIdpByDefault()) .rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault()) .fontStyleDetection(dossierTemplate.isFontStyleDetection()) + .ocrAllPages(dossierTemplate.isOcrAllPages()) .build(); } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/entity/dossier/DossierTemplateEntity.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/entity/dossier/DossierTemplateEntity.java index 4fa89592c..fdd48e1ef 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/entity/dossier/DossierTemplateEntity.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/entity/dossier/DossierTemplateEntity.java @@ -92,6 +92,9 @@ public class DossierTemplateEntity { @Column(name = "font_style_detection") private boolean fontStyleDetection; + @Column(name = "ocr_all_pages") + private boolean ocrAllPages; + @Column(name = "remove_watermark") private boolean removeWatermark; @@ -161,6 +164,7 @@ public class DossierTemplateEntity { dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault; dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault; dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection; + dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages; return dossierTemplateCopy; } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java index 600109a7f..5592c1b05 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/FileStatusService.java @@ -773,7 +773,6 @@ public class FileStatusService { if (dt.isFontStyleDetection()) { features.add(AzureOcrFeature.FONT_STYLE_DETECTION); } - if (dt.isRemoveWatermark()) { features.add(AzureOcrFeature.REMOVE_WATERMARKS); } @@ -791,9 +790,14 @@ public class FileStatusService { public void addToOcrQueue(String dossierId, String fileId, int priority, Set features) { var request = DocumentRequest.builder() + // needed for legacy OCR-services .dossierId(dossierId) .fileId(fileId) - .removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services + .removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) + // new api + .originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN)) + .viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT)) + .idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) .features(features) .build(); diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java index fc2a5a94e..61f8d2ced 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/layoutparsing/LayoutParsingRequestFactory.java @@ -27,8 +27,7 @@ public class LayoutParsingRequestFactory { public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) { - LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate( - dossierTemplateId).getLayoutParsingType(); + LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType(); Optional optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) // ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty(); @@ -39,6 +38,9 @@ public class LayoutParsingRequestFactory { Optional optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) // ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty(); + Optional optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) // + ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty(); + return LayoutParsingRequest.builder() .layoutParsingType(layoutParsingType) .identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority)) @@ -55,6 +57,7 @@ public class LayoutParsingRequestFactory { .documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.MARKDOWN)) : Optional.empty()) + .idpResultStorageId(optionalIdpResultFileId) .build(); } diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/resources/db/changelog/tenant/160.0.0-add-idp-related-fields-to-dossier-template.yaml b/persistence-service-v1/persistence-service-processor-v1/src/main/resources/db/changelog/tenant/160.0.0-add-idp-related-fields-to-dossier-template.yaml index 3161a6c0a..e583d541e 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/resources/db/changelog/tenant/160.0.0-add-idp-related-fields-to-dossier-template.yaml +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/resources/db/changelog/tenant/160.0.0-add-idp-related-fields-to-dossier-template.yaml @@ -19,7 +19,12 @@ databaseChangeLog: name: font_style_detection type: boolean defaultValueBoolean: true - remarks: "Indicates if bold detection is enabled in OCR" + remarks: "Indicates if font style detection is enabled in OCR" + - column: + name: ocr_all_pages + type: boolean + defaultValueBoolean: false + remarks: "Indicates if all pages should be processed during OCR instead of only pages with images" - update: tableName: dossier_template @@ -53,3 +58,7 @@ databaseChangeLog: tableName: dossier_template columnName: font_style_detection columnDataType: boolean + - addNotNullConstraint: + tableName: dossier_template + columnName: ocr_all_pages + columnDataType: boolean diff --git a/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/DossierTemplateTest.java b/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/DossierTemplateTest.java index 06467205f..30ac173f8 100644 --- a/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/DossierTemplateTest.java +++ b/persistence-service-v1/persistence-service-server-v1/src/test/java/com/iqser/red/service/peristence/v1/server/integration/tests/DossierTemplateTest.java @@ -606,7 +606,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest { // update dossier template metadata var cru = new DossierTemplateModel(); - cru.setDossierTemplateId(dossierTemplate.getId()); + cru.setId(dossierTemplate.getId()); BeanUtils.copyProperties(dossierTemplate, cru); cru.setName("Template 1 Update"); cru.setDescription("new description"); @@ -948,7 +948,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest { assertThat(result.getName()).isEqualTo(name); assertThat(result.isOcrByDefault()).isTrue(); - DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId()); + DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId()); assertThat(loadedTemplate).isEqualTo(result); dossierTemplateModel.setName("Test Dossier Template Update"); @@ -996,7 +996,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest { // update var cru = new DossierTemplateModel(); - cru.setDossierTemplateId(dossierTemplate.getId()); + cru.setId(dossierTemplate.getId()); BeanUtils.copyProperties(dossierTemplate, cru); cru.setName("Template 1 Update"); cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC)); @@ -1006,8 +1006,9 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest { assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo.")); } + @Test - public void testUpdateDossierTemplateWithIdp() { + public void testUpdateDossierTemplateWithOCRSettings() { var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate(); @@ -1017,22 +1018,28 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest { // update var cru = new DossierTemplateModel(); - cru.setDossierTemplateId(dossierTemplate.getId()); + cru.setId(dossierTemplate.getId()); BeanUtils.copyProperties(dossierTemplate, cru); String updatedName = "Template 1 Update"; cru.setName(updatedName); cru.setIdpByDefault(true); cru.setRotationCorrectionByDefault(true); + cru.setOcrAllPages(true); + cru.setFontStyleDetection(true); var updatedDT = dossierTemplateClient.createOrUpdateDossierTemplate(cru); assertEquals(updatedName, updatedDT.getName()); assertTrue(updatedDT.isIdpByDefault()); assertTrue(updatedDT.isRotationCorrectionByDefault()); + assertTrue(updatedDT.isFontStyleDetection()); + assertTrue(updatedDT.isOcrAllPages()); var loadedDT = dossierTemplateClient.getDossierTemplate(updatedDT.getId()); assertEquals(updatedName, loadedDT.getName()); assertTrue(loadedDT.isIdpByDefault()); assertTrue(loadedDT.isRotationCorrectionByDefault()); + assertTrue(loadedDT.isFontStyleDetection()); + assertTrue(loadedDT.isOcrAllPages()); } } diff --git a/persistence-service-v1/persistence-service-shared-api-v1/build.gradle.kts b/persistence-service-v1/persistence-service-shared-api-v1/build.gradle.kts index 3b65aa3cc..08014c970 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/build.gradle.kts +++ b/persistence-service-v1/persistence-service-shared-api-v1/build.gradle.kts @@ -10,7 +10,7 @@ dependencies { api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){ exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") } - api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") { + api("com.knecon.fforesight:layoutparser-service-internal-api:idp1") { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1") } diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/DossierTemplateModel.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/DossierTemplateModel.java index b7df60e28..da03244ac 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/DossierTemplateModel.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/DossierTemplateModel.java @@ -77,6 +77,9 @@ public class DossierTemplateModel { @Schema(description = "Flag that specifies if font style detection is performed during OCR") private boolean fontStyleDetection; + @Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images") + private boolean ocrAllPages; + @Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing") private boolean removeWatermark; diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/CreateOrUpdateDossierTemplateRequest.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/CreateOrUpdateDossierTemplateRequest.java index 0ed496b51..f2e1d9779 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/CreateOrUpdateDossierTemplateRequest.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/CreateOrUpdateDossierTemplateRequest.java @@ -49,6 +49,8 @@ public class CreateOrUpdateDossierTemplateRequest { private boolean removeWatermark; + private boolean ocrAllPages; + private LayoutParsingType layoutParsingType; } diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/DossierTemplate.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/DossierTemplate.java index 91dff5de0..a1b906263 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/DossierTemplate.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/DossierTemplate.java @@ -40,6 +40,7 @@ public class DossierTemplate { private boolean idpByDefault; private boolean removeWatermark; private boolean fontStyleDetection; + private boolean ocrAllPages; private LayoutParsingType layoutParsingType; } diff --git a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java index bb00f3608..d853cf0df 100644 --- a/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java +++ b/persistence-service-v1/persistence-service-shared-api-v1/src/main/java/com/iqser/red/service/persistence/service/v1/api/shared/model/dossiertemplate/dossier/file/FileType.java @@ -21,6 +21,7 @@ public enum FileType { TABLES(".json"), VISUAL_LAYOUT(".json"), + IDP_RESULT(".json"), COMPONENTS(".json"), // document is split into 4 files, all should be overridden/deleted at the same time DOCUMENT_TEXT_OLD(".json"),