RED-8670: add IDP fields and LLM tracking

This commit is contained in:
Kilian Schuettler 2024-12-19 11:14:36 +01:00
parent da9924d1e6
commit d871bf0d80
11 changed files with 47 additions and 14 deletions

View File

@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService;
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService;
import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
@ -42,7 +42,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier;
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest;
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
@ -61,7 +60,6 @@ public class DossierTemplateController implements DossierTemplateResource {
private final AuditPersistenceService auditPersistenceService;
private final DossierManagementService dossierManagementService;
private final DossierACLService dossierACLService;
private final UserService userService;
@Override
@ -317,6 +315,7 @@ public class DossierTemplateController implements DossierTemplateResource {
.idpByDefault(dossierTemplate.isIdpByDefault())
.rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault())
.fontStyleDetection(dossierTemplate.isFontStyleDetection())
.ocrAllPages(dossierTemplate.isOcrAllPages())
.build();
}

View File

@ -92,6 +92,9 @@ public class DossierTemplateEntity {
@Column(name = "font_style_detection")
private boolean fontStyleDetection;
@Column(name = "ocr_all_pages")
private boolean ocrAllPages;
@Column(name = "remove_watermark")
private boolean removeWatermark;
@ -161,6 +164,7 @@ public class DossierTemplateEntity {
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection;
dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages;
return dossierTemplateCopy;
}

View File

@ -773,7 +773,6 @@ public class FileStatusService {
if (dt.isFontStyleDetection()) {
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
}
if (dt.isRemoveWatermark()) {
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
}
@ -791,9 +790,14 @@ public class FileStatusService {
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
var request = DocumentRequest.builder()
// needed for legacy OCR-services
.dossierId(dossierId)
.fileId(fileId)
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS))
// new api
.originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
.viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))
.idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT))
.features(features)
.build();

View File

@ -27,8 +27,7 @@ public class LayoutParsingRequestFactory {
public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) {
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(
dossierTemplateId).getLayoutParsingType();
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType();
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
@ -39,6 +38,9 @@ public class LayoutParsingRequestFactory {
Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty();
Optional<String> optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) //
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty();
return LayoutParsingRequest.builder()
.layoutParsingType(layoutParsingType)
.identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority))
@ -55,6 +57,7 @@ public class LayoutParsingRequestFactory {
.documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId,
fileId,
FileType.MARKDOWN)) : Optional.empty())
.idpResultStorageId(optionalIdpResultFileId)
.build();
}

View File

@ -19,7 +19,12 @@ databaseChangeLog:
name: font_style_detection
type: boolean
defaultValueBoolean: true
remarks: "Indicates if bold detection is enabled in OCR"
remarks: "Indicates if font style detection is enabled in OCR"
- column:
name: ocr_all_pages
type: boolean
defaultValueBoolean: false
remarks: "Indicates if all pages should be processed during OCR instead of only pages with images"
- update:
tableName: dossier_template
@ -53,3 +58,7 @@ databaseChangeLog:
tableName: dossier_template
columnName: font_style_detection
columnDataType: boolean
- addNotNullConstraint:
tableName: dossier_template
columnName: ocr_all_pages
columnDataType: boolean

View File

@ -606,7 +606,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
// update dossier template metadata
var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId());
cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update");
cru.setDescription("new description");
@ -948,7 +948,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertThat(result.getName()).isEqualTo(name);
assertThat(result.isOcrByDefault()).isTrue();
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId());
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId());
assertThat(loadedTemplate).isEqualTo(result);
dossierTemplateModel.setName("Test Dossier Template Update");
@ -996,7 +996,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
// update
var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId());
cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
cru.setName("Template 1 Update");
cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC));
@ -1006,8 +1006,9 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo."));
}
@Test
public void testUpdateDossierTemplateWithIdp() {
public void testUpdateDossierTemplateWithOCRSettings() {
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
@ -1017,22 +1018,28 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
// update
var cru = new DossierTemplateModel();
cru.setDossierTemplateId(dossierTemplate.getId());
cru.setId(dossierTemplate.getId());
BeanUtils.copyProperties(dossierTemplate, cru);
String updatedName = "Template 1 Update";
cru.setName(updatedName);
cru.setIdpByDefault(true);
cru.setRotationCorrectionByDefault(true);
cru.setOcrAllPages(true);
cru.setFontStyleDetection(true);
var updatedDT = dossierTemplateClient.createOrUpdateDossierTemplate(cru);
assertEquals(updatedName, updatedDT.getName());
assertTrue(updatedDT.isIdpByDefault());
assertTrue(updatedDT.isRotationCorrectionByDefault());
assertTrue(updatedDT.isFontStyleDetection());
assertTrue(updatedDT.isOcrAllPages());
var loadedDT = dossierTemplateClient.getDossierTemplate(updatedDT.getId());
assertEquals(updatedName, loadedDT.getName());
assertTrue(loadedDT.isIdpByDefault());
assertTrue(loadedDT.isRotationCorrectionByDefault());
assertTrue(loadedDT.isFontStyleDetection());
assertTrue(loadedDT.isOcrAllPages());
}
}

View File

@ -10,7 +10,7 @@ dependencies {
api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
}
api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") {
api("com.knecon.fforesight:layoutparser-service-internal-api:idp1") {
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
}

View File

@ -77,6 +77,9 @@ public class DossierTemplateModel {
@Schema(description = "Flag that specifies if font style detection is performed during OCR")
private boolean fontStyleDetection;
@Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images")
private boolean ocrAllPages;
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
private boolean removeWatermark;

View File

@ -49,6 +49,8 @@ public class CreateOrUpdateDossierTemplateRequest {
private boolean removeWatermark;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType;
}

View File

@ -40,6 +40,7 @@ public class DossierTemplate {
private boolean idpByDefault;
private boolean removeWatermark;
private boolean fontStyleDetection;
private boolean ocrAllPages;
private LayoutParsingType layoutParsingType;
}

View File

@ -21,6 +21,7 @@ public enum FileType {
TABLES(".json"),
VISUAL_LAYOUT(".json"),
IDP_RESULT(".json"),
COMPONENTS(".json"),
// document is split into 4 files, all should be overridden/deleted at the same time
DOCUMENT_TEXT_OLD(".json"),