RED-8670: add more settings to OCR #919
@ -24,13 +24,13 @@ import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.iqser.red.service.persistence.management.v1.processor.acl.custom.dossier.DossierACLService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.BadRequestException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.exception.ConflictException;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.DossierManagementService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateManagementService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.DossierTemplateStatsService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.AuditPersistenceService;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.service.users.UserService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.external.resource.DossierTemplateResource;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AuditCategory;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.DossierTemplateModel;
|
||||
@ -42,7 +42,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStats;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplateStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.Dossier;
|
||||
import com.iqser.red.service.persistence.management.v1.processor.dataexchange.models.ExportDownloadRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.importexport.ImportDossierTemplateRequest;
|
||||
import com.knecon.fforesight.keycloakcommons.security.KeycloakSecurity;
|
||||
|
||||
@ -61,7 +60,6 @@ public class DossierTemplateController implements DossierTemplateResource {
|
||||
private final AuditPersistenceService auditPersistenceService;
|
||||
private final DossierManagementService dossierManagementService;
|
||||
private final DossierACLService dossierACLService;
|
||||
private final UserService userService;
|
||||
|
||||
|
||||
@Override
|
||||
@ -317,6 +315,7 @@ public class DossierTemplateController implements DossierTemplateResource {
|
||||
.idpByDefault(dossierTemplate.isIdpByDefault())
|
||||
.rotationCorrectionByDefault(dossierTemplate.isRotationCorrectionByDefault())
|
||||
.fontStyleDetection(dossierTemplate.isFontStyleDetection())
|
||||
.ocrAllPages(dossierTemplate.isOcrAllPages())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -92,6 +92,9 @@ public class DossierTemplateEntity {
|
||||
@Column(name = "font_style_detection")
|
||||
private boolean fontStyleDetection;
|
||||
|
||||
@Column(name = "ocr_all_pages")
|
||||
private boolean ocrAllPages;
|
||||
|
||||
@Column(name = "remove_watermark")
|
||||
private boolean removeWatermark;
|
||||
|
||||
@ -161,6 +164,7 @@ public class DossierTemplateEntity {
|
||||
dossierTemplateCopy.rotationCorrectionByDefault = dossierTemplateEntity.rotationCorrectionByDefault;
|
||||
dossierTemplateCopy.idpByDefault = dossierTemplateEntity.idpByDefault;
|
||||
dossierTemplateCopy.fontStyleDetection = dossierTemplateEntity.fontStyleDetection;
|
||||
dossierTemplateCopy.ocrAllPages = dossierTemplateEntity.ocrAllPages;
|
||||
return dossierTemplateCopy;
|
||||
}
|
||||
|
||||
|
||||
@ -773,7 +773,6 @@ public class FileStatusService {
|
||||
if (dt.isFontStyleDetection()) {
|
||||
features.add(AzureOcrFeature.FONT_STYLE_DETECTION);
|
||||
}
|
||||
|
||||
if (dt.isRemoveWatermark()) {
|
||||
features.add(AzureOcrFeature.REMOVE_WATERMARKS);
|
||||
}
|
||||
@ -791,9 +790,14 @@ public class FileStatusService {
|
||||
public void addToOcrQueue(String dossierId, String fileId, int priority, Set<AzureOcrFeature> features) {
|
||||
|
||||
var request = DocumentRequest.builder()
|
||||
// needed for legacy OCR-services
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) // needed for legacy OCR-services
|
||||
.removeWatermark(features.contains(AzureOcrFeature.REMOVE_WATERMARKS))
|
||||
// new api
|
||||
.originDocumentId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.ORIGIN))
|
||||
.viewerDocId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))
|
||||
.idpResultId(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT))
|
||||
.features(features)
|
||||
.build();
|
||||
|
||||
|
||||
@ -27,8 +27,7 @@ public class LayoutParsingRequestFactory {
|
||||
|
||||
public LayoutParsingRequest build(String dossierTemplateId, String dossierId, String fileId, boolean priority) {
|
||||
|
||||
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(
|
||||
dossierTemplateId).getLayoutParsingType();
|
||||
LayoutParsingType layoutParsingType = dossierTemplatePersistenceService.getDossierTemplate(dossierTemplateId).getLayoutParsingType();
|
||||
|
||||
Optional<String> optionalImageFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IMAGE_INFO) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)) : Optional.empty();
|
||||
@ -39,6 +38,9 @@ public class LayoutParsingRequestFactory {
|
||||
Optional<String> optionalVisualLayoutParsingFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.VISUAL_LAYOUT) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.VISUAL_LAYOUT)) : Optional.empty();
|
||||
|
||||
Optional<String> optionalIdpResultFileId = fileManagementStorageService.objectExists(dossierId, fileId, FileType.IDP_RESULT) //
|
||||
? Optional.of(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IDP_RESULT)) : Optional.empty();
|
||||
|
||||
return LayoutParsingRequest.builder()
|
||||
.layoutParsingType(layoutParsingType)
|
||||
.identifier(QueueMessageIdentifierService.buildIdentifier(dossierId, fileId, priority))
|
||||
@ -55,6 +57,7 @@ public class LayoutParsingRequestFactory {
|
||||
.documentMarkdownFileStorageId(fileManagementServiceSettings.isStoreMarkdown() ? Optional.of(StorageIdUtils.getStorageId(dossierId,
|
||||
fileId,
|
||||
FileType.MARKDOWN)) : Optional.empty())
|
||||
.idpResultStorageId(optionalIdpResultFileId)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -19,7 +19,12 @@ databaseChangeLog:
|
||||
name: font_style_detection
|
||||
type: boolean
|
||||
defaultValueBoolean: true
|
||||
remarks: "Indicates if bold detection is enabled in OCR"
|
||||
remarks: "Indicates if font style detection is enabled in OCR"
|
||||
- column:
|
||||
name: ocr_all_pages
|
||||
type: boolean
|
||||
defaultValueBoolean: false
|
||||
remarks: "Indicates if all pages should be processed during OCR instead of only pages with images"
|
||||
|
||||
- update:
|
||||
tableName: dossier_template
|
||||
@ -53,3 +58,7 @@ databaseChangeLog:
|
||||
tableName: dossier_template
|
||||
columnName: font_style_detection
|
||||
columnDataType: boolean
|
||||
- addNotNullConstraint:
|
||||
tableName: dossier_template
|
||||
columnName: ocr_all_pages
|
||||
columnDataType: boolean
|
||||
|
||||
@ -606,7 +606,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
|
||||
// update dossier template metadata
|
||||
var cru = new DossierTemplateModel();
|
||||
cru.setDossierTemplateId(dossierTemplate.getId());
|
||||
cru.setId(dossierTemplate.getId());
|
||||
BeanUtils.copyProperties(dossierTemplate, cru);
|
||||
cru.setName("Template 1 Update");
|
||||
cru.setDescription("new description");
|
||||
@ -948,7 +948,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
assertThat(result.getName()).isEqualTo(name);
|
||||
assertThat(result.isOcrByDefault()).isTrue();
|
||||
|
||||
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getDossierTemplateId());
|
||||
DossierTemplateModel loadedTemplate = dossierTemplateClient.getDossierTemplate(result.getId());
|
||||
assertThat(loadedTemplate).isEqualTo(result);
|
||||
|
||||
dossierTemplateModel.setName("Test Dossier Template Update");
|
||||
@ -996,7 +996,7 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
|
||||
// update
|
||||
var cru = new DossierTemplateModel();
|
||||
cru.setDossierTemplateId(dossierTemplate.getId());
|
||||
cru.setId(dossierTemplate.getId());
|
||||
BeanUtils.copyProperties(dossierTemplate, cru);
|
||||
cru.setName("Template 1 Update");
|
||||
cru.setValidTo(OffsetDateTime.of(2020, 1, 1, 1, 1, 1, 1, ZoneOffset.UTC));
|
||||
@ -1006,8 +1006,9 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
assertTrue(exception.getMessage().contains("Invalid dates! validFrom can't be after validTo."));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUpdateDossierTemplateWithIdp() {
|
||||
public void testUpdateDossierTemplateWithOCRSettings() {
|
||||
|
||||
var dossierTemplate = dossierTemplateTesterAndProvider.provideTestTemplate();
|
||||
|
||||
@ -1017,22 +1018,28 @@ public class DossierTemplateTest extends AbstractPersistenceServerServiceTest {
|
||||
|
||||
// update
|
||||
var cru = new DossierTemplateModel();
|
||||
cru.setDossierTemplateId(dossierTemplate.getId());
|
||||
cru.setId(dossierTemplate.getId());
|
||||
BeanUtils.copyProperties(dossierTemplate, cru);
|
||||
String updatedName = "Template 1 Update";
|
||||
cru.setName(updatedName);
|
||||
cru.setIdpByDefault(true);
|
||||
cru.setRotationCorrectionByDefault(true);
|
||||
cru.setOcrAllPages(true);
|
||||
cru.setFontStyleDetection(true);
|
||||
|
||||
var updatedDT = dossierTemplateClient.createOrUpdateDossierTemplate(cru);
|
||||
assertEquals(updatedName, updatedDT.getName());
|
||||
assertTrue(updatedDT.isIdpByDefault());
|
||||
assertTrue(updatedDT.isRotationCorrectionByDefault());
|
||||
assertTrue(updatedDT.isFontStyleDetection());
|
||||
assertTrue(updatedDT.isOcrAllPages());
|
||||
|
||||
var loadedDT = dossierTemplateClient.getDossierTemplate(updatedDT.getId());
|
||||
assertEquals(updatedName, loadedDT.getName());
|
||||
assertTrue(loadedDT.isIdpByDefault());
|
||||
assertTrue(loadedDT.isRotationCorrectionByDefault());
|
||||
assertTrue(loadedDT.isFontStyleDetection());
|
||||
assertTrue(loadedDT.isOcrAllPages());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -10,7 +10,7 @@ dependencies {
|
||||
api("com.knecon.fforesight:document:${rootProject.extra.get("documentVersion")}"){
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
}
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:0.194.0-RED9998.1") {
|
||||
api("com.knecon.fforesight:layoutparser-service-internal-api:idp1") {
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
|
||||
exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
|
||||
}
|
||||
|
||||
@ -77,6 +77,9 @@ public class DossierTemplateModel {
|
||||
@Schema(description = "Flag that specifies if font style detection is performed during OCR")
|
||||
private boolean fontStyleDetection;
|
||||
|
||||
@Schema(description = "Flag that specifies if OCR should be performed on all pages instead of only pages with images")
|
||||
private boolean ocrAllPages;
|
||||
|
||||
@Schema(description = "Flag that specifies the watermark removal in documents will be performed before the OCR processing")
|
||||
private boolean removeWatermark;
|
||||
|
||||
|
||||
@ -49,6 +49,8 @@ public class CreateOrUpdateDossierTemplateRequest {
|
||||
|
||||
private boolean removeWatermark;
|
||||
|
||||
private boolean ocrAllPages;
|
||||
|
||||
private LayoutParsingType layoutParsingType;
|
||||
|
||||
}
|
||||
|
||||
@ -40,6 +40,7 @@ public class DossierTemplate {
|
||||
private boolean idpByDefault;
|
||||
private boolean removeWatermark;
|
||||
private boolean fontStyleDetection;
|
||||
private boolean ocrAllPages;
|
||||
private LayoutParsingType layoutParsingType;
|
||||
|
||||
}
|
||||
|
||||
@ -21,6 +21,7 @@ public enum FileType {
|
||||
TABLES(".json"),
|
||||
|
||||
VISUAL_LAYOUT(".json"),
|
||||
IDP_RESULT(".json"),
|
||||
COMPONENTS(".json"),
|
||||
// document is split into 4 files, all should be overridden/deleted at the same time
|
||||
DOCUMENT_TEXT_OLD(".json"),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user