diff --git a/redaction-service-v1/redaction-service-api-v1/build.gradle.kts b/redaction-service-v1/redaction-service-api-v1/build.gradle.kts index 77476ddd..774dc096 100644 --- a/redaction-service-v1/redaction-service-api-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-api-v1/build.gradle.kts @@ -7,7 +7,7 @@ description = "redaction-service-api-v1" dependencies { implementation("org.springframework:spring-web:6.0.12") - implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.351.0") + implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.383.0") } publishing { diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 7e0ea1d3..964b83de 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,12 +12,14 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.96.0" +val layoutParserVersion = "0.107.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" -val persistenceServiceVersion = "2.380.0" +val persistenceServiceVersion = "2.383.0" val springBootStarterVersion = "3.1.5" +val springCloudVersion = "4.0.4" +val testContainersVersion = "1.19.7" configurations { all { @@ -31,6 +33,7 @@ dependencies { implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") } implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") } + implementation("com.iqser.red.service:persistence-service-shared-mongo-v1:${persistenceServiceVersion}") implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}") implementation("com.iqser.red.commons:spring-commons:6.2.0") @@ -38,7 +41,7 @@ dependencies { implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0") implementation("com.iqser.red.commons:storage-commons:2.45.0") - implementation("com.knecon.fforesight:tenant-commons:0.21.0") + implementation("com.knecon.fforesight:tenant-commons:0.23.0") implementation("com.knecon.fforesight:tracing-commons:0.5.0") implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}") @@ -52,7 +55,7 @@ dependencies { implementation("org.locationtech.jts:jts-core:1.19.0") - implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4") + implementation("org.springframework.cloud:spring-cloud-starter-openfeign:${springCloudVersion}") implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}") implementation("org.springframework.boot:spring-boot-starter-cache:${springBootStarterVersion}") implementation("org.springframework.boot:spring-boot-starter-data-redis:${springBootStarterVersion}") @@ -66,6 +69,9 @@ dependencies { testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}") testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}") + testImplementation("org.testcontainers:testcontainers:${testContainersVersion}") + testImplementation("org.testcontainers:junit-jupiter:${testContainersVersion}") + testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}") testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}") testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") { @@ -76,6 +82,12 @@ dependencies { } } +dependencyManagement { + imports { + mavenBom("org.testcontainers:testcontainers-bom:${testContainersVersion}") + } +} + tasks.test { configure { excludes = listOf("org/drools/**/*") diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java index c87b2c59..e5b10742 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java @@ -4,16 +4,23 @@ import org.springframework.boot.SpringApplication; import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration; import org.springframework.boot.autoconfigure.ImportAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.data.mongo.MongoDataAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.autoconfigure.liquibase.LiquibaseAutoConfiguration; +import org.springframework.boot.autoconfigure.mongo.MongoAutoConfiguration; import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.cache.annotation.EnableCaching; import org.springframework.cloud.openfeign.EnableFeignClients; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Import; +import org.springframework.data.mongodb.repository.config.EnableMongoRepositories; import com.iqser.red.service.dictionarymerge.commons.DictionaryMergeService; +import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration; import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration; import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration; import io.micrometer.core.aop.TimedAspect; @@ -22,11 +29,12 @@ import io.micrometer.observation.ObservationRegistry; import io.micrometer.observation.aop.ObservedAspect; @EnableCaching -@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class}) -@Import({MetricsConfiguration.class, StorageAutoConfiguration.class}) +@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class}) +@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class}) @EnableFeignClients(basePackageClasses = RulesClient.class) @EnableConfigurationProperties(RedactionServiceSettings.class) -@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class}) +@EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence") +@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class}) public class Application { public static void main(String[] args) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java index 740d4116..b57dcfaf 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java @@ -70,6 +70,7 @@ public class MigrationMessageReceiver { migrationRequest.getFileId()); log.info("Storing migrated entityLog and ids to migrate in DB for file {}", migrationRequest.getFileId()); + redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.ENTITY_LOG, migratedEntityLog.getEntityLog()); redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.MIGRATED_IDS, migratedEntityLog.getMigratedIds()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java index 6fa7fb31..05f4be4b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java @@ -1,5 +1,8 @@ package com.iqser.red.service.redaction.v1.server.service; +import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds; + +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -20,18 +23,12 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileTyp import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogChanges; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; -import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogLegalBasis; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; -import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel; import com.iqser.red.service.redaction.v1.server.model.KieWrapper; -import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; @@ -87,7 +84,7 @@ public class AnalyzeService { public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) { long startTime = System.currentTimeMillis(); - EntityLog previousEntityLog = redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); @@ -97,25 +94,36 @@ public class AnalyzeService { log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); // not yet ready for reanalysis - if (previousEntityLog == null || document == null || document.getNumberOfPages() == 0) { + if (entityLogWithoutEntries == null || document == null || document.getNumberOfPages() == 0) { return analyze(analyzeRequest); } DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), - new DictionaryVersion(previousEntityLog.getDictionaryVersion(), - previousEntityLog.getDossierDictionaryVersion()), + new DictionaryVersion(entityLogWithoutEntries.getDictionaryVersion(), + entityLogWithoutEntries.getDossierDictionaryVersion()), analyzeRequest.getDossierId()); - Set sectionsToReanalyseIds = getSectionsToReanalyseIds(analyzeRequest, previousEntityLog, document, dictionaryIncrement, importedRedactions); + Set relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions()); + + Set sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(), + analyzeRequest.getFileId(), + relevantManuallyModifiedAnnotationIds); + sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest, + document, + dictionaryIncrement, + importedRedactions, + relevantManuallyModifiedAnnotationIds)); + List sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds); log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); if (sectionsToReAnalyse.isEmpty()) { - EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(previousEntityLog, + EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(entityLogWithoutEntries, dictionaryIncrement.getDictionaryVersion(), analyzeRequest, - false); + new ArrayList<>(), + new ArrayList<>()); return finalizeAnalysis(analyzeRequest, startTime, @@ -160,8 +168,8 @@ public class AnalyzeService { EntityLogChanges entityLogChanges = entityLogCreatorService.updatePreviousEntityLog(analyzeRequest, document, + entityLogWithoutEntries, notFoundManualOrImportedEntries, - previousEntityLog, sectionsToReanalyseIds, dictionary.getVersion()); @@ -224,18 +232,18 @@ public class AnalyzeService { nerEntities); log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - EntityLog entityLog = entityLogCreatorService.createInitialEntityLog(analyzeRequest, - document, - notFoundManualOrImportedEntries, - dictionary.getVersion(), - kieWrapperEntityRules.rulesVersion()); + EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest, + document, + notFoundManualOrImportedEntries, + dictionary.getVersion(), + kieWrapperEntityRules.rulesVersion()); - notFoundImportedEntitiesService.processEntityLog(entityLog, analyzeRequest, notFoundImportedEntries); + notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries); return finalizeAnalysis(analyzeRequest, startTime, kieWrapperComponentRules, - new EntityLogChanges(entityLog, false), + entityLogChanges, document, document.getNumberOfPages(), dictionary.getVersion(), @@ -255,10 +263,24 @@ public class AnalyzeService { Set addedFileAttributes) { EntityLog entityLog = entityLogChanges.getEntityLog(); - redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ENTITY_LOG, entityLogChanges.getEntityLog()); + + // analysis numbers should be incremented in every follow-up request, so that this could be replaced + if (!redactionStorageService.entityLogExists(analyzeRequest.getDossierId(), analyzeRequest.getFileId())) { + redactionStorageService.insertEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLog); + + } else { + redactionStorageService.updateEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLog); + + if (!entityLogChanges.getNewEntityLogEntries().isEmpty()) { + redactionStorageService.insertEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLogChanges.getNewEntityLogEntries()); + } + if (!entityLogChanges.getUpdatedEntityLogEntries().isEmpty()) { + redactionStorageService.updateEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLogChanges.getUpdatedEntityLogEntries()); + } + } log.info("Created entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - if (entityLogChanges.isHasChanges() || !isReanalysis) { + if (entityLogChanges.hasChanges() || !isReanalysis) { computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, document, addedFileAttributes, entityLogChanges, dictionaryVersion); } @@ -273,7 +295,7 @@ public class AnalyzeService { .fileId(analyzeRequest.getFileId()) .duration(duration) .numberOfPages(numberOfPages) - .hasUpdates(entityLogChanges.isHasChanges()) + .hasUpdates(entityLogChanges.hasChanges()) .analysisVersion(redactionServiceSettings.getAnalysisVersion()) .analysisNumber(analyzeRequest.getAnalysisNumber()) .rulesVersion(entityLog.getRulesVersion()) @@ -323,12 +345,16 @@ public class AnalyzeService { private Set getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest, - EntityLog entityLog, Document document, DictionaryIncrement dictionaryIncrement, - ImportedRedactions importedRedactions) { + ImportedRedactions importedRedactions, + Set relevantManuallyModifiedAnnotationIds) { - return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, entityLog, document, analyzeRequest, importedRedactions); + return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, + document, + analyzeRequest, + importedRedactions, + relevantManuallyModifiedAnnotationIds); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java index f86a8a7b..f0760105 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityChangeLogService.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server.service; import java.time.OffsetDateTime; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.Set; @@ -26,61 +27,57 @@ import lombok.extern.slf4j.Slf4j; public class EntityChangeLogService { @Timed("redactmanager_computeChanges") - public boolean computeChanges(List previousEntityLogEntries, List newEntityLogEntries, int analysisNumber) { + public EntryChanges computeChanges(List previousEntityLogEntries, List newEntityLogEntries, int analysisNumber) { var now = OffsetDateTime.now(); if (previousEntityLogEntries.isEmpty()) { newEntityLogEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now))); - return true; + return new EntryChanges(newEntityLogEntries, new ArrayList<>()); } - boolean hasChanges = false; - + List toInsert = new ArrayList<>(); + List toUpdate = new ArrayList<>(); for (EntityLogEntry entityLogEntry : newEntityLogEntries) { - Optional optionalPreviousEntity = previousEntityLogEntries.stream() .filter(entry -> entry.getId().equals(entityLogEntry.getId())) .findAny(); - if (optionalPreviousEntity.isEmpty()) { - hasChanges = true; entityLogEntry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now)); + toInsert.add(entityLogEntry); continue; } EntityLogEntry previousEntity = optionalPreviousEntity.get(); - entityLogEntry.getChanges().addAll(previousEntity.getChanges()); - if (!previousEntity.getState().equals(entityLogEntry.getState())) { - hasChanges = true; ChangeType changeType = calculateChangeType(entityLogEntry.getState(), previousEntity.getState()); entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now)); + toUpdate.add(entityLogEntry); } } - addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now); - - return hasChanges; + toUpdate.addAll(addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now)); + return new EntryChanges(toInsert, toUpdate); } - private void addRemovedEntriesAsRemoved(List previousEntityLogEntries, List newEntityLogEntries, int analysisNumber, OffsetDateTime now) { + private List addRemovedEntriesAsRemoved(List previousEntityLogEntries, + List newEntityLogEntries, + int analysisNumber, + OffsetDateTime now) { Set existingIds = newEntityLogEntries.stream() .map(EntityLogEntry::getId) .collect(Collectors.toSet()); - List removedEntries = previousEntityLogEntries.stream() .filter(entry -> !existingIds.contains(entry.getId())) .toList(); - removedEntries.stream() .filter(entry -> !entry.getState().equals(EntryState.REMOVED)) .peek(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now))) .forEach(entry -> entry.setState(EntryState.REMOVED)); - newEntityLogEntries.addAll(removedEntries); + return removedEntries; } @@ -104,4 +101,9 @@ public class EntityChangeLogService { return (state.equals(EntryState.REMOVED) || state.equals(EntryState.IGNORED)); } + + public record EntryChanges(List inserted, List updated) { + + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index f206c572..42ed5313 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -32,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; +import com.iqser.red.service.redaction.v1.server.service.EntityChangeLogService.EntryChanges; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import lombok.AccessLevel; @@ -60,11 +61,11 @@ public class EntityLogCreatorService { } - public EntityLog createInitialEntityLog(AnalyzeRequest analyzeRequest, - Document document, - List notFoundEntities, - DictionaryVersion dictionaryVersion, - long rulesVersion) { + public EntityLogChanges createInitialEntityLog(AnalyzeRequest analyzeRequest, + Document document, + List notFoundEntities, + DictionaryVersion dictionaryVersion, + long rulesVersion) { List entityLogEntries = createEntityLogEntries(document, analyzeRequest, notFoundEntities); @@ -72,16 +73,20 @@ public class EntityLogCreatorService { List previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber()); + EntryChanges entryChanges = entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber()); - return new EntityLog(redactionServiceSettings.getAnalysisVersion(), - analyzeRequest.getAnalysisNumber(), - entityLogEntries, - toEntityLogLegalBasis(legalBasis), - dictionaryVersion.getDossierTemplateVersion(), - dictionaryVersion.getDossierVersion(), - rulesVersion, - legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); + return EntityLogChanges.builder() + .entityLog(new EntityLog(redactionServiceSettings.getAnalysisVersion(), + analyzeRequest.getAnalysisNumber(), + entityLogEntries, + toEntityLogLegalBasis(legalBasis), + dictionaryVersion.getDossierTemplateVersion(), + dictionaryVersion.getDossierVersion(), + rulesVersion, + legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()))) + .updatedEntityLogEntries(entryChanges.updated()) + .newEntityLogEntries(entryChanges.inserted()) + .build(); } @@ -95,7 +100,11 @@ public class EntityLogCreatorService { } - public EntityLogChanges updateVersionsAndReturnChanges(EntityLog entityLog, DictionaryVersion dictionaryVersion, AnalyzeRequest analyzeRequest, boolean hasChanges) { + public EntityLogChanges updateVersionsAndReturnChanges(EntityLog entityLog, + DictionaryVersion dictionaryVersion, + AnalyzeRequest analyzeRequest, + List newEntries, + List updatedEntries) { List legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); entityLog.setLegalBasisVersion(legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); @@ -104,14 +113,14 @@ public class EntityLogCreatorService { entityLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion()); entityLog.setAnalysisNumber(analyzeRequest.getAnalysisNumber()); - return new EntityLogChanges(entityLog, hasChanges); + return EntityLogChanges.builder().entityLog(entityLog).newEntityLogEntries(newEntries).updatedEntityLogEntries(updatedEntries).build(); } public EntityLogChanges updatePreviousEntityLog(AnalyzeRequest analyzeRequest, Document document, + EntityLog entityLogWithoutEntries, List notFoundEntries, - EntityLog previousEntityLog, Set sectionsToReanalyseIds, DictionaryVersion dictionaryVersion) { @@ -119,22 +128,14 @@ public class EntityLogCreatorService { .filter(entry -> entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() .get(0))) .collect(Collectors.toList()); - Set newEntityIds = newEntityLogEntries.stream() - .map(EntityLogEntry::getId) - .collect(Collectors.toSet()); - List previousEntriesFromReAnalyzedSections = previousEntityLog.getEntityLogEntry() - .stream() - .filter(entry -> (newEntityIds.contains(entry.getId()) || entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId() - .get(0)))) - .collect(Collectors.toList()); - previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections); + List previousEntriesFromReAnalyzedSections = redactionStorageService.findEntriesContainedBySectionsOrNotContained(analyzeRequest.getDossierId(), + analyzeRequest.getFileId(), + sectionsToReanalyseIds); - boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, newEntityLogEntries, analyzeRequest.getAnalysisNumber()); + EntryChanges entryChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, newEntityLogEntries, analyzeRequest.getAnalysisNumber()); - previousEntityLog.getEntityLogEntry().addAll(newEntityLogEntries); - - return updateVersionsAndReturnChanges(previousEntityLog, dictionaryVersion, analyzeRequest, hasChanges); + return updateVersionsAndReturnChanges(entityLogWithoutEntries, dictionaryVersion, analyzeRequest, entryChanges.inserted(), entryChanges.updated()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/SectionFinderService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/SectionFinderService.java index a78360b8..f45a3299 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/SectionFinderService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/SectionFinderService.java @@ -11,7 +11,6 @@ import java.util.stream.Stream; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedaction; @@ -44,23 +43,14 @@ public class SectionFinderService { @Timed("redactmanager_findSectionsToReanalyse") public Set findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, - EntityLog entityLog, Document document, AnalyzeRequest analyzeRequest, - ImportedRedactions importedRedactions) { + ImportedRedactions importedRedactions, + Set relevantManuallyModifiedAnnotationIds) { long start = System.currentTimeMillis(); - Set relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions()); Set sectionsToReanalyse = new HashSet<>(); - for (EntityLogEntry entry : entityLog.getEntityLogEntry()) { - if (relevantManuallyModifiedAnnotationIds.contains(entry.getId())) { - if (entry.getContainingNodeId().isEmpty()) { - continue; // Empty list means either Entity has not been found or it is between main sections. Thus, this might lead to wrong reanalysis. - } - sectionsToReanalyse.add(entry.getContainingNodeId() - .get(0)); - } - } + var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues() .stream() @@ -133,7 +123,7 @@ public class SectionFinderService { } - private static Set getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) { + public static Set getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) { if (manualRedactions == null) { return new HashSet<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java index 08044c93..bdb5491c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -3,20 +3,24 @@ package com.iqser.red.service.redaction.v1.server.storage; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; +import java.util.Collection; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactionsPerPage; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; +import com.iqser.red.service.persistence.service.v1.api.shared.mongo.service.EntityLogMongoService; import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel; import com.iqser.red.service.redaction.v1.server.model.document.DocumentData; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist; import com.iqser.red.storage.commons.service.StorageService; @@ -39,6 +43,8 @@ public class RedactionStorageService { private final StorageService storageService; + private final EntityLogMongoService entityLogMongoService; + @SneakyThrows public InputStream getStoredObject(String storageId) { @@ -75,6 +81,41 @@ public class RedactionStorageService { } + @SneakyThrows + public void saveEntityLog(String dossierId, String fileId, EntityLog entityLog) { + + entityLogMongoService.upsertEntityLog(dossierId, fileId, entityLog); + } + + + @SneakyThrows + public void updateEntityLogWithoutEntries(String dossierId, String fileId, EntityLog entityLog) { + + entityLogMongoService.saveEntityLogWithoutEntries(dossierId, fileId, entityLog); + } + + + @SneakyThrows + public void insertEntityLog(String dossierId, String fileId, EntityLog entityLog) { + + entityLogMongoService.insertEntityLog(dossierId, fileId, entityLog); + } + + + @SneakyThrows + public void insertEntityLogEntries(String dossierId, String fileId, List entityLogEntries) { + + entityLogMongoService.insertEntityLogEntries(dossierId, fileId, entityLogEntries); + } + + + @SneakyThrows + public void updateEntityLogEntries(String dossierId, String fileId, List entityLogEntries) { + + entityLogMongoService.updateEntityLogEntries(dossierId, fileId, entityLogEntries); + } + + @Timed("redactmanager_getImportedRedactions") public ImportedRedactions getImportedRedactions(String dossierId, String fileId) { @@ -132,7 +173,8 @@ public class RedactionStorageService { public EntityLog getEntityLog(String dossierId, String fileId) { try { - EntityLog entityLog = storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG), EntityLog.class); + EntityLog entityLog = entityLogMongoService.findEntityLogByDossierIdAndFileId(dossierId, fileId) + .orElseThrow(() -> new StorageObjectDoesNotExist("")); entityLog.setEntityLogEntry(entityLog.getEntityLogEntry() .stream() .filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty())) @@ -146,6 +188,33 @@ public class RedactionStorageService { } + @Timed("redactmanager_getRedactionLog") + public EntityLog getEntityLogWithoutEntries(String dossierId, String fileId) { + + try { + return entityLogMongoService.findEntityLogWithoutEntries(dossierId, fileId) + .orElseThrow(() -> new StorageObjectDoesNotExist("")); + } catch (StorageObjectDoesNotExist e) { + log.debug("EntityLog not available."); + return null; + } + + } + + + public Set findIdsOfSectionsToReanalyse(String dossierId, String fileId, Collection entryIds) { + + return entityLogMongoService.findFirstContainingNodeIdForEachEntry(dossierId, fileId, entryIds); + } + + + public List findEntriesContainedBySectionsOrNotContained(String dossierId, String fileId, Collection sectionIds) { + + return entityLogMongoService.findEntityLogEntriesNotContainedOrFirstContainedByElementInList(dossierId, fileId, sectionIds); + } + + + // !Warning! before activating redis cache you need to set // -Dio.netty.noPreferDirect=true -XX:MaxDirectMemorySize=1000M // Jvm args to the largest document data size we want to process. for 4443 pages file that was 500mb. @@ -200,7 +269,7 @@ public class RedactionStorageService { public boolean entityLogExists(String dossierId, String fileId) { - return storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG)); + return entityLogMongoService.entityLogDocumentExists(dossierId, fileId); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml index db3ec857..6078cc9f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml @@ -46,6 +46,13 @@ spring: port: ${REDIS_PORT:6379} password: ${REDIS_PASSWORD} timeout: 60000 + mongodb: + auto-index-creation: true + database: redaction + host: ${MONGODB_HOST:localhost} + port: 27017 + username: ${MONGODB_USER} + password: ${MONGODB_PASSWORD} management: endpoint: diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java index f37de123..e606d38c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; import java.io.File; @@ -16,12 +17,20 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.bson.BsonArray; +import org.bson.BsonDocument; +import org.bson.BsonString; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.mockito.stubbing.Answer; import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.boot.test.util.TestPropertyValues; +import org.springframework.context.ApplicationContextInitializer; +import org.springframework.context.ConfigurableApplicationContext; import org.springframework.core.io.ClassPathResource; +import org.springframework.test.context.ContextConfiguration; import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.dictionarymerge.commons.DictionaryEntry; @@ -30,6 +39,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.EntityLogDocumentRepository; +import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.EntityLogEntryDocumentRepository; import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; @@ -38,19 +49,28 @@ import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.service.AnalyzeService; import com.iqser.red.service.redaction.v1.server.service.UnprocessedChangesService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer; import com.iqser.red.service.redaction.v1.server.utils.LayoutParsingRequestProvider; import com.iqser.red.service.redaction.v1.server.utils.ResourceLoader; import com.iqser.red.service.redaction.v1.server.utils.TextNormalizationUtilities; import com.iqser.red.storage.commons.service.StorageService; import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; +import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline; import com.knecon.fforesight.tenantcommons.TenantContext; import com.knecon.fforesight.tenantcommons.TenantsClient; +import com.knecon.fforesight.tenantcommons.model.MongoDBConnection; +import com.mongodb.MongoCommandException; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; +import com.mongodb.client.MongoDatabase; import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +@ContextConfiguration(initializers = {AbstractRedactionIntegrationTest.Initializer.class}) public abstract class AbstractRedactionIntegrationTest { protected static final String VERTEBRATE_INDICATOR = "vertebrate"; @@ -120,6 +140,11 @@ public abstract class AbstractRedactionIntegrationTest { @Autowired protected UnprocessedChangesService unprocessedChangesService; + @Autowired + protected EntityLogDocumentRepository entityLogDocumentRepository; + @Autowired + protected EntityLogEntryDocumentRepository entityLogEntryDocumentRepository; + @MockBean protected RabbitTemplate rabbitTemplate; @@ -129,6 +154,9 @@ public abstract class AbstractRedactionIntegrationTest { @MockBean private TenantsClient tenantsClient; + @MockBean + private MongoConnectionProvider mongoConnectionProvider; + protected final Map> dictionary = new HashMap<>(); protected final Map> dossierDictionary = new HashMap<>(); protected final Map> falsePositive = new HashMap<>(); @@ -158,6 +186,8 @@ public abstract class AbstractRedactionIntegrationTest { if (this.storageService instanceof FileSystemBackedStorageService) { ((FileSystemBackedStorageService) this.storageService).clearStorage(); } + entityLogDocumentRepository.deleteAll(); + entityLogEntryDocumentRepository.deleteAll(); } @@ -552,4 +582,71 @@ public abstract class AbstractRedactionIntegrationTest { } + + @BeforeEach + protected void mockProvideMongoDBConnection() { + + TenantContext.setTenantId("redaction"); + + var mongoInstance = MongoDBTestContainer.getInstance(); + + when(mongoConnectionProvider.getMongoDBConnection(any())).thenReturn(MongoDBConnection.builder() + .host(mongoInstance.getHost()) + .port(String.valueOf(mongoInstance.getFirstMappedPort())) + .database(MongoDBTestContainer.MONGO_DATABASE) + .username(MongoDBTestContainer.MONGO_USERNAME) + .password(MongoDBTestContainer.MONGO_PASSWORD) + .build()); + } + + + @Slf4j + static class Initializer implements ApplicationContextInitializer { + + public void initialize(ConfigurableApplicationContext configurableApplicationContext) { + + var mongoInstance = MongoDBTestContainer.getInstance(); + mongoInstance.start(); + createMongoDBDatabase(mongoInstance); + + log.info("Hosts are - MongoDB: {}", mongoInstance.getHost()); + + TestPropertyValues.of("MONGODB_HOST=" + mongoInstance.getHost(), + "MONGODB_PORT=" + mongoInstance.getFirstMappedPort(), + "MONGODB_USER=" + MongoDBTestContainer.MONGO_USERNAME, + "MONGODB_PASSWORD=" + MongoDBTestContainer.MONGO_PASSWORD).applyTo(configurableApplicationContext.getEnvironment()); + + } + + } + + + private static void createMongoDBDatabase(MongoDBTestContainer mongoDBTestContainer) { + + try (MongoClient mongoClient = MongoClients.create(String.format("mongodb://%s:%s@%s:%s/", + MongoDBTestContainer.MONGO_USERNAME, + MongoDBTestContainer.MONGO_PASSWORD, + mongoDBTestContainer.getHost(), + mongoDBTestContainer.getFirstMappedPort()))) { + MongoDatabase database = mongoClient.getDatabase(MongoDBTestContainer.MONGO_DATABASE); + BsonDocument createUserCommand = new BsonDocument(); + createUserCommand.append("createUser", new BsonString(MongoDBTestContainer.MONGO_USERNAME)); + createUserCommand.append("pwd", new BsonString(MongoDBTestContainer.MONGO_PASSWORD)); + BsonArray roles = new BsonArray(); + roles.add(new BsonString("readWrite")); + createUserCommand.append("roles", roles); + + try { + database.runCommand(createUserCommand); + } catch (MongoCommandException mongoCommandException) { + // ignore user already exists + if (mongoCommandException.getErrorCode() != 51003) { + throw mongoCommandException; + } + + } + + } + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java index 9c45bc25..08aa7f2e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionAcceptanceTest.java @@ -128,7 +128,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl"); when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES)); - AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); System.out.println("Finished structure analysis"); @@ -194,7 +194,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest { String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl"); when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES)); - AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"); System.out.println("Start Full integration test"); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); System.out.println("Finished structure analysis"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 53eb15d1..01eb61d5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -146,6 +146,18 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { } + @Test + public void testFludioxonilDuplicatedImageEntries() { + + AnalyzeRequest request = prepareStorage("files/Fludioxonil/Fludioxonil_duplicates.pdf", + "files/cv_service_empty_response.json", "files/image_info_fludioxonil_duplicates.json"); + + analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); + AnalyzeResult result = analyzeService.analyze(request); + assertThat(result).isNotNull(); + } + + @Test @Disabled public void testLargeScannedFileOOM() { @@ -608,6 +620,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .build())); analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request); + request.setAnalysisNumber(1); AnalyzeResult result = analyzeService.analyze(request); var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); @@ -666,7 +679,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { .build())); request.setManualRedactions(manualRedactions); - + request.setAnalysisNumber(2); AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); @@ -688,6 +701,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest { when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false)); + request.setAnalysisNumber(3); analyzeService.reanalyze(request); entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java index b6a1efcb..81acfe52 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/PrecursorEntityTest.java @@ -138,7 +138,7 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest { document, notFoundManualEntities, new DictionaryVersion(), - 0L).getEntityLogEntry(); + 0L).getEntityLog().getEntityLogEntry(); assertEquals(1, redactionLogEntries.size()); assertEquals(value, redactionLogEntries.get(0).getValue()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/testcontainers/MongoDBTestContainer.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/testcontainers/MongoDBTestContainer.java new file mode 100644 index 00000000..701c4eea --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/testcontainers/MongoDBTestContainer.java @@ -0,0 +1,35 @@ +package com.iqser.red.service.redaction.v1.server.testcontainers; + +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.utility.DockerImageName; + +public final class MongoDBTestContainer extends GenericContainer { + + private static final String IMAGE_VERSION = "mongo:7.0.2"; + public static final Integer MONGO_PORT = 27017; + public static final String MONGO_DATABASE = "mongo_database"; + public static final String MONGO_PASSWORD = "mongo_password"; + public static final String MONGO_USERNAME = "mongo_username"; + private static MongoDBTestContainer mongoDB; + + + private MongoDBTestContainer() { + + super(DockerImageName.parse(IMAGE_VERSION)); + + } + + + public static MongoDBTestContainer getInstance() { + + if (mongoDB == null) { + mongoDB = new MongoDBTestContainer().withEnv("MONGO_INITDB_ROOT_USERNAME", MONGO_USERNAME) + .withEnv("MONGO_INITDB_ROOT_PASSWORD", MONGO_PASSWORD) + .withEnv("MONGO_INITDB_DATABASE", MONGO_DATABASE) + .withExposedPorts(MONGO_PORT); + + } + return mongoDB; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml index 2d87538c..b16c3b9d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml @@ -12,6 +12,15 @@ spring: allow-circular-references: true # FIXME cache: type: NONE + data: + mongodb: + auto-index-creation: true + # todo: multi-tenancy + database: redaction + host: ${MONGODB_HOST:localhost} + port: ${MONGODB_PORT:27017} + username: ${MONGODB_USER} + password: ${MONGODB_PASSWORD} processing.kafkastreams: false diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Fludioxonil/Fludioxonil_duplicates.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Fludioxonil/Fludioxonil_duplicates.pdf new file mode 100644 index 00000000..2893f779 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Fludioxonil/Fludioxonil_duplicates.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_info_fludioxonil_duplicates.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_info_fludioxonil_duplicates.json new file mode 100644 index 00000000..94768ebd --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/image_info_fludioxonil_duplicates.json @@ -0,0 +1,303 @@ +{ + "dossierId": "c4849583-af00-4bef-934f-491ef761e984", + "fileId": "790c52eb58e02f2b3b3b2b19ec7d6e1e", + "targetFileExtension": "ORIGIN.pdf.gz", + "responseFileExtension": "IMAGE_INFO.json.gz", + "X-TENANT-ID": "redaction", + "data": [ + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "44CF9F4E3EFAF3FFCF3FFCF3F", + "position": { + "x1": 71, + "x2": 511, + "y1": 627, + "y2": 736, + "pageNumber": 5 + }, + "geometry": { + "width": 440, + "height": 109 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.3094, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 4.0367, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "FFFFF78F1EFBC32F8F717FCF1", + "position": { + "x1": 71, + "x2": 511, + "y1": 410, + "y2": 519, + "pageNumber": 5 + }, + "geometry": { + "width": 440, + "height": 109 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.3094, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 4.0367, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "FFCF3FFFFFCC733DE63B00038", + "position": { + "x1": 71, + "x2": 511, + "y1": 519, + "y2": 628, + "pageNumber": 5 + }, + "geometry": { + "width": 440, + "height": 109 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.3094, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 4.0367, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "FFF6D344888AE08B5F6DB1FFF", + "position": { + "x1": 196, + "x2": 302, + "y1": 453, + "y2": 523, + "pageNumber": 6 + }, + "geometry": { + "width": 106, + "height": 70 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.1217, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.5143, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "FFF6B30E904BE08F0F3DF9FFF", + "position": { + "x1": 196, + "x2": 303, + "y1": 217, + "y2": 284, + "pageNumber": 6 + }, + "geometry": { + "width": 107, + "height": 67 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.1196, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.597, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "FFF7D14488FA60844F1D39FFF", + "position": { + "x1": 197, + "x2": 301, + "y1": 379, + "y2": 449, + "pageNumber": 6 + }, + "geometry": { + "width": 104, + "height": 70 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.1205, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.4857, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + }, + { + "classification": { + "label": "formula", + "probabilities": { + "formula": 1.0, + "logo": 0.0, + "other": 0.0, + "signature": 0.0 + } + }, + "representation": "FFFFCB3468BD60842F9CF8F7F", + "position": { + "x1": 208, + "x2": 290, + "y1": 304, + "y2": 374, + "pageNumber": 6 + }, + "geometry": { + "width": 82, + "height": 70 + }, + "alpha": false, + "filters": { + "geometry": { + "imageSize": { + "quotient": 0.107, + "tooLarge": false, + "tooSmall": false + }, + "imageFormat": { + "quotient": 1.1714, + "tooTall": false, + "tooWide": false + } + }, + "probability": { + "unconfident": false + }, + "allPassed": true + } + } + ] +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/crafted document.NER_ENTITIES.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/crafted document.NER_ENTITIES.json index 5076e2c7..59752665 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/crafted document.NER_ENTITIES.json +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/crafted document.NER_ENTITIES.json @@ -1 +1,767 @@ -{"dossierId": "c4e2fc37-5956-449f-a2c7-04574f3096ce", "fileId": "b85e859a9df0c8f5b0635b262a82d3df", "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz", "X-TENANT-ID": "redaction", "data": {"0": [{"value": "Lastname, J.", "startOffset": 54, "endOffset": 66, "type": "CBI_author"}, {"value": "Doe, M.", "startOffset": 67, "endOffset": 74, "type": "CBI_author"}, {"value": "Mustermann Lastname M.", "startOffset": 75, "endOffset": 97, "type": "CBI_author"}, {"value": "Doe J. Mustermann M.", "startOffset": 99, "endOffset": 119, "type": "CBI_author"}], "1": [{"value": "Eikenboom Charalampos", "startOffset": 148, "endOffset": 169, "type": "ORG"}, {"value": "Schenk Tanja Schmitt \u2190", "startOffset": 170, "endOffset": 192, "type": "ORG"}], "2": [{"value": "Rue Jean Baffier", "startOffset": 214, "endOffset": 230, "type": "CBI_author"}, {"value": "7232", "startOffset": 155, "endOffset": 159, "type": "POSTAL"}, {"value": "CX", "startOffset": 160, "endOffset": 162, "type": "COUNTRY"}, {"value": "Warnsveld", "startOffset": 163, "endOffset": 172, "type": "CITY"}, {"value": "Netherlands", "startOffset": 174, "endOffset": 185, "type": "COUNTRY"}, {"value": "NL", "startOffset": 187, "endOffset": 189, "type": "COUNTRY"}, {"value": "Institut Industries", "startOffset": 190, "endOffset": 209, "type": "ORG"}, {"value": "33", "startOffset": 211, "endOffset": 213, "type": "CARDINAL"}, {"value": "18000", "startOffset": 232, "endOffset": 237, "type": "CARDINAL"}, {"value": "Bourges", "startOffset": 238, "endOffset": 245, "type": "CITY"}, {"value": "France", "startOffset": 247, "endOffset": 253, "type": "COUNTRY"}, {"value": "18300", "startOffset": 282, "endOffset": 287, "type": "CARDINAL"}, {"value": "Saint-Satur", "startOffset": 288, "endOffset": 299, "type": "CITY"}, {"value": "France", "startOffset": 301, "endOffset": 307, "type": "COUNTRY"}, {"value": "Lesdo Industries", "startOffset": 312, "endOffset": 328, "type": "ORG"}, {"value": "Ch\u00e4ppelistr\u00e4ssli", "startOffset": 330, "endOffset": 346, "type": "ORG"}, {"value": "6078", "startOffset": 348, "endOffset": 352, "type": "POSTAL"}, {"value": "Lungern", "startOffset": 353, "endOffset": 360, "type": "STREET"}, {"value": "Switzerland", "startOffset": 362, "endOffset": 373, "type": "COUNTRY"}, {"value": "Shlissel'burgskaya Ulitsa", "startOffset": 374, "endOffset": 399, "type": "ORG"}, {"value": "Nizhny Novgorod Oblast", "startOffset": 401, "endOffset": 423, "type": "STREET"}, {"value": "Russia", "startOffset": 425, "endOffset": 431, "type": "CITY"}, {"value": "603034", "startOffset": 433, "endOffset": 439, "type": "POSTAL"}, {"value": "RU", "startOffset": 441, "endOffset": 443, "type": "COUNTRY"}, {"value": "Karl Johans Gate", "startOffset": 444, "endOffset": 460, "type": "STREET"}, {"value": "11", "startOffset": 461, "endOffset": 463, "type": "CARDINAL"}, {"value": "0154", "startOffset": 465, "endOffset": 469, "type": "POSTAL"}, {"value": "Oslo", "startOffset": 470, "endOffset": 474, "type": "CITY"}, {"value": "Norway", "startOffset": 476, "endOffset": 482, "type": "COUNTRY"}], "3": [{"value": "Expand", "startOffset": 67, "endOffset": 73, "type": "STATE"}, {"value": "Hint Clarissa", "startOffset": 77, "endOffset": 90, "type": "ORG"}, {"value": "Dict", "startOffset": 114, "endOffset": 118, "type": "ORG"}, {"value": "Authors-Dict", "startOffset": 171, "endOffset": 183, "type": "ORG"}], "4": [{"value": "Michael N.", "startOffset": 149, "endOffset": 159, "type": "CBI_author"}, {"value": "Funnarie B.", "startOffset": 244, "endOffset": 255, "type": "CBI_author"}, {"value": "Weyland Industries", "startOffset": 218, "endOffset": 236, "type": "ORG"}, {"value": "Tyrell Corporation", "startOffset": 406, "endOffset": 424, "type": "ORG"}], "6": [{"value": "Melanie", "startOffset": 292, "endOffset": 299, "type": "CBI_author"}], "7": [{"value": "Stark Industries", "startOffset": 184, "endOffset": 200, "type": "ORG"}], "8": [{"value": "Omni Consumer Products Do", "startOffset": 197, "endOffset": 222, "type": "ORG"}], "9": [{"value": "Omni Consumer Products Do", "startOffset": 122, "endOffset": 147, "type": "ORG"}], "10": [{"value": "Asya Lyon", "startOffset": 253, "endOffset": 262, "type": "CBI_author"}, {"value": "Carina Madsen", "startOffset": 264, "endOffset": 277, "type": "CBI_author"}, {"value": "Alexandra H\u00e4usler", "startOffset": 279, "endOffset": 296, "type": "CBI_author"}, {"value": "Hanke Mendel", "startOffset": 298, "endOffset": 310, "type": "CBI_author"}, {"value": "Kwok, Jun K.", "startOffset": 444, "endOffset": 456, "type": "CBI_author"}, {"value": "Tu Wong", "startOffset": 458, "endOffset": 465, "type": "CBI_author"}, {"value": "Qiang Suen", "startOffset": 467, "endOffset": 477, "type": "CBI_author"}, {"value": "Zhou Mah", "startOffset": 479, "endOffset": 487, "type": "CBI_author"}, {"value": "Lei W. Huang", "startOffset": 499, "endOffset": 511, "type": "CBI_author"}, {"value": "Ru X.", "startOffset": 513, "endOffset": 518, "type": "CBI_author"}, {"value": "Oxford University Press", "startOffset": 166, "endOffset": 189, "type": "ORG"}, {"value": "Iakovos Geiger", "startOffset": 222, "endOffset": 236, "type": "ORG"}, {"value": "Julian Ritter", "startOffset": 238, "endOffset": 251, "type": "CITY"}, {"value": "Ranya", "startOffset": 312, "endOffset": 317, "type": "COUNTRY"}, {"value": "Eikenboom", "startOffset": 318, "endOffset": 327, "type": "ORG"}, {"value": "Ning Liu", "startOffset": 489, "endOffset": 497, "type": "STREET"}], "11": [{"value": "Nurullah \u00d6zg\u00fcr", "startOffset": 210, "endOffset": 224, "type": "CBI_author"}, {"value": "Reyhan B.", "startOffset": 228, "endOffset": 237, "type": "CBI_author"}, {"value": "Alfred Xinyi Y.", "startOffset": 250, "endOffset": 265, "type": "CBI_author"}, {"value": "Redacted", "startOffset": 12, "endOffset": 20, "type": "ORG"}, {"value": "Aomachi", "startOffset": 154, "endOffset": 161, "type": "ORG"}, {"value": "Nomi", "startOffset": 163, "endOffset": 167, "type": "ORG"}, {"value": "Ishikawa", "startOffset": 169, "endOffset": 177, "type": "CITY"}, {"value": "923-1101", "startOffset": 178, "endOffset": 186, "type": "CARDINAL"}, {"value": "Japan", "startOffset": 188, "endOffset": 193, "type": "COUNTRY"}, {"value": "JP", "startOffset": 195, "endOffset": 197, "type": "COUNTRY"}, {"value": "Dict", "startOffset": 301, "endOffset": 305, "type": "ORG"}], "12": [{"value": "Redact Emails", "startOffset": 12, "endOffset": 25, "type": "ORG"}], "13": [{"value": "Central Research Industry", "startOffset": 462, "endOffset": 487, "type": "ORG"}, {"value": "Maximiliam Schmitt", "startOffset": 718, "endOffset": 736, "type": "ORG"}, {"value": "European Central Institute", "startOffset": 915, "endOffset": 941, "type": "ORG"}, {"value": "Emilia Lockhart Alternative", "startOffset": 963, "endOffset": 990, "type": "ORG"}, {"value": "Cyberdyne Systems Tower", "startOffset": 1000, "endOffset": 1023, "type": "ORG"}, {"value": "121a", "startOffset": 1032, "endOffset": 1036, "type": "CARDINAL"}, {"value": "Hong Kong", "startOffset": 1037, "endOffset": 1046, "type": "COUNTRY"}, {"value": "BT", "startOffset": 1048, "endOffset": 1050, "type": "COUNTRY"}], "15": [{"value": "Soylent Corporation", "startOffset": 16, "endOffset": 35, "type": "ORG"}, {"value": "Riddley", "startOffset": 51, "endOffset": 58, "type": "ORG"}, {"value": "359-21", "startOffset": 74, "endOffset": 80, "type": "CARDINAL"}, {"value": "Huam-dong", "startOffset": 81, "endOffset": 90, "type": "STATE"}, {"value": "Yongsan-gu", "startOffset": 91, "endOffset": 101, "type": "CITY"}, {"value": "Seoul", "startOffset": 102, "endOffset": 107, "type": "CITY"}, {"value": "South Korea Phone", "startOffset": 109, "endOffset": 126, "type": "COUNTRY"}, {"value": "Soylent Corporation", "startOffset": 345, "endOffset": 364, "type": "ORG"}, {"value": "Riddley Scott", "startOffset": 365, "endOffset": 378, "type": "STREET"}, {"value": "359-21", "startOffset": 379, "endOffset": 385, "type": "CARDINAL"}, {"value": "Huam-dong", "startOffset": 386, "endOffset": 395, "type": "STATE"}, {"value": "Yongsan-gu", "startOffset": 396, "endOffset": 406, "type": "CITY"}, {"value": "Seoul", "startOffset": 407, "endOffset": 412, "type": "CITY"}, {"value": "South Korea", "startOffset": 414, "endOffset": 425, "type": "COUNTRY"}, {"value": "Tel", "startOffset": 614, "endOffset": 617, "type": "ORG"}, {"value": "Central Research Industry", "startOffset": 890, "endOffset": 915, "type": "ORG"}, {"value": "Maximiliam Schmitt", "startOffset": 1146, "endOffset": 1164, "type": "ORG"}, {"value": "European Central Institute", "startOffset": 1343, "endOffset": 1369, "type": "ORG"}, {"value": "Emilia Lockhart Alternative", "startOffset": 1391, "endOffset": 1418, "type": "ORG"}, {"value": "Cyberdyne Systems Tower", "startOffset": 1428, "endOffset": 1451, "type": "ORG"}, {"value": "121a", "startOffset": 1460, "endOffset": 1464, "type": "CARDINAL"}, {"value": "Hong Kong", "startOffset": 1465, "endOffset": 1474, "type": "COUNTRY"}, {"value": "BT", "startOffset": 1476, "endOffset": 1478, "type": "COUNTRY"}], "16": [{"value": "Umbrella Corporation", "startOffset": 208, "endOffset": 228, "type": "ORG"}, {"value": "Jill", "startOffset": 238, "endOffset": 242, "type": "ORG"}, {"value": "359-21", "startOffset": 262, "endOffset": 268, "type": "CARDINAL"}, {"value": "Huam-dong", "startOffset": 269, "endOffset": 278, "type": "STATE"}, {"value": "Yongsan-gu", "startOffset": 279, "endOffset": 289, "type": "CITY"}, {"value": "Seoul", "startOffset": 290, "endOffset": 295, "type": "CITY"}, {"value": "South Korea Phone", "startOffset": 297, "endOffset": 314, "type": "COUNTRY"}, {"value": "Umbrella Corporation", "startOffset": 407, "endOffset": 427, "type": "ORG"}, {"value": "Jill Valentine", "startOffset": 428, "endOffset": 442, "type": "STREET"}, {"value": "359-21", "startOffset": 443, "endOffset": 449, "type": "CARDINAL"}, {"value": "Huam-dong", "startOffset": 450, "endOffset": 459, "type": "STATE"}, {"value": "Yongsan-gu", "startOffset": 460, "endOffset": 470, "type": "CITY"}, {"value": "Seoul", "startOffset": 471, "endOffset": 476, "type": "CITY"}, {"value": "South Korea", "startOffset": 478, "endOffset": 489, "type": "COUNTRY"}], "19": [{"value": "Umbrella Corporation", "startOffset": 209, "endOffset": 229, "type": "ORG"}], "21": [{"value": "Purity Hint", "startOffset": 9, "endOffset": 20, "type": "ORG"}, {"value": "Hint", "startOffset": 35, "endOffset": 39, "type": "ORG"}, {"value": "Hint Hint", "startOffset": 122, "endOffset": 131, "type": "ORG"}, {"value": "Hint Hint", "startOffset": 169, "endOffset": 178, "type": "ORG"}, {"value": "Hint Hint Purity", "startOffset": 216, "endOffset": 232, "type": "ORG"}, {"value": "Hint Hint", "startOffset": 1359, "endOffset": 1368, "type": "ORG"}, {"value": "Ignore", "startOffset": 2313, "endOffset": 2319, "type": "STREET"}, {"value": "Redact Signatures Redact", "startOffset": 2589, "endOffset": 2613, "type": "ORG"}, {"value": "Dilara Sonnenschein Signed", "startOffset": 2743, "endOffset": 2769, "type": "ORG"}, {"value": "Tobias", "startOffset": 2774, "endOffset": 2780, "type": "ORG"}, {"value": "Dilara", "startOffset": 2826, "endOffset": 2832, "type": "ORG"}, {"value": "Tobias M\u00fcller Rule", "startOffset": 2884, "endOffset": 2902, "type": "ORG"}, {"value": "43:", "startOffset": 2903, "endOffset": 2906, "type": "CARDINAL"}, {"value": "Redact Logo Redact", "startOffset": 2907, "endOffset": 2925, "type": "ORG"}]}} \ No newline at end of file +{ + "dossierId": "c4e2fc37-5956-449f-a2c7-04574f3096ce", + "fileId": "b85e859a9df0c8f5b0635b262a82d3df", + "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", + "responseFileExtension": "NER_ENTITIES.json.gz", + "X-TENANT-ID": "redaction", + "data": { + "0": [ + { + "value": "Lastname, J.", + "startOffset": 54, + "endOffset": 66, + "type": "CBI_author" + }, + { + "value": "Doe, M.", + "startOffset": 67, + "endOffset": 74, + "type": "CBI_author" + }, + { + "value": "Mustermann Lastname M.", + "startOffset": 75, + "endOffset": 97, + "type": "CBI_author" + }, + { + "value": "Doe J. Mustermann M.", + "startOffset": 99, + "endOffset": 119, + "type": "CBI_author" + } + ], + "1": [ + { + "value": "Eikenboom Charalampos", + "startOffset": 148, + "endOffset": 169, + "type": "ORG" + }, + { + "value": "Schenk Tanja Schmitt \u2190", + "startOffset": 170, + "endOffset": 192, + "type": "ORG" + } + ], + "2": [ + { + "value": "Rue Jean Baffier", + "startOffset": 214, + "endOffset": 230, + "type": "CBI_author" + }, + { + "value": "7232", + "startOffset": 155, + "endOffset": 159, + "type": "POSTAL" + }, + { + "value": "CX", + "startOffset": 160, + "endOffset": 162, + "type": "COUNTRY" + }, + { + "value": "Warnsveld", + "startOffset": 163, + "endOffset": 172, + "type": "CITY" + }, + { + "value": "Netherlands", + "startOffset": 174, + "endOffset": 185, + "type": "COUNTRY" + }, + { + "value": "NL", + "startOffset": 187, + "endOffset": 189, + "type": "COUNTRY" + }, + { + "value": "Institut Industries", + "startOffset": 190, + "endOffset": 209, + "type": "ORG" + }, + { + "value": "33", + "startOffset": 211, + "endOffset": 213, + "type": "CARDINAL" + }, + { + "value": "18000", + "startOffset": 232, + "endOffset": 237, + "type": "CARDINAL" + }, + { + "value": "Bourges", + "startOffset": 238, + "endOffset": 245, + "type": "CITY" + }, + { + "value": "France", + "startOffset": 247, + "endOffset": 253, + "type": "COUNTRY" + }, + { + "value": "18300", + "startOffset": 282, + "endOffset": 287, + "type": "CARDINAL" + }, + { + "value": "Saint-Satur", + "startOffset": 288, + "endOffset": 299, + "type": "CITY" + }, + { + "value": "France", + "startOffset": 301, + "endOffset": 307, + "type": "COUNTRY" + }, + { + "value": "Lesdo Industries", + "startOffset": 312, + "endOffset": 328, + "type": "ORG" + }, + { + "value": "Ch\u00e4ppelistr\u00e4ssli", + "startOffset": 330, + "endOffset": 346, + "type": "ORG" + }, + { + "value": "6078", + "startOffset": 348, + "endOffset": 352, + "type": "POSTAL" + }, + { + "value": "Lungern", + "startOffset": 353, + "endOffset": 360, + "type": "STREET" + }, + { + "value": "Switzerland", + "startOffset": 362, + "endOffset": 373, + "type": "COUNTRY" + }, + { + "value": "Shlissel'burgskaya Ulitsa", + "startOffset": 374, + "endOffset": 399, + "type": "ORG" + }, + { + "value": "Nizhny Novgorod Oblast", + "startOffset": 401, + "endOffset": 423, + "type": "STREET" + }, + { + "value": "Russia", + "startOffset": 425, + "endOffset": 431, + "type": "CITY" + }, + { + "value": "603034", + "startOffset": 433, + "endOffset": 439, + "type": "POSTAL" + }, + { + "value": "RU", + "startOffset": 441, + "endOffset": 443, + "type": "COUNTRY" + }, + { + "value": "Karl Johans Gate", + "startOffset": 444, + "endOffset": 460, + "type": "STREET" + }, + { + "value": "11", + "startOffset": 461, + "endOffset": 463, + "type": "CARDINAL" + }, + { + "value": "0154", + "startOffset": 465, + "endOffset": 469, + "type": "POSTAL" + }, + { + "value": "Oslo", + "startOffset": 470, + "endOffset": 474, + "type": "CITY" + }, + { + "value": "Norway", + "startOffset": 476, + "endOffset": 482, + "type": "COUNTRY" + } + ], + "3": [ + { + "value": "Expand", + "startOffset": 67, + "endOffset": 73, + "type": "STATE" + }, + { + "value": "Hint Clarissa", + "startOffset": 77, + "endOffset": 90, + "type": "ORG" + }, + { + "value": "Dict", + "startOffset": 114, + "endOffset": 118, + "type": "ORG" + }, + { + "value": "Authors-Dict", + "startOffset": 171, + "endOffset": 183, + "type": "ORG" + } + ], + "4": [ + { + "value": "Michael N.", + "startOffset": 149, + "endOffset": 159, + "type": "CBI_author" + }, + { + "value": "Funnarie B.", + "startOffset": 244, + "endOffset": 255, + "type": "CBI_author" + }, + { + "value": "Weyland Industries", + "startOffset": 218, + "endOffset": 236, + "type": "ORG" + }, + { + "value": "Tyrell Corporation", + "startOffset": 406, + "endOffset": 424, + "type": "ORG" + } + ], + "6": [ + { + "value": "Melanie", + "startOffset": 292, + "endOffset": 299, + "type": "CBI_author" + } + ], + "7": [ + { + "value": "Stark Industries", + "startOffset": 184, + "endOffset": 200, + "type": "ORG" + } + ], + "8": [ + { + "value": "Omni Consumer Products Do", + "startOffset": 197, + "endOffset": 222, + "type": "ORG" + } + ], + "9": [ + { + "value": "Omni Consumer Products Do", + "startOffset": 122, + "endOffset": 147, + "type": "ORG" + } + ], + "10": [ + { + "value": "Asya Lyon", + "startOffset": 253, + "endOffset": 262, + "type": "CBI_author" + }, + { + "value": "Carina Madsen", + "startOffset": 264, + "endOffset": 277, + "type": "CBI_author" + }, + { + "value": "Alexandra H\u00e4usler", + "startOffset": 279, + "endOffset": 296, + "type": "CBI_author" + }, + { + "value": "Hanke Mendel", + "startOffset": 298, + "endOffset": 310, + "type": "CBI_author" + }, + { + "value": "Kwok, Jun K.", + "startOffset": 444, + "endOffset": 456, + "type": "CBI_author" + }, + { + "value": "Tu Wong", + "startOffset": 458, + "endOffset": 465, + "type": "CBI_author" + }, + { + "value": "Qiang Suen", + "startOffset": 467, + "endOffset": 477, + "type": "CBI_author" + }, + { + "value": "Zhou Mah", + "startOffset": 479, + "endOffset": 487, + "type": "CBI_author" + }, + { + "value": "Lei W. Huang", + "startOffset": 499, + "endOffset": 511, + "type": "CBI_author" + }, + { + "value": "Ru X.", + "startOffset": 513, + "endOffset": 518, + "type": "CBI_author" + }, + { + "value": "Oxford University Press", + "startOffset": 166, + "endOffset": 189, + "type": "ORG" + }, + { + "value": "Iakovos Geiger", + "startOffset": 222, + "endOffset": 236, + "type": "ORG" + }, + { + "value": "Julian Ritter", + "startOffset": 238, + "endOffset": 251, + "type": "CITY" + }, + { + "value": "Ranya", + "startOffset": 312, + "endOffset": 317, + "type": "COUNTRY" + }, + { + "value": "Eikenboom", + "startOffset": 318, + "endOffset": 327, + "type": "ORG" + }, + { + "value": "Ning Liu", + "startOffset": 489, + "endOffset": 497, + "type": "STREET" + } + ], + "11": [ + { + "value": "Nurullah \u00d6zg\u00fcr", + "startOffset": 210, + "endOffset": 224, + "type": "CBI_author" + }, + { + "value": "Reyhan B.", + "startOffset": 228, + "endOffset": 237, + "type": "CBI_author" + }, + { + "value": "Alfred Xinyi Y.", + "startOffset": 250, + "endOffset": 265, + "type": "CBI_author" + }, + { + "value": "Redacted", + "startOffset": 12, + "endOffset": 20, + "type": "ORG" + }, + { + "value": "Aomachi", + "startOffset": 154, + "endOffset": 161, + "type": "ORG" + }, + { + "value": "Nomi", + "startOffset": 163, + "endOffset": 167, + "type": "ORG" + }, + { + "value": "Ishikawa", + "startOffset": 169, + "endOffset": 177, + "type": "CITY" + }, + { + "value": "923-1101", + "startOffset": 178, + "endOffset": 186, + "type": "CARDINAL" + }, + { + "value": "Japan", + "startOffset": 188, + "endOffset": 193, + "type": "COUNTRY" + }, + { + "value": "JP", + "startOffset": 195, + "endOffset": 197, + "type": "COUNTRY" + }, + { + "value": "Dict", + "startOffset": 301, + "endOffset": 305, + "type": "ORG" + } + ], + "12": [ + { + "value": "Redact Emails", + "startOffset": 12, + "endOffset": 25, + "type": "ORG" + } + ], + "13": [ + { + "value": "Central Research Industry", + "startOffset": 462, + "endOffset": 487, + "type": "ORG" + }, + { + "value": "Maximiliam Schmitt", + "startOffset": 718, + "endOffset": 736, + "type": "ORG" + }, + { + "value": "European Central Institute", + "startOffset": 915, + "endOffset": 941, + "type": "ORG" + }, + { + "value": "Emilia Lockhart Alternative", + "startOffset": 963, + "endOffset": 990, + "type": "ORG" + }, + { + "value": "Cyberdyne Systems Tower", + "startOffset": 1000, + "endOffset": 1023, + "type": "ORG" + }, + { + "value": "121a", + "startOffset": 1032, + "endOffset": 1036, + "type": "CARDINAL" + }, + { + "value": "Hong Kong", + "startOffset": 1037, + "endOffset": 1046, + "type": "COUNTRY" + }, + { + "value": "BT", + "startOffset": 1048, + "endOffset": 1050, + "type": "COUNTRY" + } + ], + "15": [ + { + "value": "Soylent Corporation", + "startOffset": 16, + "endOffset": 35, + "type": "ORG" + }, + { + "value": "Riddley", + "startOffset": 51, + "endOffset": 58, + "type": "ORG" + }, + { + "value": "359-21", + "startOffset": 74, + "endOffset": 80, + "type": "CARDINAL" + }, + { + "value": "Huam-dong", + "startOffset": 81, + "endOffset": 90, + "type": "STATE" + }, + { + "value": "Yongsan-gu", + "startOffset": 91, + "endOffset": 101, + "type": "CITY" + }, + { + "value": "Seoul", + "startOffset": 102, + "endOffset": 107, + "type": "CITY" + }, + { + "value": "South Korea Phone", + "startOffset": 109, + "endOffset": 126, + "type": "COUNTRY" + }, + { + "value": "Soylent Corporation", + "startOffset": 345, + "endOffset": 364, + "type": "ORG" + }, + { + "value": "Riddley Scott", + "startOffset": 365, + "endOffset": 378, + "type": "STREET" + }, + { + "value": "359-21", + "startOffset": 379, + "endOffset": 385, + "type": "CARDINAL" + }, + { + "value": "Huam-dong", + "startOffset": 386, + "endOffset": 395, + "type": "STATE" + }, + { + "value": "Yongsan-gu", + "startOffset": 396, + "endOffset": 406, + "type": "CITY" + }, + { + "value": "Seoul", + "startOffset": 407, + "endOffset": 412, + "type": "CITY" + }, + { + "value": "South Korea", + "startOffset": 414, + "endOffset": 425, + "type": "COUNTRY" + }, + { + "value": "Tel", + "startOffset": 614, + "endOffset": 617, + "type": "ORG" + }, + { + "value": "Central Research Industry", + "startOffset": 890, + "endOffset": 915, + "type": "ORG" + }, + { + "value": "Maximiliam Schmitt", + "startOffset": 1146, + "endOffset": 1164, + "type": "ORG" + }, + { + "value": "European Central Institute", + "startOffset": 1343, + "endOffset": 1369, + "type": "ORG" + }, + { + "value": "Emilia Lockhart Alternative", + "startOffset": 1391, + "endOffset": 1418, + "type": "ORG" + }, + { + "value": "Cyberdyne Systems Tower", + "startOffset": 1428, + "endOffset": 1451, + "type": "ORG" + }, + { + "value": "121a", + "startOffset": 1460, + "endOffset": 1464, + "type": "CARDINAL" + }, + { + "value": "Hong Kong", + "startOffset": 1465, + "endOffset": 1474, + "type": "COUNTRY" + }, + { + "value": "BT", + "startOffset": 1476, + "endOffset": 1478, + "type": "COUNTRY" + } + ], + "16": [ + { + "value": "Umbrella Corporation", + "startOffset": 208, + "endOffset": 228, + "type": "ORG" + }, + { + "value": "Jill", + "startOffset": 238, + "endOffset": 242, + "type": "ORG" + }, + { + "value": "359-21", + "startOffset": 262, + "endOffset": 268, + "type": "CARDINAL" + }, + { + "value": "Huam-dong", + "startOffset": 269, + "endOffset": 278, + "type": "STATE" + }, + { + "value": "Yongsan-gu", + "startOffset": 279, + "endOffset": 289, + "type": "CITY" + }, + { + "value": "Seoul", + "startOffset": 290, + "endOffset": 295, + "type": "CITY" + }, + { + "value": "South Korea Phone", + "startOffset": 297, + "endOffset": 314, + "type": "COUNTRY" + }, + { + "value": "Umbrella Corporation", + "startOffset": 407, + "endOffset": 427, + "type": "ORG" + }, + { + "value": "Jill Valentine", + "startOffset": 428, + "endOffset": 442, + "type": "STREET" + }, + { + "value": "359-21", + "startOffset": 443, + "endOffset": 449, + "type": "CARDINAL" + }, + { + "value": "Huam-dong", + "startOffset": 450, + "endOffset": 459, + "type": "STATE" + }, + { + "value": "Yongsan-gu", + "startOffset": 460, + "endOffset": 470, + "type": "CITY" + }, + { + "value": "Seoul", + "startOffset": 471, + "endOffset": 476, + "type": "CITY" + }, + { + "value": "South Korea", + "startOffset": 478, + "endOffset": 489, + "type": "COUNTRY" + } + ], + "19": [ + { + "value": "Umbrella Corporation", + "startOffset": 209, + "endOffset": 229, + "type": "ORG" + } + ] + } +} \ No newline at end of file