RED-8702: Explore document databases to store entityLog
This commit is contained in:
parent
fcae77dcd4
commit
dbc5d9ab43
@ -7,7 +7,7 @@ description = "redaction-service-api-v1"
|
||||
|
||||
dependencies {
|
||||
implementation("org.springframework:spring-web:6.0.12")
|
||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.351.0")
|
||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.383.0")
|
||||
}
|
||||
|
||||
publishing {
|
||||
|
||||
@ -12,12 +12,14 @@ plugins {
|
||||
description = "redaction-service-server-v1"
|
||||
|
||||
|
||||
val layoutParserVersion = "0.96.0"
|
||||
val layoutParserVersion = "0.107.0"
|
||||
val jacksonVersion = "2.15.2"
|
||||
val droolsVersion = "9.44.0.Final"
|
||||
val pdfBoxVersion = "3.0.0"
|
||||
val persistenceServiceVersion = "2.380.0"
|
||||
val persistenceServiceVersion = "2.383.0"
|
||||
val springBootStarterVersion = "3.1.5"
|
||||
val springCloudVersion = "4.0.4"
|
||||
val testContainersVersion = "1.19.7"
|
||||
|
||||
configurations {
|
||||
all {
|
||||
@ -31,6 +33,7 @@ dependencies {
|
||||
|
||||
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
|
||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}") { exclude(group = "org.springframework.boot") }
|
||||
implementation("com.iqser.red.service:persistence-service-shared-mongo-v1:${persistenceServiceVersion}")
|
||||
implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")
|
||||
|
||||
implementation("com.iqser.red.commons:spring-commons:6.2.0")
|
||||
@ -38,7 +41,7 @@ dependencies {
|
||||
|
||||
implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.45.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.21.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.23.0")
|
||||
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
|
||||
|
||||
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
|
||||
@ -52,7 +55,7 @@ dependencies {
|
||||
|
||||
implementation("org.locationtech.jts:jts-core:1.19.0")
|
||||
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:${springCloudVersion}")
|
||||
implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
|
||||
implementation("org.springframework.boot:spring-boot-starter-cache:${springBootStarterVersion}")
|
||||
implementation("org.springframework.boot:spring-boot-starter-data-redis:${springBootStarterVersion}")
|
||||
@ -66,6 +69,9 @@ dependencies {
|
||||
testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}")
|
||||
testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
|
||||
|
||||
testImplementation("org.testcontainers:testcontainers:${testContainersVersion}")
|
||||
testImplementation("org.testcontainers:junit-jupiter:${testContainersVersion}")
|
||||
|
||||
testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}")
|
||||
testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}")
|
||||
testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") {
|
||||
@ -76,6 +82,12 @@ dependencies {
|
||||
}
|
||||
}
|
||||
|
||||
// Imports the Testcontainers BOM at testContainersVersion into dependency management.
// The testcontainers test dependencies declared in this script reference the same version variable.
dependencyManagement {
|
||||
imports {
|
||||
mavenBom("org.testcontainers:testcontainers-bom:${testContainersVersion}")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
tasks.test {
|
||||
configure<JacocoTaskExtension> {
|
||||
excludes = listOf("org/drools/**/*")
|
||||
|
||||
@ -4,16 +4,23 @@ import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.data.mongo.MongoDataAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.liquibase.LiquibaseAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.mongo.MongoAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.cache.annotation.EnableCaching;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
|
||||
|
||||
import com.iqser.red.service.dictionarymerge.commons.DictionaryMergeService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.SharedMongoAutoConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.knecon.fforesight.mongo.database.commons.MongoDatabaseCommonsAutoConfiguration;
|
||||
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
|
||||
|
||||
import io.micrometer.core.aop.TimedAspect;
|
||||
@ -22,11 +29,12 @@ import io.micrometer.observation.ObservationRegistry;
|
||||
import io.micrometer.observation.aop.ObservedAspect;
|
||||
|
||||
@EnableCaching
|
||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class})
|
||||
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class})
|
||||
@ImportAutoConfiguration({MultiTenancyAutoConfiguration.class, SharedMongoAutoConfiguration.class})
|
||||
@Import({MetricsConfiguration.class, StorageAutoConfiguration.class, MongoDatabaseCommonsAutoConfiguration.class})
|
||||
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
||||
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
||||
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
|
||||
@EnableMongoRepositories(basePackages = "com.iqser.red.service.persistence")
|
||||
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, DataSourceAutoConfiguration.class, LiquibaseAutoConfiguration.class, MongoAutoConfiguration.class, MongoDataAutoConfiguration.class})
|
||||
public class Application {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
@ -70,6 +70,7 @@ public class MigrationMessageReceiver {
|
||||
migrationRequest.getFileId());
|
||||
|
||||
log.info("Storing migrated entityLog and ids to migrate in DB for file {}", migrationRequest.getFileId());
|
||||
|
||||
redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.ENTITY_LOG, migratedEntityLog.getEntityLog());
|
||||
redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.MIGRATED_IDS, migratedEntityLog.getMigratedIds());
|
||||
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
@ -20,18 +23,12 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileTyp
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.legalbasis.LegalBasis;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogChanges;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogLegalBasis;
|
||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
|
||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
@ -87,7 +84,7 @@ public class AnalyzeService {
|
||||
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
EntityLog previousEntityLog = redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
@ -97,25 +94,36 @@ public class AnalyzeService {
|
||||
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
// not yet ready for reanalysis
|
||||
if (previousEntityLog == null || document == null || document.getNumberOfPages() == 0) {
|
||||
if (entityLogWithoutEntries == null || document == null || document.getNumberOfPages() == 0) {
|
||||
return analyze(analyzeRequest);
|
||||
}
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
|
||||
new DictionaryVersion(previousEntityLog.getDictionaryVersion(),
|
||||
previousEntityLog.getDossierDictionaryVersion()),
|
||||
new DictionaryVersion(entityLogWithoutEntries.getDictionaryVersion(),
|
||||
entityLogWithoutEntries.getDossierDictionaryVersion()),
|
||||
analyzeRequest.getDossierId());
|
||||
|
||||
Set<Integer> sectionsToReanalyseIds = getSectionsToReanalyseIds(analyzeRequest, previousEntityLog, document, dictionaryIncrement, importedRedactions);
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
relevantManuallyModifiedAnnotationIds);
|
||||
sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest,
|
||||
document,
|
||||
dictionaryIncrement,
|
||||
importedRedactions,
|
||||
relevantManuallyModifiedAnnotationIds));
|
||||
|
||||
List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds);
|
||||
log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
if (sectionsToReAnalyse.isEmpty()) {
|
||||
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(previousEntityLog,
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(entityLogWithoutEntries,
|
||||
dictionaryIncrement.getDictionaryVersion(),
|
||||
analyzeRequest,
|
||||
false);
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>());
|
||||
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
startTime,
|
||||
@ -160,8 +168,8 @@ public class AnalyzeService {
|
||||
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.updatePreviousEntityLog(analyzeRequest,
|
||||
document,
|
||||
entityLogWithoutEntries,
|
||||
notFoundManualOrImportedEntries,
|
||||
previousEntityLog,
|
||||
sectionsToReanalyseIds,
|
||||
dictionary.getVersion());
|
||||
|
||||
@ -224,18 +232,18 @@ public class AnalyzeService {
|
||||
nerEntities);
|
||||
log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
EntityLog entityLog = entityLogCreatorService.createInitialEntityLog(analyzeRequest,
|
||||
document,
|
||||
notFoundManualOrImportedEntries,
|
||||
dictionary.getVersion(),
|
||||
kieWrapperEntityRules.rulesVersion());
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest,
|
||||
document,
|
||||
notFoundManualOrImportedEntries,
|
||||
dictionary.getVersion(),
|
||||
kieWrapperEntityRules.rulesVersion());
|
||||
|
||||
notFoundImportedEntitiesService.processEntityLog(entityLog, analyzeRequest, notFoundImportedEntries);
|
||||
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
|
||||
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
startTime,
|
||||
kieWrapperComponentRules,
|
||||
new EntityLogChanges(entityLog, false),
|
||||
entityLogChanges,
|
||||
document,
|
||||
document.getNumberOfPages(),
|
||||
dictionary.getVersion(),
|
||||
@ -255,10 +263,24 @@ public class AnalyzeService {
|
||||
Set<FileAttribute> addedFileAttributes) {
|
||||
|
||||
EntityLog entityLog = entityLogChanges.getEntityLog();
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ENTITY_LOG, entityLogChanges.getEntityLog());
|
||||
|
||||
// analysis numbers should be incremented in every follow-up request, so that this could be replaced
|
||||
if (!redactionStorageService.entityLogExists(analyzeRequest.getDossierId(), analyzeRequest.getFileId())) {
|
||||
redactionStorageService.insertEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLog);
|
||||
|
||||
} else {
|
||||
redactionStorageService.updateEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLog);
|
||||
|
||||
if (!entityLogChanges.getNewEntityLogEntries().isEmpty()) {
|
||||
redactionStorageService.insertEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLogChanges.getNewEntityLogEntries());
|
||||
}
|
||||
if (!entityLogChanges.getUpdatedEntityLogEntries().isEmpty()) {
|
||||
redactionStorageService.updateEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), entityLogChanges.getUpdatedEntityLogEntries());
|
||||
}
|
||||
}
|
||||
|
||||
log.info("Created entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
if (entityLogChanges.isHasChanges() || !isReanalysis) {
|
||||
if (entityLogChanges.hasChanges() || !isReanalysis) {
|
||||
computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, document, addedFileAttributes, entityLogChanges, dictionaryVersion);
|
||||
}
|
||||
|
||||
@ -273,7 +295,7 @@ public class AnalyzeService {
|
||||
.fileId(analyzeRequest.getFileId())
|
||||
.duration(duration)
|
||||
.numberOfPages(numberOfPages)
|
||||
.hasUpdates(entityLogChanges.isHasChanges())
|
||||
.hasUpdates(entityLogChanges.hasChanges())
|
||||
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
|
||||
.analysisNumber(analyzeRequest.getAnalysisNumber())
|
||||
.rulesVersion(entityLog.getRulesVersion())
|
||||
@ -323,12 +345,16 @@ public class AnalyzeService {
|
||||
|
||||
|
||||
private Set<Integer> getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest,
|
||||
EntityLog entityLog,
|
||||
Document document,
|
||||
DictionaryIncrement dictionaryIncrement,
|
||||
ImportedRedactions importedRedactions) {
|
||||
ImportedRedactions importedRedactions,
|
||||
Set<String> relevantManuallyModifiedAnnotationIds) {
|
||||
|
||||
return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, entityLog, document, analyzeRequest, importedRedactions);
|
||||
return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement,
|
||||
document,
|
||||
analyzeRequest,
|
||||
importedRedactions,
|
||||
relevantManuallyModifiedAnnotationIds);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
@ -26,61 +27,57 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class EntityChangeLogService {
|
||||
|
||||
@Timed("redactmanager_computeChanges")
|
||||
public boolean computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber) {
|
||||
public EntryChanges computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber) {
|
||||
|
||||
var now = OffsetDateTime.now();
|
||||
if (previousEntityLogEntries.isEmpty()) {
|
||||
newEntityLogEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now)));
|
||||
return true;
|
||||
return new EntryChanges(newEntityLogEntries, new ArrayList<>());
|
||||
}
|
||||
|
||||
boolean hasChanges = false;
|
||||
|
||||
List<EntityLogEntry> toInsert = new ArrayList<>();
|
||||
List<EntityLogEntry> toUpdate = new ArrayList<>();
|
||||
for (EntityLogEntry entityLogEntry : newEntityLogEntries) {
|
||||
|
||||
Optional<EntityLogEntry> optionalPreviousEntity = previousEntityLogEntries.stream()
|
||||
.filter(entry -> entry.getId().equals(entityLogEntry.getId()))
|
||||
.findAny();
|
||||
|
||||
if (optionalPreviousEntity.isEmpty()) {
|
||||
hasChanges = true;
|
||||
entityLogEntry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now));
|
||||
toInsert.add(entityLogEntry);
|
||||
continue;
|
||||
}
|
||||
|
||||
EntityLogEntry previousEntity = optionalPreviousEntity.get();
|
||||
|
||||
entityLogEntry.getChanges().addAll(previousEntity.getChanges());
|
||||
|
||||
if (!previousEntity.getState().equals(entityLogEntry.getState())) {
|
||||
hasChanges = true;
|
||||
ChangeType changeType = calculateChangeType(entityLogEntry.getState(), previousEntity.getState());
|
||||
entityLogEntry.getChanges().add(new Change(analysisNumber, changeType, now));
|
||||
toUpdate.add(entityLogEntry);
|
||||
}
|
||||
}
|
||||
|
||||
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now);
|
||||
|
||||
return hasChanges;
|
||||
toUpdate.addAll(addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now));
|
||||
return new EntryChanges(toInsert, toUpdate);
|
||||
}
|
||||
|
||||
|
||||
private void addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber, OffsetDateTime now) {
|
||||
private List<EntityLogEntry> addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries,
|
||||
List<EntityLogEntry> newEntityLogEntries,
|
||||
int analysisNumber,
|
||||
OffsetDateTime now) {
|
||||
|
||||
Set<String> existingIds = newEntityLogEntries.stream()
|
||||
.map(EntityLogEntry::getId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
List<EntityLogEntry> removedEntries = previousEntityLogEntries.stream()
|
||||
.filter(entry -> !existingIds.contains(entry.getId()))
|
||||
.toList();
|
||||
|
||||
removedEntries.stream()
|
||||
.filter(entry -> !entry.getState().equals(EntryState.REMOVED))
|
||||
.peek(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now)))
|
||||
.forEach(entry -> entry.setState(EntryState.REMOVED));
|
||||
|
||||
newEntityLogEntries.addAll(removedEntries);
|
||||
return removedEntries;
|
||||
}
|
||||
|
||||
|
||||
@ -104,4 +101,9 @@ public class EntityChangeLogService {
|
||||
return (state.equals(EntryState.REMOVED) || state.equals(EntryState.IGNORED));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Result of {@code computeChanges}: the entity log entries that have to be inserted and the
 * ones that have to be updated. The lists are stored as passed in, without copying.
 *
 * @param inserted entries reported as newly added
 * @param updated  entries reported as changed
 */
public record EntryChanges(List<EntityLogEntry> inserted, List<EntityLogEntry> updated) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -32,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.service.EntityChangeLogService.EntryChanges;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -60,11 +61,11 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
public EntityLog createInitialEntityLog(AnalyzeRequest analyzeRequest,
|
||||
Document document,
|
||||
List<PrecursorEntity> notFoundEntities,
|
||||
DictionaryVersion dictionaryVersion,
|
||||
long rulesVersion) {
|
||||
public EntityLogChanges createInitialEntityLog(AnalyzeRequest analyzeRequest,
|
||||
Document document,
|
||||
List<PrecursorEntity> notFoundEntities,
|
||||
DictionaryVersion dictionaryVersion,
|
||||
long rulesVersion) {
|
||||
|
||||
List<EntityLogEntry> entityLogEntries = createEntityLogEntries(document, analyzeRequest, notFoundEntities);
|
||||
|
||||
@ -72,16 +73,20 @@ public class EntityLogCreatorService {
|
||||
|
||||
List<EntityLogEntry> previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber());
|
||||
EntryChanges entryChanges = entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber());
|
||||
|
||||
return new EntityLog(redactionServiceSettings.getAnalysisVersion(),
|
||||
analyzeRequest.getAnalysisNumber(),
|
||||
entityLogEntries,
|
||||
toEntityLogLegalBasis(legalBasis),
|
||||
dictionaryVersion.getDossierTemplateVersion(),
|
||||
dictionaryVersion.getDossierVersion(),
|
||||
rulesVersion,
|
||||
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
return EntityLogChanges.builder()
|
||||
.entityLog(new EntityLog(redactionServiceSettings.getAnalysisVersion(),
|
||||
analyzeRequest.getAnalysisNumber(),
|
||||
entityLogEntries,
|
||||
toEntityLogLegalBasis(legalBasis),
|
||||
dictionaryVersion.getDossierTemplateVersion(),
|
||||
dictionaryVersion.getDossierVersion(),
|
||||
rulesVersion,
|
||||
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())))
|
||||
.updatedEntityLogEntries(entryChanges.updated())
|
||||
.newEntityLogEntries(entryChanges.inserted())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -95,7 +100,11 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
public EntityLogChanges updateVersionsAndReturnChanges(EntityLog entityLog, DictionaryVersion dictionaryVersion, AnalyzeRequest analyzeRequest, boolean hasChanges) {
|
||||
public EntityLogChanges updateVersionsAndReturnChanges(EntityLog entityLog,
|
||||
DictionaryVersion dictionaryVersion,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
List<EntityLogEntry> newEntries,
|
||||
List<EntityLogEntry> updatedEntries) {
|
||||
|
||||
List<LegalBasis> legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
|
||||
entityLog.setLegalBasisVersion(legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
@ -104,14 +113,14 @@ public class EntityLogCreatorService {
|
||||
entityLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
|
||||
entityLog.setAnalysisNumber(analyzeRequest.getAnalysisNumber());
|
||||
|
||||
return new EntityLogChanges(entityLog, hasChanges);
|
||||
return EntityLogChanges.builder().entityLog(entityLog).newEntityLogEntries(newEntries).updatedEntityLogEntries(updatedEntries).build();
|
||||
}
|
||||
|
||||
|
||||
public EntityLogChanges updatePreviousEntityLog(AnalyzeRequest analyzeRequest,
|
||||
Document document,
|
||||
EntityLog entityLogWithoutEntries,
|
||||
List<PrecursorEntity> notFoundEntries,
|
||||
EntityLog previousEntityLog,
|
||||
Set<Integer> sectionsToReanalyseIds,
|
||||
DictionaryVersion dictionaryVersion) {
|
||||
|
||||
@ -119,22 +128,14 @@ public class EntityLogCreatorService {
|
||||
.filter(entry -> entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId()
|
||||
.get(0)))
|
||||
.collect(Collectors.toList());
|
||||
Set<String> newEntityIds = newEntityLogEntries.stream()
|
||||
.map(EntityLogEntry::getId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
List<EntityLogEntry> previousEntriesFromReAnalyzedSections = previousEntityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> (newEntityIds.contains(entry.getId()) || entry.getContainingNodeId().isEmpty() || sectionsToReanalyseIds.contains(entry.getContainingNodeId()
|
||||
.get(0))))
|
||||
.collect(Collectors.toList());
|
||||
previousEntityLog.getEntityLogEntry().removeAll(previousEntriesFromReAnalyzedSections);
|
||||
List<EntityLogEntry> previousEntriesFromReAnalyzedSections = redactionStorageService.findEntriesContainedBySectionsOrNotContained(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
sectionsToReanalyseIds);
|
||||
|
||||
boolean hasChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, newEntityLogEntries, analyzeRequest.getAnalysisNumber());
|
||||
EntryChanges entryChanges = entityChangeLogService.computeChanges(previousEntriesFromReAnalyzedSections, newEntityLogEntries, analyzeRequest.getAnalysisNumber());
|
||||
|
||||
previousEntityLog.getEntityLogEntry().addAll(newEntityLogEntries);
|
||||
|
||||
return updateVersionsAndReturnChanges(previousEntityLog, dictionaryVersion, analyzeRequest, hasChanges);
|
||||
return updateVersionsAndReturnChanges(entityLogWithoutEntries, dictionaryVersion, analyzeRequest, entryChanges.inserted(), entryChanges.updated());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -11,7 +11,6 @@ import java.util.stream.Stream;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedaction;
|
||||
@ -44,23 +43,14 @@ public class SectionFinderService {
|
||||
|
||||
@Timed("redactmanager_findSectionsToReanalyse")
|
||||
public Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement,
|
||||
EntityLog entityLog,
|
||||
Document document,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
ImportedRedactions importedRedactions) {
|
||||
ImportedRedactions importedRedactions,
|
||||
Set<String> relevantManuallyModifiedAnnotationIds) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||
for (EntityLogEntry entry : entityLog.getEntityLogEntry()) {
|
||||
if (relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
|
||||
if (entry.getContainingNodeId().isEmpty()) {
|
||||
continue; // Empty list means either Entity has not been found or it is between main sections. Thus, this might lead to wrong reanalysis.
|
||||
}
|
||||
sectionsToReanalyse.add(entry.getContainingNodeId()
|
||||
.get(0));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues()
|
||||
.stream()
|
||||
@ -133,7 +123,7 @@ public class SectionFinderService {
|
||||
}
|
||||
|
||||
|
||||
private static Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
|
||||
public static Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return new HashSet<>();
|
||||
|
||||
@ -3,20 +3,24 @@ package com.iqser.red.service.redaction.v1.server.storage;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.cache.annotation.Cacheable;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactionsPerPage;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.service.EntityLogMongoService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
@ -39,6 +43,8 @@ public class RedactionStorageService {
|
||||
|
||||
private final StorageService storageService;
|
||||
|
||||
private final EntityLogMongoService entityLogMongoService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public InputStream getStoredObject(String storageId) {
|
||||
@ -75,6 +81,41 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void saveEntityLog(String dossierId, String fileId, EntityLog entityLog) {
|
||||
|
||||
entityLogMongoService.upsertEntityLog(dossierId, fileId, entityLog);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void updateEntityLogWithoutEntries(String dossierId, String fileId, EntityLog entityLog) {
|
||||
|
||||
entityLogMongoService.saveEntityLogWithoutEntries(dossierId, fileId, entityLog);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void insertEntityLog(String dossierId, String fileId, EntityLog entityLog) {
|
||||
|
||||
entityLogMongoService.insertEntityLog(dossierId, fileId, entityLog);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void insertEntityLogEntries(String dossierId, String fileId, List<EntityLogEntry> entityLogEntries) {
|
||||
|
||||
entityLogMongoService.insertEntityLogEntries(dossierId, fileId, entityLogEntries);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void updateEntityLogEntries(String dossierId, String fileId, List<EntityLogEntry> entityLogEntries) {
|
||||
|
||||
entityLogMongoService.updateEntityLogEntries(dossierId, fileId, entityLogEntries);
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_getImportedRedactions")
|
||||
public ImportedRedactions getImportedRedactions(String dossierId, String fileId) {
|
||||
|
||||
@ -132,7 +173,8 @@ public class RedactionStorageService {
|
||||
public EntityLog getEntityLog(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
EntityLog entityLog = storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG), EntityLog.class);
|
||||
EntityLog entityLog = entityLogMongoService.findEntityLogByDossierIdAndFileId(dossierId, fileId)
|
||||
.orElseThrow(() -> new StorageObjectDoesNotExist(""));
|
||||
entityLog.setEntityLogEntry(entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty()))
|
||||
@ -146,6 +188,33 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_getRedactionLog")
|
||||
public EntityLog getEntityLogWithoutEntries(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
return entityLogMongoService.findEntityLogWithoutEntries(dossierId, fileId)
|
||||
.orElseThrow(() -> new StorageObjectDoesNotExist(""));
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("EntityLog not available.");
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public Set<Integer> findIdsOfSectionsToReanalyse(String dossierId, String fileId, Collection<String> entryIds) {
|
||||
|
||||
return entityLogMongoService.findFirstContainingNodeIdForEachEntry(dossierId, fileId, entryIds);
|
||||
}
|
||||
|
||||
|
||||
public List<EntityLogEntry> findEntriesContainedBySectionsOrNotContained(String dossierId, String fileId, Collection<Integer> sectionIds) {
|
||||
|
||||
return entityLogMongoService.findEntityLogEntriesNotContainedOrFirstContainedByElementInList(dossierId, fileId, sectionIds);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Warning: before activating the Redis cache, add the JVM arguments
// -Dio.netty.noPreferDirect=true -XX:MaxDirectMemorySize=1000M
// and size MaxDirectMemorySize for the largest document data we want to process; a 4443-page file needed about 500 MB.
|
||||
@ -200,7 +269,7 @@ public class RedactionStorageService {
|
||||
|
||||
public boolean entityLogExists(String dossierId, String fileId) {
|
||||
|
||||
return storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG));
|
||||
return entityLogMongoService.entityLogDocumentExists(dossierId, fileId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -46,6 +46,13 @@ spring:
|
||||
port: ${REDIS_PORT:6379}
|
||||
password: ${REDIS_PASSWORD}
|
||||
timeout: 60000
|
||||
mongodb:
  auto-index-creation: true
  database: redaction
  host: ${MONGODB_HOST:localhost}
  # Read from the environment like the host, so a non-default port can be configured;
  # falls back to MongoDB's default port when MONGODB_PORT is not set.
  port: ${MONGODB_PORT:27017}
  username: ${MONGODB_USER}
  password: ${MONGODB_PASSWORD}
|
||||
|
||||
management:
|
||||
endpoint:
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.File;
|
||||
@ -16,12 +17,20 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.bson.BsonArray;
|
||||
import org.bson.BsonDocument;
|
||||
import org.bson.BsonString;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.mockito.stubbing.Answer;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.boot.test.util.TestPropertyValues;
|
||||
import org.springframework.context.ApplicationContextInitializer;
|
||||
import org.springframework.context.ConfigurableApplicationContext;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.ContextConfiguration;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.dictionarymerge.commons.DictionaryEntry;
|
||||
@ -30,6 +39,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.EntityLogDocumentRepository;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.EntityLogEntryDocumentRepository;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
@ -38,19 +49,28 @@ import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.UnprocessedChangesService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.LayoutParsingRequestProvider;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
||||
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||
import com.knecon.fforesight.tenantcommons.model.MongoDBConnection;
|
||||
import com.mongodb.MongoCommandException;
|
||||
import com.mongodb.client.MongoClient;
|
||||
import com.mongodb.client.MongoClients;
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@ContextConfiguration(initializers = {AbstractRedactionIntegrationTest.Initializer.class})
|
||||
public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
protected static final String VERTEBRATE_INDICATOR = "vertebrate";
|
||||
@ -120,6 +140,11 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
@Autowired
|
||||
protected UnprocessedChangesService unprocessedChangesService;
|
||||
|
||||
@Autowired
|
||||
protected EntityLogDocumentRepository entityLogDocumentRepository;
|
||||
@Autowired
|
||||
protected EntityLogEntryDocumentRepository entityLogEntryDocumentRepository;
|
||||
|
||||
@MockBean
|
||||
protected RabbitTemplate rabbitTemplate;
|
||||
|
||||
@ -129,6 +154,9 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
@MockBean
|
||||
private TenantsClient tenantsClient;
|
||||
|
||||
@MockBean
|
||||
private MongoConnectionProvider mongoConnectionProvider;
|
||||
|
||||
protected final Map<String, List<String>> dictionary = new HashMap<>();
|
||||
protected final Map<String, List<String>> dossierDictionary = new HashMap<>();
|
||||
protected final Map<String, List<String>> falsePositive = new HashMap<>();
|
||||
@ -158,6 +186,8 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
if (this.storageService instanceof FileSystemBackedStorageService) {
|
||||
((FileSystemBackedStorageService) this.storageService).clearStorage();
|
||||
}
|
||||
entityLogDocumentRepository.deleteAll();
|
||||
entityLogEntryDocumentRepository.deleteAll();
|
||||
}
|
||||
|
||||
|
||||
@ -552,4 +582,71 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
protected void mockProvideMongoDBConnection() {
|
||||
|
||||
TenantContext.setTenantId("redaction");
|
||||
|
||||
var mongoInstance = MongoDBTestContainer.getInstance();
|
||||
|
||||
when(mongoConnectionProvider.getMongoDBConnection(any())).thenReturn(MongoDBConnection.builder()
|
||||
.host(mongoInstance.getHost())
|
||||
.port(String.valueOf(mongoInstance.getFirstMappedPort()))
|
||||
.database(MongoDBTestContainer.MONGO_DATABASE)
|
||||
.username(MongoDBTestContainer.MONGO_USERNAME)
|
||||
.password(MongoDBTestContainer.MONGO_PASSWORD)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
@Slf4j
|
||||
static class Initializer implements ApplicationContextInitializer<ConfigurableApplicationContext> {
|
||||
|
||||
public void initialize(ConfigurableApplicationContext configurableApplicationContext) {
|
||||
|
||||
var mongoInstance = MongoDBTestContainer.getInstance();
|
||||
mongoInstance.start();
|
||||
createMongoDBDatabase(mongoInstance);
|
||||
|
||||
log.info("Hosts are - MongoDB: {}", mongoInstance.getHost());
|
||||
|
||||
TestPropertyValues.of("MONGODB_HOST=" + mongoInstance.getHost(),
|
||||
"MONGODB_PORT=" + mongoInstance.getFirstMappedPort(),
|
||||
"MONGODB_USER=" + MongoDBTestContainer.MONGO_USERNAME,
|
||||
"MONGODB_PASSWORD=" + MongoDBTestContainer.MONGO_PASSWORD).applyTo(configurableApplicationContext.getEnvironment());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static void createMongoDBDatabase(MongoDBTestContainer mongoDBTestContainer) {
|
||||
|
||||
try (MongoClient mongoClient = MongoClients.create(String.format("mongodb://%s:%s@%s:%s/",
|
||||
MongoDBTestContainer.MONGO_USERNAME,
|
||||
MongoDBTestContainer.MONGO_PASSWORD,
|
||||
mongoDBTestContainer.getHost(),
|
||||
mongoDBTestContainer.getFirstMappedPort()))) {
|
||||
MongoDatabase database = mongoClient.getDatabase(MongoDBTestContainer.MONGO_DATABASE);
|
||||
BsonDocument createUserCommand = new BsonDocument();
|
||||
createUserCommand.append("createUser", new BsonString(MongoDBTestContainer.MONGO_USERNAME));
|
||||
createUserCommand.append("pwd", new BsonString(MongoDBTestContainer.MONGO_PASSWORD));
|
||||
BsonArray roles = new BsonArray();
|
||||
roles.add(new BsonString("readWrite"));
|
||||
createUserCommand.append("roles", roles);
|
||||
|
||||
try {
|
||||
database.runCommand(createUserCommand);
|
||||
} catch (MongoCommandException mongoCommandException) {
|
||||
// ignore user already exists
|
||||
if (mongoCommandException.getErrorCode() != 51003) {
|
||||
throw mongoCommandException;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -128,7 +128,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
|
||||
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
@ -194,7 +194,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
|
||||
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
|
||||
@ -146,6 +146,18 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testFludioxonilDuplicatedImageEntries() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Fludioxonil/Fludioxonil_duplicates.pdf",
|
||||
"files/cv_service_empty_response.json", "files/image_info_fludioxonil_duplicates.json");
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
assertThat(result).isNotNull();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
public void testLargeScannedFileOOM() {
|
||||
@ -608,6 +620,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
.build()));
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
request.setAnalysisNumber(1);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
@ -666,7 +679,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
.build()));
|
||||
|
||||
request.setManualRedactions(manualRedactions);
|
||||
|
||||
request.setAnalysisNumber(2);
|
||||
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||
|
||||
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
@ -688,6 +701,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
|
||||
|
||||
request.setAnalysisNumber(3);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
@ -138,7 +138,7 @@ public class PrecursorEntityTest extends BuildDocumentIntegrationTest {
|
||||
document,
|
||||
notFoundManualEntities,
|
||||
new DictionaryVersion(),
|
||||
0L).getEntityLogEntry();
|
||||
0L).getEntityLog().getEntityLogEntry();
|
||||
|
||||
assertEquals(1, redactionLogEntries.size());
|
||||
assertEquals(value, redactionLogEntries.get(0).getValue());
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
package com.iqser.red.service.redaction.v1.server.testcontainers;
|
||||
|
||||
import org.testcontainers.containers.GenericContainer;
|
||||
import org.testcontainers.utility.DockerImageName;
|
||||
|
||||
public final class MongoDBTestContainer extends GenericContainer<MongoDBTestContainer> {
|
||||
|
||||
private static final String IMAGE_VERSION = "mongo:7.0.2";
|
||||
public static final Integer MONGO_PORT = 27017;
|
||||
public static final String MONGO_DATABASE = "mongo_database";
|
||||
public static final String MONGO_PASSWORD = "mongo_password";
|
||||
public static final String MONGO_USERNAME = "mongo_username";
|
||||
private static MongoDBTestContainer mongoDB;
|
||||
|
||||
|
||||
private MongoDBTestContainer() {
|
||||
|
||||
super(DockerImageName.parse(IMAGE_VERSION));
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static MongoDBTestContainer getInstance() {
|
||||
|
||||
if (mongoDB == null) {
|
||||
mongoDB = new MongoDBTestContainer().withEnv("MONGO_INITDB_ROOT_USERNAME", MONGO_USERNAME)
|
||||
.withEnv("MONGO_INITDB_ROOT_PASSWORD", MONGO_PASSWORD)
|
||||
.withEnv("MONGO_INITDB_DATABASE", MONGO_DATABASE)
|
||||
.withExposedPorts(MONGO_PORT);
|
||||
|
||||
}
|
||||
return mongoDB;
|
||||
}
|
||||
|
||||
}
|
||||
@ -12,6 +12,15 @@ spring:
|
||||
allow-circular-references: true # FIXME
|
||||
cache:
|
||||
type: NONE
|
||||
data:
|
||||
mongodb:
|
||||
auto-index-creation: true
|
||||
# todo: multi-tenancy
|
||||
database: redaction
|
||||
host: ${MONGODB_HOST:localhost}
|
||||
port: ${MONGODB_PORT:27017}
|
||||
username: ${MONGODB_USER}
|
||||
password: ${MONGODB_PASSWORD}
|
||||
|
||||
processing.kafkastreams: false
|
||||
|
||||
|
||||
Binary file not shown.
@ -0,0 +1,303 @@
|
||||
{
|
||||
"dossierId": "c4849583-af00-4bef-934f-491ef761e984",
|
||||
"fileId": "790c52eb58e02f2b3b3b2b19ec7d6e1e",
|
||||
"targetFileExtension": "ORIGIN.pdf.gz",
|
||||
"responseFileExtension": "IMAGE_INFO.json.gz",
|
||||
"X-TENANT-ID": "redaction",
|
||||
"data": [
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "44CF9F4E3EFAF3FFCF3FFCF3F",
|
||||
"position": {
|
||||
"x1": 71,
|
||||
"x2": 511,
|
||||
"y1": 627,
|
||||
"y2": 736,
|
||||
"pageNumber": 5
|
||||
},
|
||||
"geometry": {
|
||||
"width": 440,
|
||||
"height": 109
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.3094,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 4.0367,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFFFF78F1EFBC32F8F717FCF1",
|
||||
"position": {
|
||||
"x1": 71,
|
||||
"x2": 511,
|
||||
"y1": 410,
|
||||
"y2": 519,
|
||||
"pageNumber": 5
|
||||
},
|
||||
"geometry": {
|
||||
"width": 440,
|
||||
"height": 109
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.3094,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 4.0367,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFCF3FFFFFCC733DE63B00038",
|
||||
"position": {
|
||||
"x1": 71,
|
||||
"x2": 511,
|
||||
"y1": 519,
|
||||
"y2": 628,
|
||||
"pageNumber": 5
|
||||
},
|
||||
"geometry": {
|
||||
"width": 440,
|
||||
"height": 109
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.3094,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 4.0367,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFF6D344888AE08B5F6DB1FFF",
|
||||
"position": {
|
||||
"x1": 196,
|
||||
"x2": 302,
|
||||
"y1": 453,
|
||||
"y2": 523,
|
||||
"pageNumber": 6
|
||||
},
|
||||
"geometry": {
|
||||
"width": 106,
|
||||
"height": 70
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.1217,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.5143,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFF6B30E904BE08F0F3DF9FFF",
|
||||
"position": {
|
||||
"x1": 196,
|
||||
"x2": 303,
|
||||
"y1": 217,
|
||||
"y2": 284,
|
||||
"pageNumber": 6
|
||||
},
|
||||
"geometry": {
|
||||
"width": 107,
|
||||
"height": 67
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.1196,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.597,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFF7D14488FA60844F1D39FFF",
|
||||
"position": {
|
||||
"x1": 197,
|
||||
"x2": 301,
|
||||
"y1": 379,
|
||||
"y2": 449,
|
||||
"pageNumber": 6
|
||||
},
|
||||
"geometry": {
|
||||
"width": 104,
|
||||
"height": 70
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.1205,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.4857,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"label": "formula",
|
||||
"probabilities": {
|
||||
"formula": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"signature": 0.0
|
||||
}
|
||||
},
|
||||
"representation": "FFFFCB3468BD60842F9CF8F7F",
|
||||
"position": {
|
||||
"x1": 208,
|
||||
"x2": 290,
|
||||
"y1": 304,
|
||||
"y2": 374,
|
||||
"pageNumber": 6
|
||||
},
|
||||
"geometry": {
|
||||
"width": 82,
|
||||
"height": 70
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.107,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.1714,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user