diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index ea2f708c..f746225f 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -43,7 +43,7 @@ dependencies { implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0") implementation("com.iqser.red.commons:storage-commons:2.45.0") implementation("com.knecon.fforesight:keycloak-commons:0.29.0") - implementation("com.knecon.fforesight:tenant-commons:0.24.0") + implementation("com.knecon.fforesight:tenant-commons:0.25.0") implementation("com.knecon.fforesight:tracing-commons:0.5.0") implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}") diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java index eeddf593..8d647f99 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java @@ -9,6 +9,7 @@ import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; import org.springframework.boot.autoconfigure.liquibase.LiquibaseAutoConfiguration; import org.springframework.boot.autoconfigure.mongo.MongoAutoConfiguration; import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; +import org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.cache.annotation.EnableCaching; import org.springframework.cloud.openfeign.EnableFeignClients; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java index a8c7884f..1c85d383 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java @@ -194,25 +194,32 @@ public class TextEntity implements IEntity { public boolean containedBy(TextEntity textEntity) { - return this.textRange.containedBy(textEntity.getTextRange()) // - || duplicateTextRanges.stream() - .anyMatch(duplicateTextRange -> duplicateTextRange.containedBy(textEntity.textRange)) // - || duplicateTextRanges.stream() - .anyMatch(duplicateTextRange -> textEntity.getDuplicateTextRanges() - .stream() - .anyMatch(duplicateTextRange::containedBy)); + return textEntity.contains(this); } public boolean contains(TextEntity textEntity) { - return this.textRange.contains(textEntity.getTextRange()) // - || duplicateTextRanges.stream() - .anyMatch(duplicateTextRange -> duplicateTextRange.contains(textEntity.textRange)) // - || duplicateTextRanges.stream() - .anyMatch(duplicateTextRange -> textEntity.getDuplicateTextRanges() - .stream() - .anyMatch(duplicateTextRange::contains)); + if (this.textRange.contains(textEntity.getTextRange())) { + return true; + } + + List textEntityDuplicateRanges = textEntity.getDuplicateTextRanges(); + // use optimized indexed loops for extra performance boost + for (int i = 0, duplicateTextRangesSize = duplicateTextRanges.size(); i < duplicateTextRangesSize; i++) { + TextRange duplicateTextRange = duplicateTextRanges.get(i); + if (duplicateTextRange.contains(textEntity.getTextRange())) { + return true; + } + for (int j = 0, textEntityDuplicateRangesSize = textEntityDuplicateRanges.size(); j < textEntityDuplicateRangesSize; j++) { + TextRange otherRange = textEntityDuplicateRanges.get(j); + if (duplicateTextRange.contains(otherRange)) { + return true; + } + } + } + + return false; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisPreparationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisPreparationService.java new file mode 100644 index 00000000..65a16c5c --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalysisPreparationService.java @@ -0,0 +1,396 @@ +package com.iqser.red.service.redaction.v1.server.service; + +import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds; +import static org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration.APPLICATION_TASK_EXECUTOR_BEAN_NAME; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; + +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.core.task.TaskExecutor; +import org.springframework.stereotype.Service; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; +import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; +import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel; +import com.iqser.red.service.redaction.v1.server.model.KieWrapper; +import com.iqser.red.service.redaction.v1.server.model.NerEntities; +import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity; +import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement; +import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; +import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService; +import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService; +import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter; +import com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService; +import com.iqser.red.service.redaction.v1.server.service.drools.KieContainerCreationService; +import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; + +import lombok.AccessLevel; +import lombok.SneakyThrows; +import lombok.experimental.FieldDefaults; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class AnalysisPreparationService { + + KieContainerCreationService kieContainerCreationService; + ObservedStorageService observedStorageService; + RedactionStorageService redactionStorageService; + RedactionServiceSettings redactionServiceSettings; + ManualRedactionEntryService manualRedactionEntryService; + ImportedRedactionEntryService importedRedactionEntryService; + DictionaryService dictionaryService; + SectionFinderService sectionFinderService; + TaskExecutor taskExecutor; + + + public AnalysisPreparationService(KieContainerCreationService kieContainerCreationService, + ObservedStorageService observedStorageService, + RedactionStorageService redactionStorageService, + RedactionServiceSettings redactionServiceSettings, + ManualRedactionEntryService manualRedactionEntryService, + ImportedRedactionEntryService importedRedactionEntryService, + DictionaryService dictionaryService, + SectionFinderService sectionFinderService, + @Qualifier(APPLICATION_TASK_EXECUTOR_BEAN_NAME) TaskExecutor taskExecutor) { + + this.kieContainerCreationService = kieContainerCreationService; + this.observedStorageService = observedStorageService; + this.redactionStorageService = redactionStorageService; + this.redactionServiceSettings = redactionServiceSettings; + this.manualRedactionEntryService = manualRedactionEntryService; + this.importedRedactionEntryService = importedRedactionEntryService; + this.dictionaryService = dictionaryService; + this.sectionFinderService = sectionFinderService; + this.taskExecutor = taskExecutor; + } + + + @SneakyThrows + public AnalysisData getAnalysisData(AnalyzeRequest analyzeRequest) { + + CompletableFuture kieWrapperComponentRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.COMPONENT), taskExecutor); + + CompletableFuture kieWrapperEntityRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.ENTITY), taskExecutor); + + CompletableFuture documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor); + + CompletableFuture importedRedactionsFuture = CompletableFuture.supplyAsync(() -> getImportedRedactions(analyzeRequest), taskExecutor); + + CompletableFuture nerEntitiesFuture = documentFuture.thenApplyAsync((document) -> getNerEntities(analyzeRequest, document), taskExecutor); + + CompletableFuture.allOf(kieWrapperEntityRulesFuture, kieWrapperComponentRulesFuture, documentFuture, importedRedactionsFuture, nerEntitiesFuture).join(); + + Dictionary dictionary = getDictionary(analyzeRequest); + + Document document = documentFuture.get(); + ImportedRedactions importedRedactions = importedRedactionsFuture.get(); + + List notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, + document, + analyzeRequest.getDossierTemplateId()); + + List notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document); + + return new AnalysisData(kieWrapperEntityRulesFuture.get(), + kieWrapperComponentRulesFuture.get(), + document, + importedRedactions, + dictionary, + notFoundManualRedactionEntries, + notFoundImportedEntries, + nerEntitiesFuture.get()); + } + + + @SneakyThrows + public ReanalysisSetupData getReanalysisSetupData(AnalyzeRequest analyzeRequest) { + + CompletableFuture entityLogFuture = CompletableFuture.supplyAsync(() -> getEntityLog(analyzeRequest), taskExecutor); + + CompletableFuture documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor); + + CompletableFuture.allOf(entityLogFuture, documentFuture).join(); + + return new ReanalysisSetupData(entityLogFuture.get(), documentFuture.get()); + } + + + @SneakyThrows + public ReanalysisInitialProcessingData getReanalysisInitialProcessingData(AnalyzeRequest analyzeRequest, ReanalysisSetupData reanalysisSetupData) { + + CompletableFuture importedRedactionsFuture = CompletableFuture.supplyAsync(() -> getImportedRedactions(analyzeRequest), taskExecutor); + + CompletableFuture incrementAndSectionsToReanalyzeFuture = importedRedactionsFuture.thenApplyAsync((importedRedactions) -> { + DictionaryIncrement dictionaryIncrement = getDictionaryIncrement(analyzeRequest, reanalysisSetupData); + return getDictionaryIncrementAndSectionsToReanalyze(analyzeRequest, dictionaryIncrement, reanalysisSetupData, importedRedactions); + + }, taskExecutor); + + CompletableFuture kieWrapperComponentRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.COMPONENT), taskExecutor); + + CompletableFuture kieWrapperEntityRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.ENTITY), taskExecutor); + + CompletableFuture.allOf(importedRedactionsFuture, incrementAndSectionsToReanalyzeFuture, kieWrapperComponentRulesFuture, kieWrapperEntityRulesFuture).join(); + + return new ReanalysisInitialProcessingData(importedRedactionsFuture.get(), + incrementAndSectionsToReanalyzeFuture.get().dictionaryIncrement(), + incrementAndSectionsToReanalyzeFuture.get().sectionsToReanalyseIds(), + incrementAndSectionsToReanalyzeFuture.get().sectionsToReanalyze(), + kieWrapperComponentRulesFuture.get(), + kieWrapperEntityRulesFuture.get()); + } + + + @SneakyThrows + public ReanalysisFinalProcessingData getReanalysisFinalProcessingData(AnalyzeRequest analyzeRequest, + ReanalysisSetupData reanalysisSetupData, + ReanalysisInitialProcessingData reanalysisInitialProcessingData) { + + CompletableFuture nerEntitiesFuture = CompletableFuture.supplyAsync(() -> getNerEntitiesFiltered(analyzeRequest, + reanalysisSetupData.document, + reanalysisInitialProcessingData.sectionsToReanalyseIds), + taskExecutor); + + CompletableFuture dictionaryAndNotFoundEntriesCompletableFuture = CompletableFuture.supplyAsync(() -> { + Dictionary dictionary = getDictionary(analyzeRequest); + NotFoundEntries notFoundEntries = getNotFoundEntries(analyzeRequest, reanalysisSetupData.document(), reanalysisInitialProcessingData.importedRedactions()); + return new DictionaryAndNotFoundEntries(dictionary, notFoundEntries.notFoundManualRedactionEntries(), notFoundEntries.notFoundImportedEntries()); + }, taskExecutor); + + CompletableFuture.allOf(nerEntitiesFuture, dictionaryAndNotFoundEntriesCompletableFuture).join(); + + return new ReanalysisFinalProcessingData(nerEntitiesFuture.get(), + dictionaryAndNotFoundEntriesCompletableFuture.get().dictionary(), + dictionaryAndNotFoundEntriesCompletableFuture.get().notFoundManualRedactionEntries(), + dictionaryAndNotFoundEntriesCompletableFuture.get().notFoundImportedEntries()); + } + + + public KieWrapper getKieWrapper(AnalyzeRequest analyzeRequest, RuleFileType ruleFileType) { + + KieWrapper kieWrapperComponentRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), ruleFileType); + log.info("Updated {} Rules to Version {} for file {} in dossier {}", + ruleFileType, + kieWrapperComponentRules.rulesVersion(), + analyzeRequest.getFileId(), + analyzeRequest.getDossierId()); + return kieWrapperComponentRules; + } + + + public Document getDocument(AnalyzeRequest analyzeRequest) { + + Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); + log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return document; + } + + + public ImportedRedactions getImportedRedactions(AnalyzeRequest analyzeRequest) { + + ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return importedRedactions; + } + + + public NerEntities getNerEntities(AnalyzeRequest analyzeRequest, Document document) { + + NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document); + log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return nerEntities; + } + + + public NerEntities getNerEntitiesFiltered(AnalyzeRequest analyzeRequest, Document document, Set sectionsToReanalyseIds) { + + NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds); + log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return nerEntities; + } + + + private Dictionary getDictionary(AnalyzeRequest analyzeRequest) { + + dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return dictionary; + } + + + private NotFoundEntries getNotFoundEntries(AnalyzeRequest analyzeRequest, Document document, ImportedRedactions importedRedactions) { + + var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, + document, + analyzeRequest.getDossierTemplateId()); + var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document); + return new NotFoundEntries(notFoundManualRedactionEntries, notFoundImportedEntries); + } + + + private NerEntities getEntityRecognitionEntities(AnalyzeRequest analyzeRequest, Document document) { + + NerEntities nerEntities; + if (redactionServiceSettings.isNerServiceEnabled()) { + nerEntities = NerEntitiesAdapter.toNerEntities(redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()), document); + } else { + nerEntities = new NerEntities(Collections.emptyList()); + } + return nerEntities; + } + + + private EntityLog getEntityLog(AnalyzeRequest analyzeRequest) { + + EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return entityLogWithoutEntries; + } + + + private SectionsToReanalyzeData getDictionaryIncrementAndSectionsToReanalyze(AnalyzeRequest analyzeRequest, + DictionaryIncrement dictionaryIncrement, + ReanalysisSetupData reanalysisSetupData, + ImportedRedactions importedRedactions) { + + Set relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions()); + + Set sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(), + analyzeRequest.getFileId(), + relevantManuallyModifiedAnnotationIds); + sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest, + reanalysisSetupData.document(), + dictionaryIncrement, + importedRedactions, + relevantManuallyModifiedAnnotationIds)); + + List sectionsToReAnalyse = getSectionsToReAnalyse(reanalysisSetupData.document(), sectionsToReanalyseIds); + log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + return new SectionsToReanalyzeData(dictionaryIncrement, sectionsToReanalyseIds, sectionsToReAnalyse); + } + + + private DictionaryIncrement getDictionaryIncrement(AnalyzeRequest analyzeRequest, ReanalysisSetupData reanalysisSetupData) { + + return dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), + new DictionaryVersion(reanalysisSetupData.entityLog().getDictionaryVersion(), + reanalysisSetupData.entityLog().getDossierDictionaryVersion()), + analyzeRequest.getDossierId()); + } + + + private NerEntities getEntityRecognitionEntitiesFilteredBySectionIds(AnalyzeRequest analyzeRequest, Document document, Set sectionsToReanalyseIds) { + + NerEntities nerEntities; + if (redactionServiceSettings.isNerServiceEnabled()) { + NerEntitiesModel nerEntitiesModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + nerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, nerEntitiesModel); + nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document); + } else { + nerEntities = new NerEntities(Collections.emptyList()); + } + return nerEntities; + } + + + private static NerEntitiesModel filterNerEntitiesModelBySectionIds(Set sectionsToReanalyseIds, NerEntitiesModel nerEntitiesModel) { + + return new NerEntitiesModel(nerEntitiesModel.getData().entrySet() + .stream() // + .filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) // + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + } + + + private static Integer getSuperSectionID(String section) { + + return NerEntitiesAdapter.sectionNumberToTreeId(section) + .get(0); + + } + + + private static List getSectionsToReAnalyse(Document document, Set sectionsToReanalyseIds) { + + return document.streamChildren() + .filter(section -> sectionsToReanalyseIds.contains(section.getTreeId() + .get(0))) + .collect(Collectors.toList()); + } + + + private Set getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest, + Document document, + DictionaryIncrement dictionaryIncrement, + ImportedRedactions importedRedactions, + Set relevantManuallyModifiedAnnotationIds) { + + return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, document, analyzeRequest, importedRedactions, relevantManuallyModifiedAnnotationIds); + } + + + private record DictionaryAndNotFoundEntries(Dictionary dictionary, List notFoundManualRedactionEntries, List notFoundImportedEntries) { + + } + + private record NotFoundEntries(List notFoundManualRedactionEntries, List notFoundImportedEntries) { + + } + + private record SectionsToReanalyzeData(DictionaryIncrement dictionaryIncrement, Set sectionsToReanalyseIds, List sectionsToReanalyze) { + + } + + public record AnalysisData( + KieWrapper kieWrapperEntityRules, + KieWrapper kieWrapperComponentRules, + Document document, + ImportedRedactions importedRedactions, + Dictionary dictionary, + List notFoundManualRedactionEntries, + List notFoundImportedEntries, + NerEntities nerEntities + ) { + + } + + public record ReanalysisSetupData( + EntityLog entityLog, Document document + ) { + + } + + public record ReanalysisInitialProcessingData( + ImportedRedactions importedRedactions, + DictionaryIncrement dictionaryIncrement, + Set sectionsToReanalyseIds, + List sectionsToReAnalyse, + KieWrapper kieWrapperComponentRules, + KieWrapper kieWrapperEntityRules + ) { + + } + + public record ReanalysisFinalProcessingData( + NerEntities nerEntities, Dictionary dictionary, List notFoundManualRedactionEntries, List notFoundImportedEntries + ) { + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java index 42085162..d0ee2e30 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java @@ -1,13 +1,14 @@ package com.iqser.red.service.redaction.v1.server.service; -import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds; +import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisFinalProcessingData; +import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisInitialProcessingData; +import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisSetupData; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -19,33 +20,19 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; -import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges; -import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; -import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel; import com.iqser.red.service.redaction.v1.server.logger.Context; import com.iqser.red.service.redaction.v1.server.model.KieWrapper; -import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.component.Component; -import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement; -import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.AnalysisData; import com.iqser.red.service.redaction.v1.server.service.components.ComponentLogCreatorService; -import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; -import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService; -import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService; -import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter; -import com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService; import com.iqser.red.service.redaction.v1.server.service.drools.ComponentDroolsExecutionService; import com.iqser.red.service.redaction.v1.server.service.drools.EntityDroolsExecutionService; -import com.iqser.red.service.redaction.v1.server.service.drools.KieContainerCreationService; -import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.knecon.fforesight.tenantcommons.TenantContext; @@ -63,21 +50,16 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class AnalyzeService { - DictionaryService dictionaryService; EntityDroolsExecutionService entityDroolsExecutionService; ComponentDroolsExecutionService componentDroolsExecutionService; - KieContainerCreationService kieContainerCreationService; DictionarySearchService dictionarySearchService; EntityLogCreatorService entityLogCreatorService; ComponentLogCreatorService componentLogCreatorService; RedactionStorageService redactionStorageService; RedactionServiceSettings redactionServiceSettings; NotFoundImportedEntitiesService notFoundImportedEntitiesService; - SectionFinderService sectionFinderService; - ManualRedactionEntryService manualRedactionEntryService; - ImportedRedactionEntryService importedRedactionEntryService; - ObservedStorageService observedStorageService; FunctionTimerValues redactmanagerAnalyzePagewiseValues; + AnalysisPreparationService analysisPreparationService; @Timed("redactmanager_reanalyze") @@ -86,172 +68,131 @@ public class AnalyzeService { public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) { long startTime = System.currentTimeMillis(); - EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); - log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - Context context = new Context(analyzeRequest.getFileId(), analyzeRequest.getDossierId(), analyzeRequest.getDossierTemplateId(), 0, analyzeRequest.getAnalysisNumber(), TenantContext.getTenantId()); + ReanalysisSetupData setupData = analysisPreparationService.getReanalysisSetupData(analyzeRequest); + Context context = new Context(analyzeRequest.getFileId(), + analyzeRequest.getDossierId(), + analyzeRequest.getDossierTemplateId(), + 0, + analyzeRequest.getAnalysisNumber(), + TenantContext.getTenantId()); // not yet ready for reanalysis - if (entityLogWithoutEntries == null || document == null || document.getNumberOfPages() == 0) { + if (setupData.entityLog() == null || setupData.document() == null || setupData.document().getNumberOfPages() == 0) { return analyze(analyzeRequest); } - DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), - new DictionaryVersion(entityLogWithoutEntries.getDictionaryVersion(), - entityLogWithoutEntries.getDossierDictionaryVersion()), - analyzeRequest.getDossierId()); + ReanalysisInitialProcessingData initialProcessingData = analysisPreparationService.getReanalysisInitialProcessingData(analyzeRequest, setupData); - Set relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions()); + if (initialProcessingData.sectionsToReAnalyse().isEmpty()) { - Set sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(), - analyzeRequest.getFileId(), - relevantManuallyModifiedAnnotationIds); - sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest, document, dictionaryIncrement, importedRedactions, relevantManuallyModifiedAnnotationIds)); - - List sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds); - log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - if (sectionsToReAnalyse.isEmpty()) { - - EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(entityLogWithoutEntries, - dictionaryIncrement.getDictionaryVersion(), + EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(setupData.entityLog(), + initialProcessingData.dictionaryIncrement().getDictionaryVersion(), analyzeRequest, new ArrayList<>(), new ArrayList<>()); return finalizeAnalysis(analyzeRequest, startTime, - kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT), + initialProcessingData.kieWrapperComponentRules(), entityLogChanges, - document, - document.getNumberOfPages(), + setupData.document(), + setupData.document().getNumberOfPages(), true, Collections.emptySet(), context); } - KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY); - log.info("Updated entity rules to version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + context.setRuleVersion(initialProcessingData.kieWrapperEntityRules().rulesVersion()); - context.setRuleVersion(kieWrapperEntityRules.rulesVersion()); + ReanalysisFinalProcessingData finalProcessingData = analysisPreparationService.getReanalysisFinalProcessingData(analyzeRequest, setupData, initialProcessingData); - NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds); - log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + dictionarySearchService.addDictionaryEntities(finalProcessingData.dictionary(), initialProcessingData.sectionsToReAnalyse()); + log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, - document, - analyzeRequest.getDossierTemplateId()); - var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document); - var notFoundManualOrImportedEntries = Stream.of(notFoundManualRedactionEntries, notFoundImportedEntries) + var notFoundManualOrImportedEntries = Stream.of(finalProcessingData.notFoundManualRedactionEntries(), finalProcessingData.notFoundImportedEntries()) .flatMap(Collection::stream) .collect(Collectors.toList()); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); - log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - dictionarySearchService.addDictionaryEntities(dictionary, sectionsToReAnalyse); - log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - // we could add the imported redactions similar to the manual redactions here as well for additional processing - List allFileAttributes = entityDroolsExecutionService.executeRules(kieWrapperEntityRules.container(), - document, - sectionsToReAnalyse, - dictionary, + List allFileAttributes = entityDroolsExecutionService.executeRules(initialProcessingData.kieWrapperEntityRules().container(), + setupData.document(), + initialProcessingData.sectionsToReAnalyse(), + finalProcessingData.dictionary(), analyzeRequest.getFileAttributes(), analyzeRequest.getManualRedactions(), - nerEntities, + finalProcessingData.nerEntities(), context); log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); EntityLogChanges entityLogChanges = entityLogCreatorService.updatePreviousEntityLog(analyzeRequest, - document, - entityLogWithoutEntries, + setupData.document(), + setupData.entityLog(), notFoundManualOrImportedEntries, - sectionsToReanalyseIds, - dictionary.getVersion()); + initialProcessingData.sectionsToReanalyseIds(), + finalProcessingData.dictionary().getVersion()); - notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries); + notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, finalProcessingData.notFoundImportedEntries()); return finalizeAnalysis(analyzeRequest, startTime, - kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT), + initialProcessingData.kieWrapperComponentRules(), entityLogChanges, - document, - document.getNumberOfPages(), + setupData.document(), + setupData.document().getNumberOfPages(), true, new HashSet<>(allFileAttributes), context); } + @SneakyThrows @Timed("redactmanager_analyze") @Observed(name = "AnalyzeService", contextualName = "analyze") public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) { long startTime = System.currentTimeMillis(); - var kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY); - log.info("Updated Rules to Version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + AnalysisData analysisData = analysisPreparationService.getAnalysisData(analyzeRequest); - var kieWrapperComponentRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT); - log.info("Updated Rules to Version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId()); + Context context = new Context(analyzeRequest.getFileId(), + analyzeRequest.getDossierId(), + analyzeRequest.getDossierTemplateId(), + analysisData.kieWrapperEntityRules().rulesVersion(), + analyzeRequest.getAnalysisNumber(), + TenantContext.getTenantId()); - Context context = new Context(analyzeRequest.getFileId(), analyzeRequest.getDossierId(), analyzeRequest.getDossierTemplateId(), kieWrapperEntityRules.rulesVersion(), analyzeRequest.getAnalysisNumber(), TenantContext.getTenantId()); - - Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); - log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document); - log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); - log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); - - var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest, - document, - analyzeRequest.getDossierTemplateId()); - var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document); - var notFoundManualOrImportedEntries = Stream.of(notFoundManualRedactionEntries, notFoundImportedEntries) + var notFoundManualOrImportedEntries = Stream.of(analysisData.notFoundManualRedactionEntries(), analysisData.notFoundImportedEntries()) .flatMap(Collection::stream) .collect(Collectors.toList()); - dictionarySearchService.addDictionaryEntities(dictionary, document); + dictionarySearchService.addDictionaryEntities(analysisData.dictionary(), analysisData.document()); log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); // we could add the imported redactions similar to the manual redactions here as well for additional processing - List allFileAttributes = entityDroolsExecutionService.executeRules(kieWrapperEntityRules.container(), - document, - dictionary, + List allFileAttributes = entityDroolsExecutionService.executeRules(analysisData.kieWrapperEntityRules().container(), + analysisData.document(), + analysisData.dictionary(), analyzeRequest.getFileAttributes(), analyzeRequest.getManualRedactions(), - nerEntities, + analysisData.nerEntities(), context); log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest, - document, + analysisData.document(), notFoundManualOrImportedEntries, - dictionary.getVersion(), - kieWrapperEntityRules.rulesVersion()); + analysisData.dictionary().getVersion(), + analysisData.kieWrapperEntityRules().rulesVersion()); - notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries); + notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, analysisData.notFoundImportedEntries()); return finalizeAnalysis(analyzeRequest, startTime, - kieWrapperComponentRules, + analysisData.kieWrapperComponentRules(), entityLogChanges, - document, - document.getNumberOfPages(), + analysisData.document(), + analysisData.document().getNumberOfPages(), false, new HashSet<>(allFileAttributes), context); @@ -345,65 +286,4 @@ public class AnalyzeService { log.info("Stored component log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); } - - private static List getSectionsToReAnalyse(Document document, Set sectionsToReanalyseIds) { - - return document.streamChildren() - .filter(section -> sectionsToReanalyseIds.contains(section.getTreeId() - .get(0))) - .collect(Collectors.toList()); - } - - - private Set getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest, - Document document, - DictionaryIncrement dictionaryIncrement, - ImportedRedactions importedRedactions, - Set relevantManuallyModifiedAnnotationIds) { - - return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, document, analyzeRequest, importedRedactions, relevantManuallyModifiedAnnotationIds); - } - - - private NerEntities getEntityRecognitionEntitiesFilteredBySectionIds(AnalyzeRequest analyzeRequest, Document document, Set sectionsToReanalyseIds) { - - NerEntities nerEntities; - if (redactionServiceSettings.isNerServiceEnabled()) { - NerEntitiesModel nerEntitiesModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - nerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, nerEntitiesModel); - nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document); - } else { - nerEntities = new NerEntities(Collections.emptyList()); - } - return nerEntities; - } - - - private static NerEntitiesModel filterNerEntitiesModelBySectionIds(Set sectionsToReanalyseIds, NerEntitiesModel nerEntitiesModel) { - - return new NerEntitiesModel(nerEntitiesModel.getData().entrySet() - .stream() // - .filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) // - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); - } - - - private NerEntities getEntityRecognitionEntities(AnalyzeRequest analyzeRequest, Document document) { - - NerEntities nerEntities; - if (redactionServiceSettings.isNerServiceEnabled()) { - nerEntities = NerEntitiesAdapter.toNerEntities(redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()), document); - } else { - nerEntities = new NerEntities(Collections.emptyList()); - } - return nerEntities; - } - - - private static Integer getSuperSectionID(String section) { - - return NerEntitiesAdapter.sectionNumberToTreeId(section) - .get(0); - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionaryService.java index 895f535b..ee415fff 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionaryService.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.service; import java.awt.Color; -import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedList; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java index 6f641017..ed6dcd62 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -6,6 +6,8 @@ import java.io.InputStream; import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.function.Supplier; import java.util.stream.Collectors; import org.springframework.cache.annotation.Cacheable; @@ -213,7 +215,6 @@ public class RedactionStorageService { } - // !Warning! before activating redis cache you need to set // -Dio.netty.noPreferDirect=true -XX:MaxDirectMemorySize=1000M // Jvm args to the largest document data size we want to process. for 4443 pages file that was 500mb. @@ -224,23 +225,42 @@ public class RedactionStorageService { public DocumentData getDocumentData(String dossierId, String fileId) { try { + Supplier documentStructureSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(), + StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE), + DocumentStructure.class); + CompletableFuture documentStructureFuture = CompletableFuture.supplyAsync(documentStructureSupplier); + + Supplier documentTextDataSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(), + StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT), + DocumentTextData[].class); + CompletableFuture documentTextDataFuture = CompletableFuture.supplyAsync(documentTextDataSupplier); + + Supplier documentPositionDataSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(), + StorageIdUtils.getStorageId(dossierId, + fileId, + FileType.DOCUMENT_POSITION), + DocumentPositionData[].class); + CompletableFuture documentPositionDataFuture = CompletableFuture.supplyAsync(documentPositionDataSupplier); + + Supplier documentPageSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(), + StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES), + DocumentPage[].class); + CompletableFuture documentPagesFuture = CompletableFuture.supplyAsync(documentPageSupplier); + + CompletableFuture.allOf(documentStructureFuture, documentTextDataFuture, documentPositionDataFuture, documentPagesFuture).join(); + return DocumentData.builder() - .documentStructure(storageService.readJSONObject(TenantContext.getTenantId(), - StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE), - DocumentStructure.class)) - .documentTextData(storageService.readJSONObject(TenantContext.getTenantId(), - StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT), - DocumentTextData[].class)) - .documentPositionData(storageService.readJSONObject(TenantContext.getTenantId(), - StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION), - DocumentPositionData[].class)) - .documentPages(storageService.readJSONObject(TenantContext.getTenantId(), - StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES), - DocumentPage[].class)) + .documentStructure(documentStructureFuture.get()) + .documentTextData(documentTextDataFuture.get()) + .documentPositionData(documentPositionDataFuture.get()) + .documentPages(documentPagesFuture.get()) .build(); } catch (StorageObjectDoesNotExist e) { log.debug("DocumentData not available."); return null; + } catch (Exception e) { + log.error("An error occurred while fetching document data", e); + throw new RuntimeException(e); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java index ebb83325..3ccae12a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java @@ -9,12 +9,14 @@ import static org.mockito.Mockito.when; import java.io.File; import java.io.FileInputStream; +import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; @@ -58,6 +60,7 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer; import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; import com.iqser.red.storage.commons.service.StorageService; +import com.knecon.fforesight.keycloakcommons.security.TenantAuthenticationManagerResolver; import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor; import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider; import com.knecon.fforesight.tenantcommons.TenantContext; @@ -80,7 +83,8 @@ import lombok.extern.slf4j.Slf4j; * This way you can recreate what is happening on the stack almost exactly. */ public class AnalysisEnd2EndTest { - Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here + Path dossierTemplateToUse = Path.of( + "/Users/maverickstuder/Documents/syngenta/redactmanager/prod-cp-eu-reg/EFSA_sanitisation_pre_GFL_v1"); // Add your dossier-template here ObjectMapper mapper = ObjectMapperFactory.create(); final String TENANT_ID = "tenant"; @@ -116,12 +120,15 @@ import lombok.extern.slf4j.Slf4j; @Autowired protected TenantMongoLiquibaseExecutor tenantMongoLiquibaseExecutor; + @MockBean + protected TenantAuthenticationManagerResolver tenantAuthenticationManagerResolver; + @Test @SneakyThrows public void runAnalysisEnd2End() { - String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files. + String folder = "/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/files_end2end/file0"; // Should contain all files from minio directly, still zipped. Can contain multiple files. Path absoluteFolderPath; if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path @@ -137,7 +144,13 @@ import lombok.extern.slf4j.Slf4j; for (int i = 0; i < analyzeRequests.size(); i++) { AnalyzeRequest analyzeRequest = analyzeRequests.get(i); log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId()); - analyzeService.analyze(analyzeRequest); + var times = new LinkedList(); + for (int j = 1; j <= 10; j++) { + var start = System.currentTimeMillis(); + analyzeService.analyze(analyzeRequest); + times.add(System.currentTimeMillis() - start); + } + System.out.println("times in ms for each analyze run: " + times); } } @@ -294,7 +307,7 @@ import lombok.extern.slf4j.Slf4j; Map dossierTemplate = mapper.readValue(dossierTemplateToUse.resolve("dossierTemplate.json").toFile(), HashMap.class); this.id = (String) dossierTemplate.get("dossierTemplateId"); - List dictionaries = Files.walk(dossierTemplateToUse) + List dictionaries = Files.walk(dossierTemplateToUse, FileVisitOption.FOLLOW_LINKS) .filter(path -> path.getFileName().toString().equals("dossierType.json")) .map(this::loadDictionaryModel) .toList();