RED-9123: Improve performance of re-analysis (Spike)

This commit is contained in:
Maverick Studer 2024-07-01 12:07:11 +02:00
parent c279f54295
commit 1c81212a2d
8 changed files with 528 additions and 212 deletions

View File

@ -43,7 +43,7 @@ dependencies {
implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
implementation("com.iqser.red.commons:storage-commons:2.45.0")
implementation("com.knecon.fforesight:keycloak-commons:0.29.0")
implementation("com.knecon.fforesight:tenant-commons:0.24.0")
implementation("com.knecon.fforesight:tenant-commons:0.25.0")
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")

View File

@ -9,6 +9,7 @@ import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.autoconfigure.liquibase.LiquibaseAutoConfiguration;
import org.springframework.boot.autoconfigure.mongo.MongoAutoConfiguration;
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
import org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cloud.openfeign.EnableFeignClients;

View File

@ -194,25 +194,32 @@ public class TextEntity implements IEntity {
/**
 * Whether this entity is contained by {@code textEntity}, considering the
 * primary text range and all duplicate text ranges on both sides.
 * <p>
 * Containment is the mirror of {@link #contains(TextEntity)}: this entity is
 * contained by the other exactly when the other entity contains this one, so
 * the check is delegated instead of duplicating the range comparisons.
 *
 * @param textEntity the potential container; must not be null
 * @return true if any range of {@code textEntity} contains a range of this entity
 */
public boolean containedBy(TextEntity textEntity) {
    return textEntity.contains(this);
}
/**
 * Whether this entity contains {@code textEntity}: true if this entity's
 * primary range or any of its duplicate ranges contains the other entity's
 * primary range or any of the other entity's duplicate ranges.
 *
 * @param textEntity the potential containee; must not be null
 * @return true if any range of this entity contains a range of {@code textEntity}
 */
public boolean contains(TextEntity textEntity) {
    // Hoisted out of the loops so the getter is not re-invoked per iteration.
    TextRange otherTextRange = textEntity.getTextRange();
    // Fast path: primary range already covers the other entity's primary range.
    if (this.textRange.contains(otherTextRange)) {
        return true;
    }
    List<TextRange> textEntityDuplicateRanges = textEntity.getDuplicateTextRanges();
    // use optimized indexed loops for extra performance boost
    for (int i = 0, duplicateTextRangesSize = duplicateTextRanges.size(); i < duplicateTextRangesSize; i++) {
        TextRange duplicateTextRange = duplicateTextRanges.get(i);
        if (duplicateTextRange.contains(otherTextRange)) {
            return true;
        }
        for (int j = 0, textEntityDuplicateRangesSize = textEntityDuplicateRanges.size(); j < textEntityDuplicateRangesSize; j++) {
            TextRange otherRange = textEntityDuplicateRanges.get(j);
            if (duplicateTextRange.contains(otherRange)) {
                return true;
            }
        }
    }
    return false;
}

View File

@ -0,0 +1,396 @@
package com.iqser.red.service.redaction.v1.server.service;

import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds;
import static org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration.APPLICATION_TASK_EXECUTOR_BEAN_NAME;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.core.task.TaskExecutor;
import org.springframework.stereotype.Service;

import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService;
import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService;
import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService;
import com.iqser.red.service.redaction.v1.server.service.drools.KieContainerCreationService;
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;

import lombok.AccessLevel;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;

/**
 * Gathers all inputs required by the analysis / re-analysis pipeline.
 * <p>
 * Independent storage and service look-ups (rule containers, document graph,
 * imported redactions, NER entities, dictionary data) are dispatched
 * concurrently on the application {@link TaskExecutor} via
 * {@link CompletableFuture} and joined before the aggregated result records
 * are returned. Dependent look-ups are chained on their prerequisite futures.
 */
@Slf4j
@Service
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class AnalysisPreparationService {

    KieContainerCreationService kieContainerCreationService;
    ObservedStorageService observedStorageService;
    RedactionStorageService redactionStorageService;
    RedactionServiceSettings redactionServiceSettings;
    ManualRedactionEntryService manualRedactionEntryService;
    ImportedRedactionEntryService importedRedactionEntryService;
    DictionaryService dictionaryService;
    SectionFinderService sectionFinderService;
    TaskExecutor taskExecutor;

    /**
     * Explicit constructor (instead of Lombok) so the application task executor
     * can be selected by qualifier.
     */
    public AnalysisPreparationService(KieContainerCreationService kieContainerCreationService,
                                      ObservedStorageService observedStorageService,
                                      RedactionStorageService redactionStorageService,
                                      RedactionServiceSettings redactionServiceSettings,
                                      ManualRedactionEntryService manualRedactionEntryService,
                                      ImportedRedactionEntryService importedRedactionEntryService,
                                      DictionaryService dictionaryService,
                                      SectionFinderService sectionFinderService,
                                      @Qualifier(APPLICATION_TASK_EXECUTOR_BEAN_NAME) TaskExecutor taskExecutor) {
        this.kieContainerCreationService = kieContainerCreationService;
        this.observedStorageService = observedStorageService;
        this.redactionStorageService = redactionStorageService;
        this.redactionServiceSettings = redactionServiceSettings;
        this.manualRedactionEntryService = manualRedactionEntryService;
        this.importedRedactionEntryService = importedRedactionEntryService;
        this.dictionaryService = dictionaryService;
        this.sectionFinderService = sectionFinderService;
        this.taskExecutor = taskExecutor;
    }

    /**
     * Loads everything needed for a full (non-incremental) analysis run.
     * Rule containers, document graph and imported redactions are fetched in
     * parallel; NER entity loading is chained on the document future.
     *
     * @param analyzeRequest identifies dossier, file and template to analyze
     * @return the aggregated analysis inputs
     */
    @SneakyThrows
    public AnalysisData getAnalysisData(AnalyzeRequest analyzeRequest) {
        CompletableFuture<KieWrapper> kieWrapperComponentRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.COMPONENT), taskExecutor);
        CompletableFuture<KieWrapper> kieWrapperEntityRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.ENTITY), taskExecutor);
        CompletableFuture<Document> documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor);
        CompletableFuture<ImportedRedactions> importedRedactionsFuture = CompletableFuture.supplyAsync(() -> getImportedRedactions(analyzeRequest), taskExecutor);
        // NER entity adaptation needs the document graph, so it is chained on that future
        CompletableFuture<NerEntities> nerEntitiesFuture = documentFuture.thenApplyAsync((document) -> getNerEntities(analyzeRequest, document), taskExecutor);
        CompletableFuture.allOf(kieWrapperEntityRulesFuture, kieWrapperComponentRulesFuture, documentFuture, importedRedactionsFuture, nerEntitiesFuture).join();
        Dictionary dictionary = getDictionary(analyzeRequest);
        Document document = documentFuture.get();
        ImportedRedactions importedRedactions = importedRedactionsFuture.get();
        List<PrecursorEntity> notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
                document,
                analyzeRequest.getDossierTemplateId());
        List<PrecursorEntity> notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
        return new AnalysisData(kieWrapperEntityRulesFuture.get(),
                kieWrapperComponentRulesFuture.get(),
                document,
                importedRedactions,
                dictionary,
                notFoundManualRedactionEntries,
                notFoundImportedEntries,
                nerEntitiesFuture.get());
    }

    /**
     * First phase of re-analysis preparation: loads the previous entity log
     * (without entries) and the document graph in parallel.
     */
    @SneakyThrows
    public ReanalysisSetupData getReanalysisSetupData(AnalyzeRequest analyzeRequest) {
        CompletableFuture<EntityLog> entityLogFuture = CompletableFuture.supplyAsync(() -> getEntityLog(analyzeRequest), taskExecutor);
        CompletableFuture<Document> documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor);
        CompletableFuture.allOf(entityLogFuture, documentFuture).join();
        return new ReanalysisSetupData(entityLogFuture.get(), documentFuture.get());
    }

    /**
     * Second phase of re-analysis preparation: loads imported redactions, the
     * dictionary increment plus the sections to re-analyze, and both rule
     * containers in parallel.
     */
    @SneakyThrows
    public ReanalysisInitialProcessingData getReanalysisInitialProcessingData(AnalyzeRequest analyzeRequest, ReanalysisSetupData reanalysisSetupData) {
        CompletableFuture<ImportedRedactions> importedRedactionsFuture = CompletableFuture.supplyAsync(() -> getImportedRedactions(analyzeRequest), taskExecutor);
        // section detection requires the imported redactions, so it is chained on that future
        CompletableFuture<SectionsToReanalyzeData> incrementAndSectionsToReanalyzeFuture = importedRedactionsFuture.thenApplyAsync((importedRedactions) -> {
            DictionaryIncrement dictionaryIncrement = getDictionaryIncrement(analyzeRequest, reanalysisSetupData);
            return getDictionaryIncrementAndSectionsToReanalyze(analyzeRequest, dictionaryIncrement, reanalysisSetupData, importedRedactions);
        }, taskExecutor);
        CompletableFuture<KieWrapper> kieWrapperComponentRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.COMPONENT), taskExecutor);
        CompletableFuture<KieWrapper> kieWrapperEntityRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.ENTITY), taskExecutor);
        CompletableFuture.allOf(importedRedactionsFuture, incrementAndSectionsToReanalyzeFuture, kieWrapperComponentRulesFuture, kieWrapperEntityRulesFuture).join();
        // read the joined future once instead of calling get() per component
        SectionsToReanalyzeData sectionsToReanalyzeData = incrementAndSectionsToReanalyzeFuture.get();
        return new ReanalysisInitialProcessingData(importedRedactionsFuture.get(),
                sectionsToReanalyzeData.dictionaryIncrement(),
                sectionsToReanalyzeData.sectionsToReanalyseIds(),
                sectionsToReanalyzeData.sectionsToReanalyze(),
                kieWrapperComponentRulesFuture.get(),
                kieWrapperEntityRulesFuture.get());
    }

    /**
     * Final phase of re-analysis preparation: loads the filtered NER entities
     * and, in parallel, the dictionary plus the not-found manual/imported
     * redaction entries.
     */
    @SneakyThrows
    public ReanalysisFinalProcessingData getReanalysisFinalProcessingData(AnalyzeRequest analyzeRequest,
                                                                          ReanalysisSetupData reanalysisSetupData,
                                                                          ReanalysisInitialProcessingData reanalysisInitialProcessingData) {
        CompletableFuture<NerEntities> nerEntitiesFuture = CompletableFuture.supplyAsync(() -> getNerEntitiesFiltered(analyzeRequest,
                reanalysisSetupData.document(),
                reanalysisInitialProcessingData.sectionsToReanalyseIds()),
                taskExecutor);
        CompletableFuture<DictionaryAndNotFoundEntries> dictionaryAndNotFoundEntriesFuture = CompletableFuture.supplyAsync(() -> {
            Dictionary dictionary = getDictionary(analyzeRequest);
            NotFoundEntries notFoundEntries = getNotFoundEntries(analyzeRequest, reanalysisSetupData.document(), reanalysisInitialProcessingData.importedRedactions());
            return new DictionaryAndNotFoundEntries(dictionary, notFoundEntries.notFoundManualRedactionEntries(), notFoundEntries.notFoundImportedEntries());
        }, taskExecutor);
        CompletableFuture.allOf(nerEntitiesFuture, dictionaryAndNotFoundEntriesFuture).join();
        // read the joined future once instead of calling get() per component
        DictionaryAndNotFoundEntries dictionaryAndNotFoundEntries = dictionaryAndNotFoundEntriesFuture.get();
        return new ReanalysisFinalProcessingData(nerEntitiesFuture.get(),
                dictionaryAndNotFoundEntries.dictionary(),
                dictionaryAndNotFoundEntries.notFoundManualRedactionEntries(),
                dictionaryAndNotFoundEntries.notFoundImportedEntries());
    }

    /**
     * Fetches the latest rule container for the given rule file type.
     *
     * @param ruleFileType COMPONENT or ENTITY rules
     */
    public KieWrapper getKieWrapper(AnalyzeRequest analyzeRequest, RuleFileType ruleFileType) {
        KieWrapper kieWrapper = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), ruleFileType);
        log.info("Updated {} Rules to Version {} for file {} in dossier {}",
                ruleFileType,
                kieWrapper.rulesVersion(),
                analyzeRequest.getFileId(),
                analyzeRequest.getDossierId());
        return kieWrapper;
    }

    /** Loads the stored document data and maps it into the document graph. */
    public Document getDocument(AnalyzeRequest analyzeRequest) {
        Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
        log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return document;
    }

    /** Loads the imported redactions for the requested file. */
    public ImportedRedactions getImportedRedactions(AnalyzeRequest analyzeRequest) {
        ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
        log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return importedRedactions;
    }

    /** Loads all NER entities for the document (empty when the NER service is disabled). */
    public NerEntities getNerEntities(AnalyzeRequest analyzeRequest, Document document) {
        NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document);
        log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return nerEntities;
    }

    /** Loads NER entities restricted to the sections that will be re-analyzed. */
    public NerEntities getNerEntitiesFiltered(AnalyzeRequest analyzeRequest, Document document, Set<Integer> sectionsToReanalyseIds) {
        NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds);
        log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return nerEntities;
    }

    /** Updates the dictionaries for the dossier and returns a deep copy for this run. */
    private Dictionary getDictionary(AnalyzeRequest analyzeRequest) {
        dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
        Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
        log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return dictionary;
    }

    /**
     * Adds manual and imported redaction entries to the document and collects
     * those that could not be located in it.
     */
    private NotFoundEntries getNotFoundEntries(AnalyzeRequest analyzeRequest, Document document, ImportedRedactions importedRedactions) {
        var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
                document,
                analyzeRequest.getDossierTemplateId());
        var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
        return new NotFoundEntries(notFoundManualRedactionEntries, notFoundImportedEntries);
    }

    /** Loads NER entities from storage, or an empty result when the NER service is disabled. */
    private NerEntities getEntityRecognitionEntities(AnalyzeRequest analyzeRequest, Document document) {
        NerEntities nerEntities;
        if (redactionServiceSettings.isNerServiceEnabled()) {
            nerEntities = NerEntitiesAdapter.toNerEntities(redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()), document);
        } else {
            nerEntities = new NerEntities(Collections.emptyList());
        }
        return nerEntities;
    }

    /** Loads the previous entity log without its (potentially large) entries. */
    private EntityLog getEntityLog(AnalyzeRequest analyzeRequest) {
        EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
        log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return entityLogWithoutEntries;
    }

    /**
     * Determines the section ids affected by manual modifications and the
     * dictionary increment, and resolves them to the section nodes that must
     * be re-analyzed.
     */
    private SectionsToReanalyzeData getDictionaryIncrementAndSectionsToReanalyze(AnalyzeRequest analyzeRequest,
                                                                                DictionaryIncrement dictionaryIncrement,
                                                                                ReanalysisSetupData reanalysisSetupData,
                                                                                ImportedRedactions importedRedactions) {
        Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
        Set<Integer> sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(),
                analyzeRequest.getFileId(),
                relevantManuallyModifiedAnnotationIds);
        sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest,
                reanalysisSetupData.document(),
                dictionaryIncrement,
                importedRedactions,
                relevantManuallyModifiedAnnotationIds));
        List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(reanalysisSetupData.document(), sectionsToReanalyseIds);
        log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
        return new SectionsToReanalyzeData(dictionaryIncrement, sectionsToReanalyseIds, sectionsToReAnalyse);
    }

    /** Computes the dictionary changes since the version recorded in the previous entity log. */
    private DictionaryIncrement getDictionaryIncrement(AnalyzeRequest analyzeRequest, ReanalysisSetupData reanalysisSetupData) {
        return dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
                new DictionaryVersion(reanalysisSetupData.entityLog().getDictionaryVersion(),
                        reanalysisSetupData.entityLog().getDossierDictionaryVersion()),
                analyzeRequest.getDossierId());
    }

    /** Loads NER entities and keeps only those belonging to the given sections. */
    private NerEntities getEntityRecognitionEntitiesFilteredBySectionIds(AnalyzeRequest analyzeRequest, Document document, Set<Integer> sectionsToReanalyseIds) {
        NerEntities nerEntities;
        if (redactionServiceSettings.isNerServiceEnabled()) {
            NerEntitiesModel nerEntitiesModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
            nerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, nerEntitiesModel);
            nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
        } else {
            nerEntities = new NerEntities(Collections.emptyList());
        }
        return nerEntities;
    }

    /** Keeps only NER model entries whose super-section id is in the given id set. */
    private static NerEntitiesModel filterNerEntitiesModelBySectionIds(Set<Integer> sectionsToReanalyseIds, NerEntitiesModel nerEntitiesModel) {
        return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
                .stream() //
                .filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
    }

    /** Resolves a section number string to its top-level (super) section id. */
    private static Integer getSuperSectionID(String section) {
        return NerEntitiesAdapter.sectionNumberToTreeId(section)
                .get(0);
    }

    /** Selects the document's top-level sections whose tree id matches one of the given ids. */
    private static List<SemanticNode> getSectionsToReAnalyse(Document document, Set<Integer> sectionsToReanalyseIds) {
        return document.streamChildren()
                .filter(section -> sectionsToReanalyseIds.contains(section.getTreeId()
                        .get(0)))
                .collect(Collectors.toList());
    }

    /** Delegates section detection to the section finder service. */
    private Set<Integer> getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest,
                                                   Document document,
                                                   DictionaryIncrement dictionaryIncrement,
                                                   ImportedRedactions importedRedactions,
                                                   Set<String> relevantManuallyModifiedAnnotationIds) {
        return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, document, analyzeRequest, importedRedactions, relevantManuallyModifiedAnnotationIds);
    }

    // Internal carrier for the results of the parallel dictionary / not-found-entries lookup.
    private record DictionaryAndNotFoundEntries(Dictionary dictionary, List<PrecursorEntity> notFoundManualRedactionEntries, List<PrecursorEntity> notFoundImportedEntries) {
    }

    // Internal carrier for the manual and imported entries that could not be located in the document.
    private record NotFoundEntries(List<PrecursorEntity> notFoundManualRedactionEntries, List<PrecursorEntity> notFoundImportedEntries) {
    }

    // Internal carrier for the dictionary increment and the resolved sections to re-analyze.
    private record SectionsToReanalyzeData(DictionaryIncrement dictionaryIncrement, Set<Integer> sectionsToReanalyseIds, List<SemanticNode> sectionsToReanalyze) {
    }

    /** Aggregated inputs for a full analysis run. */
    public record AnalysisData(
            KieWrapper kieWrapperEntityRules,
            KieWrapper kieWrapperComponentRules,
            Document document,
            ImportedRedactions importedRedactions,
            Dictionary dictionary,
            List<PrecursorEntity> notFoundManualRedactionEntries,
            List<PrecursorEntity> notFoundImportedEntries,
            NerEntities nerEntities
    ) {
    }

    /** Previous entity log and document graph needed to decide how to re-analyze. */
    public record ReanalysisSetupData(
            EntityLog entityLog, Document document
    ) {
    }

    /** Rule containers, imported redactions and the sections selected for re-analysis. */
    public record ReanalysisInitialProcessingData(
            ImportedRedactions importedRedactions,
            DictionaryIncrement dictionaryIncrement,
            Set<Integer> sectionsToReanalyseIds,
            List<SemanticNode> sectionsToReAnalyse,
            KieWrapper kieWrapperComponentRules,
            KieWrapper kieWrapperEntityRules
    ) {
    }

    /** Filtered NER entities, dictionary copy and not-found entries for the final phase. */
    public record ReanalysisFinalProcessingData(
            NerEntities nerEntities, Dictionary dictionary, List<PrecursorEntity> notFoundManualRedactionEntries, List<PrecursorEntity> notFoundImportedEntries
    ) {
    }
}

View File

@ -1,13 +1,14 @@
package com.iqser.red.service.redaction.v1.server.service;
import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds;
import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisFinalProcessingData;
import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisInitialProcessingData;
import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisSetupData;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -19,33 +20,19 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.AnalysisData;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentLogCreatorService;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService;
import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService;
import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService;
import com.iqser.red.service.redaction.v1.server.service.drools.ComponentDroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.service.drools.EntityDroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.service.drools.KieContainerCreationService;
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -63,21 +50,16 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class AnalyzeService {
DictionaryService dictionaryService;
EntityDroolsExecutionService entityDroolsExecutionService;
ComponentDroolsExecutionService componentDroolsExecutionService;
KieContainerCreationService kieContainerCreationService;
DictionarySearchService dictionarySearchService;
EntityLogCreatorService entityLogCreatorService;
ComponentLogCreatorService componentLogCreatorService;
RedactionStorageService redactionStorageService;
RedactionServiceSettings redactionServiceSettings;
NotFoundImportedEntitiesService notFoundImportedEntitiesService;
SectionFinderService sectionFinderService;
ManualRedactionEntryService manualRedactionEntryService;
ImportedRedactionEntryService importedRedactionEntryService;
ObservedStorageService observedStorageService;
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
AnalysisPreparationService analysisPreparationService;
@Timed("redactmanager_reanalyze")
@ -86,172 +68,131 @@ public class AnalyzeService {
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Context context = new Context(analyzeRequest.getFileId(), analyzeRequest.getDossierId(), analyzeRequest.getDossierTemplateId(), 0, analyzeRequest.getAnalysisNumber(), TenantContext.getTenantId());
ReanalysisSetupData setupData = analysisPreparationService.getReanalysisSetupData(analyzeRequest);
Context context = new Context(analyzeRequest.getFileId(),
analyzeRequest.getDossierId(),
analyzeRequest.getDossierTemplateId(),
0,
analyzeRequest.getAnalysisNumber(),
TenantContext.getTenantId());
// not yet ready for reanalysis
if (entityLogWithoutEntries == null || document == null || document.getNumberOfPages() == 0) {
if (setupData.entityLog() == null || setupData.document() == null || setupData.document().getNumberOfPages() == 0) {
return analyze(analyzeRequest);
}
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
new DictionaryVersion(entityLogWithoutEntries.getDictionaryVersion(),
entityLogWithoutEntries.getDossierDictionaryVersion()),
analyzeRequest.getDossierId());
ReanalysisInitialProcessingData initialProcessingData = analysisPreparationService.getReanalysisInitialProcessingData(analyzeRequest, setupData);
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
if (initialProcessingData.sectionsToReAnalyse().isEmpty()) {
Set<Integer> sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
relevantManuallyModifiedAnnotationIds);
sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest, document, dictionaryIncrement, importedRedactions, relevantManuallyModifiedAnnotationIds));
List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds);
log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
if (sectionsToReAnalyse.isEmpty()) {
EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(entityLogWithoutEntries,
dictionaryIncrement.getDictionaryVersion(),
EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(setupData.entityLog(),
initialProcessingData.dictionaryIncrement().getDictionaryVersion(),
analyzeRequest,
new ArrayList<>(),
new ArrayList<>());
return finalizeAnalysis(analyzeRequest,
startTime,
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT),
initialProcessingData.kieWrapperComponentRules(),
entityLogChanges,
document,
document.getNumberOfPages(),
setupData.document(),
setupData.document().getNumberOfPages(),
true,
Collections.emptySet(),
context);
}
KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
log.info("Updated entity rules to version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
context.setRuleVersion(initialProcessingData.kieWrapperEntityRules().rulesVersion());
context.setRuleVersion(kieWrapperEntityRules.rulesVersion());
ReanalysisFinalProcessingData finalProcessingData = analysisPreparationService.getReanalysisFinalProcessingData(analyzeRequest, setupData, initialProcessingData);
NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds);
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
dictionarySearchService.addDictionaryEntities(finalProcessingData.dictionary(), initialProcessingData.sectionsToReAnalyse());
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
document,
analyzeRequest.getDossierTemplateId());
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
var notFoundManualOrImportedEntries = Stream.of(notFoundManualRedactionEntries, notFoundImportedEntries)
var notFoundManualOrImportedEntries = Stream.of(finalProcessingData.notFoundManualRedactionEntries(), finalProcessingData.notFoundImportedEntries())
.flatMap(Collection::stream)
.collect(Collectors.toList());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
dictionarySearchService.addDictionaryEntities(dictionary, sectionsToReAnalyse);
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
// we could add the imported redactions similar to the manual redactions here as well for additional processing
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(kieWrapperEntityRules.container(),
document,
sectionsToReAnalyse,
dictionary,
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(initialProcessingData.kieWrapperEntityRules().container(),
setupData.document(),
initialProcessingData.sectionsToReAnalyse(),
finalProcessingData.dictionary(),
analyzeRequest.getFileAttributes(),
analyzeRequest.getManualRedactions(),
nerEntities,
finalProcessingData.nerEntities(),
context);
log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
EntityLogChanges entityLogChanges = entityLogCreatorService.updatePreviousEntityLog(analyzeRequest,
document,
entityLogWithoutEntries,
setupData.document(),
setupData.entityLog(),
notFoundManualOrImportedEntries,
sectionsToReanalyseIds,
dictionary.getVersion());
initialProcessingData.sectionsToReanalyseIds(),
finalProcessingData.dictionary().getVersion());
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, finalProcessingData.notFoundImportedEntries());
return finalizeAnalysis(analyzeRequest,
startTime,
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT),
initialProcessingData.kieWrapperComponentRules(),
entityLogChanges,
document,
document.getNumberOfPages(),
setupData.document(),
setupData.document().getNumberOfPages(),
true,
new HashSet<>(allFileAttributes),
context);
}
@SneakyThrows
@Timed("redactmanager_analyze")
@Observed(name = "AnalyzeService", contextualName = "analyze")
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
log.info("Updated Rules to Version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
AnalysisData analysisData = analysisPreparationService.getAnalysisData(analyzeRequest);
var kieWrapperComponentRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT);
log.info("Updated Rules to Version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
Context context = new Context(analyzeRequest.getFileId(),
analyzeRequest.getDossierId(),
analyzeRequest.getDossierTemplateId(),
analysisData.kieWrapperEntityRules().rulesVersion(),
analyzeRequest.getAnalysisNumber(),
TenantContext.getTenantId());
Context context = new Context(analyzeRequest.getFileId(), analyzeRequest.getDossierId(), analyzeRequest.getDossierTemplateId(), kieWrapperEntityRules.rulesVersion(), analyzeRequest.getAnalysisNumber(), TenantContext.getTenantId());
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document);
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
document,
analyzeRequest.getDossierTemplateId());
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
var notFoundManualOrImportedEntries = Stream.of(notFoundManualRedactionEntries, notFoundImportedEntries)
var notFoundManualOrImportedEntries = Stream.of(analysisData.notFoundManualRedactionEntries(), analysisData.notFoundImportedEntries())
.flatMap(Collection::stream)
.collect(Collectors.toList());
dictionarySearchService.addDictionaryEntities(dictionary, document);
dictionarySearchService.addDictionaryEntities(analysisData.dictionary(), analysisData.document());
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
// we could add the imported redactions similar to the manual redactions here as well for additional processing
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(kieWrapperEntityRules.container(),
document,
dictionary,
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(analysisData.kieWrapperEntityRules().container(),
analysisData.document(),
analysisData.dictionary(),
analyzeRequest.getFileAttributes(),
analyzeRequest.getManualRedactions(),
nerEntities,
analysisData.nerEntities(),
context);
log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest,
document,
analysisData.document(),
notFoundManualOrImportedEntries,
dictionary.getVersion(),
kieWrapperEntityRules.rulesVersion());
analysisData.dictionary().getVersion(),
analysisData.kieWrapperEntityRules().rulesVersion());
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, analysisData.notFoundImportedEntries());
return finalizeAnalysis(analyzeRequest,
startTime,
kieWrapperComponentRules,
analysisData.kieWrapperComponentRules(),
entityLogChanges,
document,
document.getNumberOfPages(),
analysisData.document(),
analysisData.document().getNumberOfPages(),
false,
new HashSet<>(allFileAttributes),
context);
@ -345,65 +286,4 @@ public class AnalyzeService {
log.info("Stored component log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
}
/**
 * Selects the top-level sections of {@code document} that are marked for re-analysis.
 *
 * @param document the parsed document graph whose direct children are candidate sections
 * @param sectionsToReanalyseIds root tree ids identifying the sections to re-analyse
 * @return the matching sections, in document order
 */
private static List<SemanticNode> getSectionsToReAnalyse(Document document, Set<Integer> sectionsToReanalyseIds) {
    return document.streamChildren()
            .filter(section -> {
                // A section's root id is the first element of its tree-id path.
                Integer rootTreeId = section.getTreeId().get(0);
                return sectionsToReanalyseIds.contains(rootTreeId);
            })
            .collect(Collectors.toList());
}
/**
 * Determines which top-level sections must be re-analysed for the given request.
 * Pure delegation to the section finder service; kept as a small seam for readability.
 *
 * @return the root tree ids of the sections that require re-analysis
 */
private Set<Integer> getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest, Document document, DictionaryIncrement dictionaryIncrement,
        ImportedRedactions importedRedactions, Set<String> relevantManuallyModifiedAnnotationIds) {
    return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement,
            document,
            analyzeRequest,
            importedRedactions,
            relevantManuallyModifiedAnnotationIds);
}
/**
 * Loads the stored NER entities for the file and keeps only those belonging to the
 * sections being re-analysed. Returns an empty result when the NER service is disabled.
 *
 * @param analyzeRequest identifies the dossier/file whose NER entities are loaded
 * @param document the document graph the entities are mapped onto
 * @param sectionsToReanalyseIds super-section ids whose entities are kept
 * @return the filtered NER entities, never {@code null}
 */
private NerEntities getEntityRecognitionEntitiesFilteredBySectionIds(AnalyzeRequest analyzeRequest, Document document, Set<Integer> sectionsToReanalyseIds) {
    if (!redactionServiceSettings.isNerServiceEnabled()) {
        return new NerEntities(Collections.emptyList());
    }
    NerEntitiesModel rawModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
    NerEntitiesModel filteredModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, rawModel);
    return NerEntitiesAdapter.toNerEntities(filteredModel, document);
}
/**
 * Restricts the raw NER model to entries whose super-section id is contained in the
 * given set. Entry values are passed through untouched.
 *
 * @param sectionsToReanalyseIds super-section ids to keep
 * @param nerEntitiesModel the unfiltered model as read from storage
 * @return a new model containing only the surviving entries
 */
private static NerEntitiesModel filterNerEntitiesModelBySectionIds(Set<Integer> sectionsToReanalyseIds, NerEntitiesModel nerEntitiesModel) {
    var filteredData = nerEntitiesModel.getData()
            .entrySet()
            .stream()
            .filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey())))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    return new NerEntitiesModel(filteredData);
}
/**
 * Loads all stored NER entities for the file and maps them onto the document graph.
 * Returns an empty result when the NER service is disabled.
 *
 * @param analyzeRequest identifies the dossier/file whose NER entities are loaded
 * @param document the document graph the entities are mapped onto
 * @return the NER entities, never {@code null}
 */
private NerEntities getEntityRecognitionEntities(AnalyzeRequest analyzeRequest, Document document) {
    if (!redactionServiceSettings.isNerServiceEnabled()) {
        return new NerEntities(Collections.emptyList());
    }
    var nerEntitiesModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
    return NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
}
/**
 * Extracts the super-section id (the root element of the tree-id path) from a raw
 * NER section key.
 */
private static Integer getSuperSectionID(String section) {
    var treeId = NerEntitiesAdapter.sectionNumberToTreeId(section);
    return treeId.get(0);
}
}

View File

@ -1,7 +1,6 @@
package com.iqser.red.service.redaction.v1.server.service;
import java.awt.Color;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;

View File

@ -6,6 +6,8 @@ import java.io.InputStream;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.springframework.cache.annotation.Cacheable;
@ -213,7 +215,6 @@ public class RedactionStorageService {
}
// !Warning! Before activating the Redis cache you need to set the JVM args
// -Dio.netty.noPreferDirect=true and -XX:MaxDirectMemorySize=<size>, sized to the
// largest document data we want to process (for a 4443-page file that was 500 MB).
@ -224,23 +225,42 @@ public class RedactionStorageService {
/**
 * Loads all persisted artefacts of a document (structure, text, positions, pages) and
 * assembles them into a {@code DocumentData}. The four storage reads are independent,
 * so they are fetched concurrently to reduce overall latency.
 *
 * Fixes relative to the previous version:
 * - removes the leftover synchronous {@code readJSONObject} builder calls that read
 *   every artefact a second time (merge residue), restoring the intended speed-up;
 * - unwraps {@link CompletionException} thrown by {@code join()} so a missing storage
 *   object still maps to the documented {@code null} return instead of a
 *   {@code RuntimeException};
 * - captures the tenant id on the calling thread.
 *
 * @param dossierId the dossier the document belongs to
 * @param fileId the file whose document data is loaded
 * @return the assembled document data, or {@code null} when the artefacts do not exist
 */
public DocumentData getDocumentData(String dossierId, String fileId) {
    // Resolve the tenant on the calling thread. TenantContext is presumably ThreadLocal-backed,
    // so reading it inside the async suppliers (which run on the common ForkJoinPool) could
    // observe the wrong tenant. TODO(review): confirm TenantContext propagation semantics.
    String tenantId = TenantContext.getTenantId();
    try {
        Supplier<DocumentStructure> documentStructureSupplier = () -> storageService.readJSONObject(tenantId,
                StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
                DocumentStructure.class);
        CompletableFuture<DocumentStructure> documentStructureFuture = CompletableFuture.supplyAsync(documentStructureSupplier);
        Supplier<DocumentTextData[]> documentTextDataSupplier = () -> storageService.readJSONObject(tenantId,
                StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
                DocumentTextData[].class);
        CompletableFuture<DocumentTextData[]> documentTextDataFuture = CompletableFuture.supplyAsync(documentTextDataSupplier);
        Supplier<DocumentPositionData[]> documentPositionDataSupplier = () -> storageService.readJSONObject(tenantId,
                StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION),
                DocumentPositionData[].class);
        CompletableFuture<DocumentPositionData[]> documentPositionDataFuture = CompletableFuture.supplyAsync(documentPositionDataSupplier);
        Supplier<DocumentPage[]> documentPageSupplier = () -> storageService.readJSONObject(tenantId,
                StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
                DocumentPage[].class);
        CompletableFuture<DocumentPage[]> documentPagesFuture = CompletableFuture.supplyAsync(documentPageSupplier);
        // Wait for all reads; join() rethrows supplier failures wrapped in CompletionException.
        CompletableFuture.allOf(documentStructureFuture, documentTextDataFuture, documentPositionDataFuture, documentPagesFuture).join();
        return DocumentData.builder()
                .documentStructure(documentStructureFuture.join())
                .documentTextData(documentTextDataFuture.join())
                .documentPositionData(documentPositionDataFuture.join())
                .documentPages(documentPagesFuture.join())
                .build();
    } catch (CompletionException e) {
        // Async failures arrive wrapped; unwrap so the "object missing" case keeps the
        // null-return contract instead of escalating to a RuntimeException.
        if (e.getCause() instanceof StorageObjectDoesNotExist) {
            log.debug("DocumentData not available.");
            return null;
        }
        log.error("An error occurred while fetching document data", e);
        throw new RuntimeException(e);
    } catch (StorageObjectDoesNotExist e) {
        log.debug("DocumentData not available.");
        return null;
    } catch (Exception e) {
        log.error("An error occurred while fetching document data", e);
        throw new RuntimeException(e);
    }
}

View File

@ -9,12 +9,14 @@ import static org.mockito.Mockito.when;
import java.io.File;
import java.io.FileInputStream;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@ -58,6 +60,7 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.keycloakcommons.security.TenantAuthenticationManagerResolver;
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -80,7 +83,8 @@ import lombok.extern.slf4j.Slf4j;
* This way you can recreate what is happening on the stack almost exactly.
*/ public class AnalysisEnd2EndTest {
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
Path dossierTemplateToUse = Path.of(
"/Users/maverickstuder/Documents/syngenta/redactmanager/prod-cp-eu-reg/EFSA_sanitisation_pre_GFL_v1"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";
@ -116,12 +120,15 @@ import lombok.extern.slf4j.Slf4j;
@Autowired
protected TenantMongoLiquibaseExecutor tenantMongoLiquibaseExecutor;
@MockBean
protected TenantAuthenticationManagerResolver tenantAuthenticationManagerResolver;
@Test
@SneakyThrows
public void runAnalysisEnd2End() {
String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
String folder = "/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/files_end2end/file0"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
@ -137,7 +144,13 @@ import lombok.extern.slf4j.Slf4j;
for (int i = 0; i < analyzeRequests.size(); i++) {
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
analyzeService.analyze(analyzeRequest);
var times = new LinkedList<Long>();
for (int j = 1; j <= 10; j++) {
var start = System.currentTimeMillis();
analyzeService.analyze(analyzeRequest);
times.add(System.currentTimeMillis() - start);
}
System.out.println("times in ms for each analyze run: " + times);
}
}
@ -294,7 +307,7 @@ import lombok.extern.slf4j.Slf4j;
Map<String, Object> dossierTemplate = mapper.readValue(dossierTemplateToUse.resolve("dossierTemplate.json").toFile(), HashMap.class);
this.id = (String) dossierTemplate.get("dossierTemplateId");
List<DictionaryModel> dictionaries = Files.walk(dossierTemplateToUse)
List<DictionaryModel> dictionaries = Files.walk(dossierTemplateToUse, FileVisitOption.FOLLOW_LINKS)
.filter(path -> path.getFileName().toString().equals("dossierType.json"))
.map(this::loadDictionaryModel)
.toList();