RED-9123: Improve performance of re-analysis (Spike)
This commit is contained in:
parent
c279f54295
commit
1c81212a2d
@ -43,7 +43,7 @@ dependencies {
|
||||
implementation("com.iqser.red.commons:dictionary-merge-commons:1.5.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.45.0")
|
||||
implementation("com.knecon.fforesight:keycloak-commons:0.29.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.24.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.25.0")
|
||||
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
|
||||
|
||||
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
|
||||
|
||||
@ -9,6 +9,7 @@ import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.liquibase.LiquibaseAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.mongo.MongoAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.cache.annotation.EnableCaching;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
|
||||
@ -194,25 +194,32 @@ public class TextEntity implements IEntity {
|
||||
|
||||
public boolean containedBy(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.containedBy(textEntity.getTextRange()) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> duplicateTextRange.containedBy(textEntity.textRange)) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> textEntity.getDuplicateTextRanges()
|
||||
.stream()
|
||||
.anyMatch(duplicateTextRange::containedBy));
|
||||
return textEntity.contains(this);
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.contains(textEntity.getTextRange()) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> duplicateTextRange.contains(textEntity.textRange)) //
|
||||
|| duplicateTextRanges.stream()
|
||||
.anyMatch(duplicateTextRange -> textEntity.getDuplicateTextRanges()
|
||||
.stream()
|
||||
.anyMatch(duplicateTextRange::contains));
|
||||
if (this.textRange.contains(textEntity.getTextRange())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
List<TextRange> textEntityDuplicateRanges = textEntity.getDuplicateTextRanges();
|
||||
// use optimized indexed loops for extra performance boost
|
||||
for (int i = 0, duplicateTextRangesSize = duplicateTextRanges.size(); i < duplicateTextRangesSize; i++) {
|
||||
TextRange duplicateTextRange = duplicateTextRanges.get(i);
|
||||
if (duplicateTextRange.contains(textEntity.getTextRange())) {
|
||||
return true;
|
||||
}
|
||||
for (int j = 0, textEntityDuplicateRangesSize = textEntityDuplicateRanges.size(); j < textEntityDuplicateRangesSize; j++) {
|
||||
TextRange otherRange = textEntityDuplicateRanges.get(j);
|
||||
if (duplicateTextRange.contains(otherRange)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,396 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds;
|
||||
import static org.springframework.boot.autoconfigure.task.TaskExecutionAutoConfiguration.APPLICATION_TASK_EXECUTOR_BEAN_NAME;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.core.task.TaskExecutor;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.drools.KieContainerCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class AnalysisPreparationService {
|
||||
|
||||
// Collaborators. The class-level @FieldDefaults(makeFinal = true, level = PRIVATE) turns each of them into a
// private final field.
KieContainerCreationService kieContainerCreationService;
ObservedStorageService observedStorageService;
RedactionStorageService redactionStorageService;
RedactionServiceSettings redactionServiceSettings;
ManualRedactionEntryService manualRedactionEntryService;
ImportedRedactionEntryService importedRedactionEntryService;
DictionaryService dictionaryService;
SectionFinderService sectionFinderService;
// Executor on which all asynchronous loads of this service are scheduled.
TaskExecutor taskExecutor;


/**
 * Creates the service with all of its collaborators.
 *
 * <p>The constructor is written by hand because the executor has to be selected by bean name.
 *
 * @param kieContainerCreationService   provides the latest rule containers
 * @param observedStorageService        used to load the stored document data
 * @param redactionStorageService       used to load entity log, imported redactions and NER entities
 * @param redactionServiceSettings      settings, e.g. whether the NER service is enabled
 * @param manualRedactionEntryService   adds manual redaction entries to a document
 * @param importedRedactionEntryService adds imported redaction entries to a document
 * @param dictionaryService             provides dictionaries and dictionary increments
 * @param sectionFinderService          finds the sections that need to be re-analysed
 * @param taskExecutor                  the executor registered under {@code APPLICATION_TASK_EXECUTOR_BEAN_NAME}
 */
public AnalysisPreparationService(KieContainerCreationService kieContainerCreationService,
                                  ObservedStorageService observedStorageService,
                                  RedactionStorageService redactionStorageService,
                                  RedactionServiceSettings redactionServiceSettings,
                                  ManualRedactionEntryService manualRedactionEntryService,
                                  ImportedRedactionEntryService importedRedactionEntryService,
                                  DictionaryService dictionaryService,
                                  SectionFinderService sectionFinderService,
                                  @Qualifier(APPLICATION_TASK_EXECUTOR_BEAN_NAME) TaskExecutor taskExecutor) {

    this.kieContainerCreationService = kieContainerCreationService;
    this.observedStorageService = observedStorageService;
    this.redactionStorageService = redactionStorageService;
    this.redactionServiceSettings = redactionServiceSettings;
    this.manualRedactionEntryService = manualRedactionEntryService;
    this.importedRedactionEntryService = importedRedactionEntryService;
    this.dictionaryService = dictionaryService;
    this.sectionFinderService = sectionFinderService;
    this.taskExecutor = taskExecutor;
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public AnalysisData getAnalysisData(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
CompletableFuture<KieWrapper> kieWrapperComponentRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.COMPONENT), taskExecutor);
|
||||
|
||||
CompletableFuture<KieWrapper> kieWrapperEntityRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.ENTITY), taskExecutor);
|
||||
|
||||
CompletableFuture<Document> documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor);
|
||||
|
||||
CompletableFuture<ImportedRedactions> importedRedactionsFuture = CompletableFuture.supplyAsync(() -> getImportedRedactions(analyzeRequest), taskExecutor);
|
||||
|
||||
CompletableFuture<NerEntities> nerEntitiesFuture = documentFuture.thenApplyAsync((document) -> getNerEntities(analyzeRequest, document), taskExecutor);
|
||||
|
||||
CompletableFuture.allOf(kieWrapperEntityRulesFuture, kieWrapperComponentRulesFuture, documentFuture, importedRedactionsFuture, nerEntitiesFuture).join();
|
||||
|
||||
Dictionary dictionary = getDictionary(analyzeRequest);
|
||||
|
||||
Document document = documentFuture.get();
|
||||
ImportedRedactions importedRedactions = importedRedactionsFuture.get();
|
||||
|
||||
List<PrecursorEntity> notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
|
||||
document,
|
||||
analyzeRequest.getDossierTemplateId());
|
||||
|
||||
List<PrecursorEntity> notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
|
||||
|
||||
return new AnalysisData(kieWrapperEntityRulesFuture.get(),
|
||||
kieWrapperComponentRulesFuture.get(),
|
||||
document,
|
||||
importedRedactions,
|
||||
dictionary,
|
||||
notFoundManualRedactionEntries,
|
||||
notFoundImportedEntries,
|
||||
nerEntitiesFuture.get());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public ReanalysisSetupData getReanalysisSetupData(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
CompletableFuture<EntityLog> entityLogFuture = CompletableFuture.supplyAsync(() -> getEntityLog(analyzeRequest), taskExecutor);
|
||||
|
||||
CompletableFuture<Document> documentFuture = CompletableFuture.supplyAsync(() -> getDocument(analyzeRequest), taskExecutor);
|
||||
|
||||
CompletableFuture.allOf(entityLogFuture, documentFuture).join();
|
||||
|
||||
return new ReanalysisSetupData(entityLogFuture.get(), documentFuture.get());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public ReanalysisInitialProcessingData getReanalysisInitialProcessingData(AnalyzeRequest analyzeRequest, ReanalysisSetupData reanalysisSetupData) {
|
||||
|
||||
CompletableFuture<ImportedRedactions> importedRedactionsFuture = CompletableFuture.supplyAsync(() -> getImportedRedactions(analyzeRequest), taskExecutor);
|
||||
|
||||
CompletableFuture<SectionsToReanalyzeData> incrementAndSectionsToReanalyzeFuture = importedRedactionsFuture.thenApplyAsync((importedRedactions) -> {
|
||||
DictionaryIncrement dictionaryIncrement = getDictionaryIncrement(analyzeRequest, reanalysisSetupData);
|
||||
return getDictionaryIncrementAndSectionsToReanalyze(analyzeRequest, dictionaryIncrement, reanalysisSetupData, importedRedactions);
|
||||
|
||||
}, taskExecutor);
|
||||
|
||||
CompletableFuture<KieWrapper> kieWrapperComponentRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.COMPONENT), taskExecutor);
|
||||
|
||||
CompletableFuture<KieWrapper> kieWrapperEntityRulesFuture = CompletableFuture.supplyAsync(() -> getKieWrapper(analyzeRequest, RuleFileType.ENTITY), taskExecutor);
|
||||
|
||||
CompletableFuture.allOf(importedRedactionsFuture, incrementAndSectionsToReanalyzeFuture, kieWrapperComponentRulesFuture, kieWrapperEntityRulesFuture).join();
|
||||
|
||||
return new ReanalysisInitialProcessingData(importedRedactionsFuture.get(),
|
||||
incrementAndSectionsToReanalyzeFuture.get().dictionaryIncrement(),
|
||||
incrementAndSectionsToReanalyzeFuture.get().sectionsToReanalyseIds(),
|
||||
incrementAndSectionsToReanalyzeFuture.get().sectionsToReanalyze(),
|
||||
kieWrapperComponentRulesFuture.get(),
|
||||
kieWrapperEntityRulesFuture.get());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public ReanalysisFinalProcessingData getReanalysisFinalProcessingData(AnalyzeRequest analyzeRequest,
|
||||
ReanalysisSetupData reanalysisSetupData,
|
||||
ReanalysisInitialProcessingData reanalysisInitialProcessingData) {
|
||||
|
||||
CompletableFuture<NerEntities> nerEntitiesFuture = CompletableFuture.supplyAsync(() -> getNerEntitiesFiltered(analyzeRequest,
|
||||
reanalysisSetupData.document,
|
||||
reanalysisInitialProcessingData.sectionsToReanalyseIds),
|
||||
taskExecutor);
|
||||
|
||||
CompletableFuture<DictionaryAndNotFoundEntries> dictionaryAndNotFoundEntriesCompletableFuture = CompletableFuture.supplyAsync(() -> {
|
||||
Dictionary dictionary = getDictionary(analyzeRequest);
|
||||
NotFoundEntries notFoundEntries = getNotFoundEntries(analyzeRequest, reanalysisSetupData.document(), reanalysisInitialProcessingData.importedRedactions());
|
||||
return new DictionaryAndNotFoundEntries(dictionary, notFoundEntries.notFoundManualRedactionEntries(), notFoundEntries.notFoundImportedEntries());
|
||||
}, taskExecutor);
|
||||
|
||||
CompletableFuture.allOf(nerEntitiesFuture, dictionaryAndNotFoundEntriesCompletableFuture).join();
|
||||
|
||||
return new ReanalysisFinalProcessingData(nerEntitiesFuture.get(),
|
||||
dictionaryAndNotFoundEntriesCompletableFuture.get().dictionary(),
|
||||
dictionaryAndNotFoundEntriesCompletableFuture.get().notFoundManualRedactionEntries(),
|
||||
dictionaryAndNotFoundEntriesCompletableFuture.get().notFoundImportedEntries());
|
||||
}
|
||||
|
||||
|
||||
public KieWrapper getKieWrapper(AnalyzeRequest analyzeRequest, RuleFileType ruleFileType) {
|
||||
|
||||
KieWrapper kieWrapperComponentRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), ruleFileType);
|
||||
log.info("Updated {} Rules to Version {} for file {} in dossier {}",
|
||||
ruleFileType,
|
||||
kieWrapperComponentRules.rulesVersion(),
|
||||
analyzeRequest.getFileId(),
|
||||
analyzeRequest.getDossierId());
|
||||
return kieWrapperComponentRules;
|
||||
}
|
||||
|
||||
|
||||
public Document getDocument(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return document;
|
||||
}
|
||||
|
||||
|
||||
public ImportedRedactions getImportedRedactions(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return importedRedactions;
|
||||
}
|
||||
|
||||
|
||||
public NerEntities getNerEntities(AnalyzeRequest analyzeRequest, Document document) {
|
||||
|
||||
NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document);
|
||||
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
public NerEntities getNerEntitiesFiltered(AnalyzeRequest analyzeRequest, Document document, Set<Integer> sectionsToReanalyseIds) {
|
||||
|
||||
NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds);
|
||||
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
private Dictionary getDictionary(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return dictionary;
|
||||
}
|
||||
|
||||
|
||||
private NotFoundEntries getNotFoundEntries(AnalyzeRequest analyzeRequest, Document document, ImportedRedactions importedRedactions) {
|
||||
|
||||
var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
|
||||
document,
|
||||
analyzeRequest.getDossierTemplateId());
|
||||
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
|
||||
return new NotFoundEntries(notFoundManualRedactionEntries, notFoundImportedEntries);
|
||||
}
|
||||
|
||||
|
||||
private NerEntities getEntityRecognitionEntities(AnalyzeRequest analyzeRequest, Document document) {
|
||||
|
||||
NerEntities nerEntities;
|
||||
if (redactionServiceSettings.isNerServiceEnabled()) {
|
||||
nerEntities = NerEntitiesAdapter.toNerEntities(redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()), document);
|
||||
} else {
|
||||
nerEntities = new NerEntities(Collections.emptyList());
|
||||
}
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
private EntityLog getEntityLog(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return entityLogWithoutEntries;
|
||||
}
|
||||
|
||||
|
||||
private SectionsToReanalyzeData getDictionaryIncrementAndSectionsToReanalyze(AnalyzeRequest analyzeRequest,
|
||||
DictionaryIncrement dictionaryIncrement,
|
||||
ReanalysisSetupData reanalysisSetupData,
|
||||
ImportedRedactions importedRedactions) {
|
||||
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
relevantManuallyModifiedAnnotationIds);
|
||||
sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest,
|
||||
reanalysisSetupData.document(),
|
||||
dictionaryIncrement,
|
||||
importedRedactions,
|
||||
relevantManuallyModifiedAnnotationIds));
|
||||
|
||||
List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(reanalysisSetupData.document(), sectionsToReanalyseIds);
|
||||
log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
return new SectionsToReanalyzeData(dictionaryIncrement, sectionsToReanalyseIds, sectionsToReAnalyse);
|
||||
}
|
||||
|
||||
|
||||
private DictionaryIncrement getDictionaryIncrement(AnalyzeRequest analyzeRequest, ReanalysisSetupData reanalysisSetupData) {
|
||||
|
||||
return dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
|
||||
new DictionaryVersion(reanalysisSetupData.entityLog().getDictionaryVersion(),
|
||||
reanalysisSetupData.entityLog().getDossierDictionaryVersion()),
|
||||
analyzeRequest.getDossierId());
|
||||
}
|
||||
|
||||
|
||||
private NerEntities getEntityRecognitionEntitiesFilteredBySectionIds(AnalyzeRequest analyzeRequest, Document document, Set<Integer> sectionsToReanalyseIds) {
|
||||
|
||||
NerEntities nerEntities;
|
||||
if (redactionServiceSettings.isNerServiceEnabled()) {
|
||||
NerEntitiesModel nerEntitiesModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
nerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, nerEntitiesModel);
|
||||
nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
|
||||
} else {
|
||||
nerEntities = new NerEntities(Collections.emptyList());
|
||||
}
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
private static NerEntitiesModel filterNerEntitiesModelBySectionIds(Set<Integer> sectionsToReanalyseIds, NerEntitiesModel nerEntitiesModel) {
|
||||
|
||||
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
|
||||
.stream() //
|
||||
.filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
|
||||
}
|
||||
|
||||
|
||||
private static Integer getSuperSectionID(String section) {
|
||||
|
||||
return NerEntitiesAdapter.sectionNumberToTreeId(section)
|
||||
.get(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static List<SemanticNode> getSectionsToReAnalyse(Document document, Set<Integer> sectionsToReanalyseIds) {
|
||||
|
||||
return document.streamChildren()
|
||||
.filter(section -> sectionsToReanalyseIds.contains(section.getTreeId()
|
||||
.get(0)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private Set<Integer> getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest,
|
||||
Document document,
|
||||
DictionaryIncrement dictionaryIncrement,
|
||||
ImportedRedactions importedRedactions,
|
||||
Set<String> relevantManuallyModifiedAnnotationIds) {
|
||||
|
||||
return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, document, analyzeRequest, importedRedactions, relevantManuallyModifiedAnnotationIds);
|
||||
}
|
||||
|
||||
|
||||
private record DictionaryAndNotFoundEntries(Dictionary dictionary, List<PrecursorEntity> notFoundManualRedactionEntries, List<PrecursorEntity> notFoundImportedEntries) {
|
||||
|
||||
}
|
||||
|
||||
private record NotFoundEntries(List<PrecursorEntity> notFoundManualRedactionEntries, List<PrecursorEntity> notFoundImportedEntries) {
|
||||
|
||||
}
|
||||
|
||||
private record SectionsToReanalyzeData(DictionaryIncrement dictionaryIncrement, Set<Integer> sectionsToReanalyseIds, List<SemanticNode> sectionsToReanalyze) {
|
||||
|
||||
}
|
||||
|
||||
public record AnalysisData(
|
||||
KieWrapper kieWrapperEntityRules,
|
||||
KieWrapper kieWrapperComponentRules,
|
||||
Document document,
|
||||
ImportedRedactions importedRedactions,
|
||||
Dictionary dictionary,
|
||||
List<PrecursorEntity> notFoundManualRedactionEntries,
|
||||
List<PrecursorEntity> notFoundImportedEntries,
|
||||
NerEntities nerEntities
|
||||
) {
|
||||
|
||||
}
|
||||
|
||||
public record ReanalysisSetupData(
|
||||
EntityLog entityLog, Document document
|
||||
) {
|
||||
|
||||
}
|
||||
|
||||
public record ReanalysisInitialProcessingData(
|
||||
ImportedRedactions importedRedactions,
|
||||
DictionaryIncrement dictionaryIncrement,
|
||||
Set<Integer> sectionsToReanalyseIds,
|
||||
List<SemanticNode> sectionsToReAnalyse,
|
||||
KieWrapper kieWrapperComponentRules,
|
||||
KieWrapper kieWrapperEntityRules
|
||||
) {
|
||||
|
||||
}
|
||||
|
||||
public record ReanalysisFinalProcessingData(
|
||||
NerEntities nerEntities, Dictionary dictionary, List<PrecursorEntity> notFoundManualRedactionEntries, List<PrecursorEntity> notFoundImportedEntries
|
||||
) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,13 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService.getRelevantManuallyModifiedAnnotationIds;
|
||||
import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisFinalProcessingData;
|
||||
import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisInitialProcessingData;
|
||||
import static com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.ReanalysisSetupData;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@ -19,33 +20,19 @@ import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.imported.ImportedRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
|
||||
import com.iqser.red.service.redaction.v1.server.logger.Context;
|
||||
import com.iqser.red.service.redaction.v1.server.model.KieWrapper;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.service.AnalysisPreparationService.AnalysisData;
|
||||
import com.iqser.red.service.redaction.v1.server.service.components.ComponentLogCreatorService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.ImportedRedactionEntryService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.ManualRedactionEntryService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.SectionFinderService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.drools.ComponentDroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.drools.EntityDroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.drools.KieContainerCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.ObservedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
@ -63,21 +50,16 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class AnalyzeService {
|
||||
|
||||
DictionaryService dictionaryService;
|
||||
EntityDroolsExecutionService entityDroolsExecutionService;
|
||||
ComponentDroolsExecutionService componentDroolsExecutionService;
|
||||
KieContainerCreationService kieContainerCreationService;
|
||||
DictionarySearchService dictionarySearchService;
|
||||
EntityLogCreatorService entityLogCreatorService;
|
||||
ComponentLogCreatorService componentLogCreatorService;
|
||||
RedactionStorageService redactionStorageService;
|
||||
RedactionServiceSettings redactionServiceSettings;
|
||||
NotFoundImportedEntitiesService notFoundImportedEntitiesService;
|
||||
SectionFinderService sectionFinderService;
|
||||
ManualRedactionEntryService manualRedactionEntryService;
|
||||
ImportedRedactionEntryService importedRedactionEntryService;
|
||||
ObservedStorageService observedStorageService;
|
||||
FunctionTimerValues redactmanagerAnalyzePagewiseValues;
|
||||
AnalysisPreparationService analysisPreparationService;
|
||||
|
||||
|
||||
@Timed("redactmanager_reanalyze")
|
||||
@ -86,172 +68,131 @@ public class AnalyzeService {
|
||||
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
EntityLog entityLogWithoutEntries = redactionStorageService.getEntityLogWithoutEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded previous entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
Context context = new Context(analyzeRequest.getFileId(), analyzeRequest.getDossierId(), analyzeRequest.getDossierTemplateId(), 0, analyzeRequest.getAnalysisNumber(), TenantContext.getTenantId());
|
||||
ReanalysisSetupData setupData = analysisPreparationService.getReanalysisSetupData(analyzeRequest);
|
||||
Context context = new Context(analyzeRequest.getFileId(),
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getDossierTemplateId(),
|
||||
0,
|
||||
analyzeRequest.getAnalysisNumber(),
|
||||
TenantContext.getTenantId());
|
||||
|
||||
// not yet ready for reanalysis
|
||||
if (entityLogWithoutEntries == null || document == null || document.getNumberOfPages() == 0) {
|
||||
if (setupData.entityLog() == null || setupData.document() == null || setupData.document().getNumberOfPages() == 0) {
|
||||
return analyze(analyzeRequest);
|
||||
}
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
|
||||
new DictionaryVersion(entityLogWithoutEntries.getDictionaryVersion(),
|
||||
entityLogWithoutEntries.getDossierDictionaryVersion()),
|
||||
analyzeRequest.getDossierId());
|
||||
ReanalysisInitialProcessingData initialProcessingData = analysisPreparationService.getReanalysisInitialProcessingData(analyzeRequest, setupData);
|
||||
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
if (initialProcessingData.sectionsToReAnalyse().isEmpty()) {
|
||||
|
||||
Set<Integer> sectionsToReanalyseIds = redactionStorageService.findIdsOfSectionsToReanalyse(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
relevantManuallyModifiedAnnotationIds);
|
||||
sectionsToReanalyseIds.addAll(getSectionsToReanalyseIds(analyzeRequest, document, dictionaryIncrement, importedRedactions, relevantManuallyModifiedAnnotationIds));
|
||||
|
||||
List<SemanticNode> sectionsToReAnalyse = getSectionsToReAnalyse(document, sectionsToReanalyseIds);
|
||||
log.info("{} Sections to reanalyze found for file {} in dossier {}", sectionsToReanalyseIds.size(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
if (sectionsToReAnalyse.isEmpty()) {
|
||||
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(entityLogWithoutEntries,
|
||||
dictionaryIncrement.getDictionaryVersion(),
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.updateVersionsAndReturnChanges(setupData.entityLog(),
|
||||
initialProcessingData.dictionaryIncrement().getDictionaryVersion(),
|
||||
analyzeRequest,
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>());
|
||||
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
startTime,
|
||||
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT),
|
||||
initialProcessingData.kieWrapperComponentRules(),
|
||||
entityLogChanges,
|
||||
document,
|
||||
document.getNumberOfPages(),
|
||||
setupData.document(),
|
||||
setupData.document().getNumberOfPages(),
|
||||
true,
|
||||
Collections.emptySet(),
|
||||
context);
|
||||
}
|
||||
|
||||
KieWrapper kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
|
||||
log.info("Updated entity rules to version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
context.setRuleVersion(initialProcessingData.kieWrapperEntityRules().rulesVersion());
|
||||
|
||||
context.setRuleVersion(kieWrapperEntityRules.rulesVersion());
|
||||
ReanalysisFinalProcessingData finalProcessingData = analysisPreparationService.getReanalysisFinalProcessingData(analyzeRequest, setupData, initialProcessingData);
|
||||
|
||||
NerEntities nerEntities = getEntityRecognitionEntitiesFilteredBySectionIds(analyzeRequest, document, sectionsToReanalyseIds);
|
||||
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
dictionarySearchService.addDictionaryEntities(finalProcessingData.dictionary(), initialProcessingData.sectionsToReAnalyse());
|
||||
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
|
||||
document,
|
||||
analyzeRequest.getDossierTemplateId());
|
||||
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
|
||||
var notFoundManualOrImportedEntries = Stream.of(notFoundManualRedactionEntries, notFoundImportedEntries)
|
||||
var notFoundManualOrImportedEntries = Stream.of(finalProcessingData.notFoundManualRedactionEntries(), finalProcessingData.notFoundImportedEntries())
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
dictionarySearchService.addDictionaryEntities(dictionary, sectionsToReAnalyse);
|
||||
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
// we could add the imported redactions similar to the manual redactions here as well for additional processing
|
||||
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(kieWrapperEntityRules.container(),
|
||||
document,
|
||||
sectionsToReAnalyse,
|
||||
dictionary,
|
||||
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(initialProcessingData.kieWrapperEntityRules().container(),
|
||||
setupData.document(),
|
||||
initialProcessingData.sectionsToReAnalyse(),
|
||||
finalProcessingData.dictionary(),
|
||||
analyzeRequest.getFileAttributes(),
|
||||
analyzeRequest.getManualRedactions(),
|
||||
nerEntities,
|
||||
finalProcessingData.nerEntities(),
|
||||
context);
|
||||
log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.updatePreviousEntityLog(analyzeRequest,
|
||||
document,
|
||||
entityLogWithoutEntries,
|
||||
setupData.document(),
|
||||
setupData.entityLog(),
|
||||
notFoundManualOrImportedEntries,
|
||||
sectionsToReanalyseIds,
|
||||
dictionary.getVersion());
|
||||
initialProcessingData.sectionsToReanalyseIds(),
|
||||
finalProcessingData.dictionary().getVersion());
|
||||
|
||||
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
|
||||
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, finalProcessingData.notFoundImportedEntries());
|
||||
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
startTime,
|
||||
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT),
|
||||
initialProcessingData.kieWrapperComponentRules(),
|
||||
entityLogChanges,
|
||||
document,
|
||||
document.getNumberOfPages(),
|
||||
setupData.document(),
|
||||
setupData.document().getNumberOfPages(),
|
||||
true,
|
||||
new HashSet<>(allFileAttributes),
|
||||
context);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@Timed("redactmanager_analyze")
|
||||
@Observed(name = "AnalyzeService", contextualName = "analyze")
|
||||
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var kieWrapperEntityRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.ENTITY);
|
||||
log.info("Updated Rules to Version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
AnalysisData analysisData = analysisPreparationService.getAnalysisData(analyzeRequest);
|
||||
|
||||
var kieWrapperComponentRules = kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT);
|
||||
log.info("Updated Rules to Version {} for file {} in dossier {}", kieWrapperEntityRules.rulesVersion(), analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
Context context = new Context(analyzeRequest.getFileId(),
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getDossierTemplateId(),
|
||||
analysisData.kieWrapperEntityRules().rulesVersion(),
|
||||
analyzeRequest.getAnalysisNumber(),
|
||||
TenantContext.getTenantId());
|
||||
|
||||
Context context = new Context(analyzeRequest.getFileId(), analyzeRequest.getDossierId(), analyzeRequest.getDossierTemplateId(), kieWrapperEntityRules.rulesVersion(), analyzeRequest.getAnalysisNumber(), TenantContext.getTenantId());
|
||||
|
||||
Document document = DocumentGraphMapper.toDocumentGraph(observedStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
log.info("Loaded Document Graph for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
log.info("Loaded Imported Redactions for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
NerEntities nerEntities = getEntityRecognitionEntities(analyzeRequest, document);
|
||||
log.info("Loaded Ner Entities for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
var notFoundManualRedactionEntries = manualRedactionEntryService.addManualRedactionEntriesAndReturnNotFoundEntries(analyzeRequest,
|
||||
document,
|
||||
analyzeRequest.getDossierTemplateId());
|
||||
var notFoundImportedEntries = importedRedactionEntryService.addImportedEntriesAndReturnNotFoundEntries(analyzeRequest, importedRedactions, document);
|
||||
var notFoundManualOrImportedEntries = Stream.of(notFoundManualRedactionEntries, notFoundImportedEntries)
|
||||
var notFoundManualOrImportedEntries = Stream.of(analysisData.notFoundManualRedactionEntries(), analysisData.notFoundImportedEntries())
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
dictionarySearchService.addDictionaryEntities(dictionary, document);
|
||||
dictionarySearchService.addDictionaryEntities(analysisData.dictionary(), analysisData.document());
|
||||
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
// we could add the imported redactions similar to the manual redactions here as well for additional processing
|
||||
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(kieWrapperEntityRules.container(),
|
||||
document,
|
||||
dictionary,
|
||||
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(analysisData.kieWrapperEntityRules().container(),
|
||||
analysisData.document(),
|
||||
analysisData.dictionary(),
|
||||
analyzeRequest.getFileAttributes(),
|
||||
analyzeRequest.getManualRedactions(),
|
||||
nerEntities,
|
||||
analysisData.nerEntities(),
|
||||
context);
|
||||
log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest,
|
||||
document,
|
||||
analysisData.document(),
|
||||
notFoundManualOrImportedEntries,
|
||||
dictionary.getVersion(),
|
||||
kieWrapperEntityRules.rulesVersion());
|
||||
analysisData.dictionary().getVersion(),
|
||||
analysisData.kieWrapperEntityRules().rulesVersion());
|
||||
|
||||
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, notFoundImportedEntries);
|
||||
notFoundImportedEntitiesService.processEntityLog(entityLogChanges.getEntityLog(), analyzeRequest, analysisData.notFoundImportedEntries());
|
||||
|
||||
return finalizeAnalysis(analyzeRequest,
|
||||
startTime,
|
||||
kieWrapperComponentRules,
|
||||
analysisData.kieWrapperComponentRules(),
|
||||
entityLogChanges,
|
||||
document,
|
||||
document.getNumberOfPages(),
|
||||
analysisData.document(),
|
||||
analysisData.document().getNumberOfPages(),
|
||||
false,
|
||||
new HashSet<>(allFileAttributes),
|
||||
context);
|
||||
@ -345,65 +286,4 @@ public class AnalyzeService {
|
||||
log.info("Stored component log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
}
|
||||
|
||||
|
||||
private static List<SemanticNode> getSectionsToReAnalyse(Document document, Set<Integer> sectionsToReanalyseIds) {
|
||||
|
||||
return document.streamChildren()
|
||||
.filter(section -> sectionsToReanalyseIds.contains(section.getTreeId()
|
||||
.get(0)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private Set<Integer> getSectionsToReanalyseIds(AnalyzeRequest analyzeRequest,
|
||||
Document document,
|
||||
DictionaryIncrement dictionaryIncrement,
|
||||
ImportedRedactions importedRedactions,
|
||||
Set<String> relevantManuallyModifiedAnnotationIds) {
|
||||
|
||||
return sectionFinderService.findSectionsToReanalyse(dictionaryIncrement, document, analyzeRequest, importedRedactions, relevantManuallyModifiedAnnotationIds);
|
||||
}
|
||||
|
||||
|
||||
private NerEntities getEntityRecognitionEntitiesFilteredBySectionIds(AnalyzeRequest analyzeRequest, Document document, Set<Integer> sectionsToReanalyseIds) {
|
||||
|
||||
NerEntities nerEntities;
|
||||
if (redactionServiceSettings.isNerServiceEnabled()) {
|
||||
NerEntitiesModel nerEntitiesModel = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
nerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, nerEntitiesModel);
|
||||
nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
|
||||
} else {
|
||||
nerEntities = new NerEntities(Collections.emptyList());
|
||||
}
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
private static NerEntitiesModel filterNerEntitiesModelBySectionIds(Set<Integer> sectionsToReanalyseIds, NerEntitiesModel nerEntitiesModel) {
|
||||
|
||||
return new NerEntitiesModel(nerEntitiesModel.getData().entrySet()
|
||||
.stream() //
|
||||
.filter(entry -> sectionsToReanalyseIds.contains(getSuperSectionID(entry.getKey()))) //
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
|
||||
}
|
||||
|
||||
|
||||
private NerEntities getEntityRecognitionEntities(AnalyzeRequest analyzeRequest, Document document) {
|
||||
|
||||
NerEntities nerEntities;
|
||||
if (redactionServiceSettings.isNerServiceEnabled()) {
|
||||
nerEntities = NerEntitiesAdapter.toNerEntities(redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()), document);
|
||||
} else {
|
||||
nerEntities = new NerEntities(Collections.emptyList());
|
||||
}
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
private static Integer getSuperSectionID(String section) {
|
||||
|
||||
return NerEntitiesAdapter.sectionNumberToTreeId(section)
|
||||
.get(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
|
||||
@ -6,6 +6,8 @@ import java.io.InputStream;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.cache.annotation.Cacheable;
|
||||
@ -213,7 +215,6 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
|
||||
// !Warning! before activating redis cache you need to set
|
||||
// -Dio.netty.noPreferDirect=true -XX:MaxDirectMemorySize=1000M
|
||||
// Jvm args to the largest document data size we want to process. for 4443 pages file that was 500mb.
|
||||
@ -224,23 +225,42 @@ public class RedactionStorageService {
|
||||
public DocumentData getDocumentData(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
Supplier<DocumentStructure> documentStructureSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
|
||||
DocumentStructure.class);
|
||||
CompletableFuture<DocumentStructure> documentStructureFuture = CompletableFuture.supplyAsync(documentStructureSupplier);
|
||||
|
||||
Supplier<DocumentTextData[]> documentTextDataSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
|
||||
DocumentTextData[].class);
|
||||
CompletableFuture<DocumentTextData[]> documentTextDataFuture = CompletableFuture.supplyAsync(documentTextDataSupplier);
|
||||
|
||||
Supplier<DocumentPositionData[]> documentPositionDataSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId,
|
||||
fileId,
|
||||
FileType.DOCUMENT_POSITION),
|
||||
DocumentPositionData[].class);
|
||||
CompletableFuture<DocumentPositionData[]> documentPositionDataFuture = CompletableFuture.supplyAsync(documentPositionDataSupplier);
|
||||
|
||||
Supplier<DocumentPage[]> documentPageSupplier = () -> storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
|
||||
DocumentPage[].class);
|
||||
CompletableFuture<DocumentPage[]> documentPagesFuture = CompletableFuture.supplyAsync(documentPageSupplier);
|
||||
|
||||
CompletableFuture.allOf(documentStructureFuture, documentTextDataFuture, documentPositionDataFuture, documentPagesFuture).join();
|
||||
|
||||
return DocumentData.builder()
|
||||
.documentStructure(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE),
|
||||
DocumentStructure.class))
|
||||
.documentTextData(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_TEXT),
|
||||
DocumentTextData[].class))
|
||||
.documentPositionData(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_POSITION),
|
||||
DocumentPositionData[].class))
|
||||
.documentPages(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_PAGES),
|
||||
DocumentPage[].class))
|
||||
.documentStructure(documentStructureFuture.get())
|
||||
.documentTextData(documentTextDataFuture.get())
|
||||
.documentPositionData(documentPositionDataFuture.get())
|
||||
.documentPages(documentPagesFuture.get())
|
||||
.build();
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("DocumentData not available.");
|
||||
return null;
|
||||
} catch (Exception e) {
|
||||
log.error("An error occurred while fetching document data", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -9,12 +9,14 @@ import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.nio.file.FileVisitOption;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
@ -58,6 +60,7 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
|
||||
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.keycloakcommons.security.TenantAuthenticationManagerResolver;
|
||||
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
|
||||
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
@ -80,7 +83,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
* This way you can recreate what is happening on the stack almost exactly.
|
||||
*/ public class AnalysisEnd2EndTest {
|
||||
|
||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
|
||||
Path dossierTemplateToUse = Path.of(
|
||||
"/Users/maverickstuder/Documents/syngenta/redactmanager/prod-cp-eu-reg/EFSA_sanitisation_pre_GFL_v1"); // Add your dossier-template here
|
||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||
final String TENANT_ID = "tenant";
|
||||
|
||||
@ -116,12 +120,15 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Autowired
|
||||
protected TenantMongoLiquibaseExecutor tenantMongoLiquibaseExecutor;
|
||||
|
||||
@MockBean
|
||||
protected TenantAuthenticationManagerResolver tenantAuthenticationManagerResolver;
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void runAnalysisEnd2End() {
|
||||
|
||||
String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||
String folder = "/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/files_end2end/file0"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||
|
||||
Path absoluteFolderPath;
|
||||
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
|
||||
@ -137,7 +144,13 @@ import lombok.extern.slf4j.Slf4j;
|
||||
for (int i = 0; i < analyzeRequests.size(); i++) {
|
||||
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
|
||||
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
|
||||
analyzeService.analyze(analyzeRequest);
|
||||
var times = new LinkedList<Long>();
|
||||
for (int j = 1; j <= 10; j++) {
|
||||
var start = System.currentTimeMillis();
|
||||
analyzeService.analyze(analyzeRequest);
|
||||
times.add(System.currentTimeMillis() - start);
|
||||
}
|
||||
System.out.println("times in ms for each analyze run: " + times);
|
||||
}
|
||||
}
|
||||
|
||||
@ -294,7 +307,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
Map<String, Object> dossierTemplate = mapper.readValue(dossierTemplateToUse.resolve("dossierTemplate.json").toFile(), HashMap.class);
|
||||
this.id = (String) dossierTemplate.get("dossierTemplateId");
|
||||
|
||||
List<DictionaryModel> dictionaries = Files.walk(dossierTemplateToUse)
|
||||
List<DictionaryModel> dictionaries = Files.walk(dossierTemplateToUse, FileVisitOption.FOLLOW_LINKS)
|
||||
.filter(path -> path.getFileName().toString().equals("dossierType.json"))
|
||||
.map(this::loadDictionaryModel)
|
||||
.toList();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user