RED-3350 Communication with entity recognition via queues
parent 90fe282313
commit 68727178e7
@@ -12,7 +12,7 @@
 <artifactId>redaction-service-api-v1</artifactId>

 <properties>
-<persistence-service.version>1.32.0</persistence-service.version>
+<persistence-service.version>1.39.0</persistence-service.version>
 </properties>

 <dependencies>
@@ -2,6 +2,6 @@ package com.iqser.red.service.redaction.v1.model;

 public enum MessageType {

-FULL_ANALYSE, REANALYSE, SURROUNDING_TEXT
+ANALYSE, REANALYSE, STRUCTURE_ANALYSE, SURROUNDING_TEXT

 }
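For context, these enum values are what producers put on the redaction queue, and the commit splits the old FULL_ANALYSE into separate structure and content steps. A minimal producer-side sketch of enqueuing the split steps follows; the RabbitTemplate wiring, the literal queue name, and the AnalyzeRequest setters are assumptions for illustration, not taken from this commit:

// Hypothetical producer sketch; assumes Spring AMQP and a JSON-serialized AnalyzeRequest.
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.MessageType;

public class AnalyzeRequestPublisher {

    private final RabbitTemplate rabbitTemplate;
    private final ObjectMapper objectMapper;

    public AnalyzeRequestPublisher(RabbitTemplate rabbitTemplate, ObjectMapper objectMapper) {
        this.rabbitTemplate = rabbitTemplate;
        this.objectMapper = objectMapper;
    }

    // Structure analysis is now a message type of its own, so a caller would
    // enqueue it before the content-analysis step instead of one FULL_ANALYSE.
    public void triggerFullAnalysis(String dossierId, String fileId) throws Exception {
        send(dossierId, fileId, MessageType.STRUCTURE_ANALYSE);
        send(dossierId, fileId, MessageType.ANALYSE);
    }

    private void send(String dossierId, String fileId, MessageType type) throws Exception {
        AnalyzeRequest request = new AnalyzeRequest(); // setters assumed on the model
        request.setDossierId(dossierId);
        request.setFileId(fileId);
        request.setMessageType(type);
        // "redaction-queue" stands in for MessagingConfiguration.REDACTION_QUEUE
        rabbitTemplate.convertAndSend("redaction-queue", objectMapper.writeValueAsString(request));
    }
}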
@@ -1,30 +1,42 @@
 package com.iqser.red.service.redaction.v1.server.controller;

-import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
-import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
-import com.iqser.red.service.redaction.v1.model.*;
-import com.iqser.red.service.redaction.v1.resources.RedactionResource;
-import com.iqser.red.service.redaction.v1.server.classification.model.Document;
-import com.iqser.red.service.redaction.v1.server.classification.model.Page;
-import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
-import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
-import com.iqser.red.service.redaction.v1.server.redaction.service.*;
-import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
-import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
-import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.stream.Collectors;

 import org.apache.pdfbox.io.MemoryUsageSetting;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.springframework.web.bind.annotation.PathVariable;
 import org.springframework.web.bind.annotation.RequestBody;
 import org.springframework.web.bind.annotation.RestController;

+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.stream.Collectors;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
+import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
+import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.model.RedactionLog;
+import com.iqser.red.service.redaction.v1.model.RedactionRequest;
+import com.iqser.red.service.redaction.v1.model.RedactionResult;
+import com.iqser.red.service.redaction.v1.model.SectionArea;
+import com.iqser.red.service.redaction.v1.model.SectionGrid;
+import com.iqser.red.service.redaction.v1.resources.RedactionResource;
+import com.iqser.red.service.redaction.v1.server.classification.model.Document;
+import com.iqser.red.service.redaction.v1.server.classification.model.Page;
+import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
+import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
+import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService;
+import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
+import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
+import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;

 @Slf4j
 @RestController
@@ -43,8 +55,7 @@ public class RedactionController implements RedactionResource {

 public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {

-var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest
-.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
+var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
 var mergedRedactionLog = getRedactionLog(RedactionRequest.builder()
 .fileId(annotateRequest.getFileId())
 .manualRedactions(annotateRequest.getManualRedactions())
@@ -73,13 +84,11 @@ public class RedactionController implements RedactionResource {
 @Override
 public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {

-var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
-.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
 try {
 Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);

-storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
-.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
 try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
 pdDocument.setAllSecurityToBeRemoved(true);
@@ -101,13 +110,11 @@ public class RedactionController implements RedactionResource {
 @Override
 public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {

-var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
-.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
 try {
 Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);

-storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
-.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
 try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
 pdDocument.setAllSecurityToBeRemoved(true);
@@ -131,8 +138,7 @@ public class RedactionController implements RedactionResource {
 Document classifiedDoc;

 try {
-var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
-.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
 classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
 } catch (Exception e) {
 throw new RedactionException(e);
@@ -174,8 +180,7 @@ public class RedactionController implements RedactionResource {

 log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());

-SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest
-.getFileId());
+SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId());
 if (sectionGrid.getSections().isEmpty()) {

 log.info("SectionGrid does not have headlines set. Computing headlines now!");
@@ -184,8 +189,7 @@ public class RedactionController implements RedactionResource {
 // enhance section grid with headline data
 for (var sectionText : text.getSectionTexts()) {
 sectionGrid.getSections()
-.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText
-.getSectionAreas()
+.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
 .stream()
 .map(SectionArea::getPage)
 .collect(Collectors.toSet()), sectionText.getSectionAreas()));
@@ -194,8 +198,7 @@ public class RedactionController implements RedactionResource {
 }

 log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
-return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest
-.getManualRedactions(), redactionRequest.getExcludedPages());
+return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages());
 }

@@ -217,11 +220,9 @@ public class RedactionController implements RedactionResource {
 @PathVariable("fileId") String fileId,
 @RequestBody ManualRedactions manualRedactions) {

-long start = System.currentTimeMillis();
 var result = manualRedactionSurroundingTextService.addSurroundingText(dossierId, fileId, manualRedactions);
-log.info("add surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, System
-.currentTimeMillis() - start);
-return result;
+log.info("Added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, result.getDuration());
+return result.getManualRedactions();
 }

 }
@@ -1,5 +1,12 @@
 package com.iqser.red.service.redaction.v1.server.queue;

+import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
+import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
+
+import org.springframework.amqp.rabbit.annotation.RabbitHandler;
+import org.springframework.amqp.rabbit.annotation.RabbitListener;
+import org.springframework.stereotype.Service;
+
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
@@ -8,18 +15,10 @@ import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
 import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
 import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
 import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
-import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService;

 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-
-import org.springframework.amqp.rabbit.annotation.RabbitHandler;
-import org.springframework.amqp.rabbit.annotation.RabbitListener;
-import org.springframework.stereotype.Service;
-
-import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
-import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;

 @Slf4j
 @Service
 @RequiredArgsConstructor
@@ -28,7 +27,6 @@ public class RedactionMessageReceiver {
 private final ObjectMapper objectMapper;
 private final AnalyzeService analyzeService;
 private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
-private final NerAnalyserService nerAnalyserService;
 private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;

@@ -40,31 +38,26 @@ public class RedactionMessageReceiver {
 log.info("Processing analyze request for file: {}", analyzeRequest.getFileId());
 AnalyzeResult result = null;

-switch (analyzeRequest.getMessageType()){
+switch (analyzeRequest.getMessageType()) {

 case REANALYSE:
 result = analyzeService.reanalyze(analyzeRequest);
 log.info("Successfully reanalyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
 break;
-case FULL_ANALYSE:
-// TODO Seperate stucture analysis by other queue
-analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
-
-// TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a seperate queue after structure analysis
-nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-
-result = analyzeService.analyze(analyzeRequest);
-log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result
-.getDuration());
+
+case STRUCTURE_ANALYSE:
+result = analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
+log.info("Successfully analyzed structure dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
+break;
+
+case ANALYSE:
+result = analyzeService.analyze(analyzeRequest);
+log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
+break;

 case SURROUNDING_TEXT:
-var manualRedactions = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
-result = AnalyzeResult.builder()
-.dossierId(analyzeRequest.getDossierId())
-.fileId(analyzeRequest.getFileId())
-.manualRedactions(manualRedactions)
-.build();
+result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
+log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
 break;
 }
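The static imports of REDACTION_QUEUE and REDACTION_DQL above suggest the receiver is bound to a primary queue with a dead-letter companion. A minimal sketch of that listener shape, assuming standard Spring AMQP annotations; the actual bindings live in MessagingConfiguration, which is not part of this diff, and the queue name string is a stand-in:

// Hypothetical listener shape; the real class is RedactionMessageReceiver.
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;

@Service
@RabbitListener(queues = "redaction-queue") // stands in for REDACTION_QUEUE
public class ExampleReceiver {

    @RabbitHandler
    public void receive(String message) {
        // Deserialize the payload to AnalyzeRequest and dispatch on
        // MessageType, as the switch in RedactionMessageReceiver does.
        // Messages that cannot be processed would be routed to the
        // dead-letter queue referenced by REDACTION_DQL.
    }
}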
@@ -32,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
 import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
 import com.iqser.red.service.redaction.v1.server.classification.model.Text;
 import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
+import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
 import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
 import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
 import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
@@ -66,12 +67,11 @@ public class AnalyzeService {
 private final RedactionServiceSettings redactionServiceSettings;
 private final SectionTextBuilderService sectionTextBuilderService;
 private final SectionGridCreatorService sectionGridCreatorService;
-private final NerAnalyserService nerAnalyserService;
 private final ImageService imageService;
 private final ImportedRedactionService importedRedactionService;


-public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
+public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {

 long startTime = System.currentTimeMillis();
@@ -79,7 +79,6 @@ public class AnalyzeService {
 Document classifiedDoc;

 try {
-
 var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));

 Map<Integer, List<PdfImage>> pdfImages = null;
@@ -105,44 +104,17 @@ public class AnalyzeService {
 .map(SectionArea::getPage)
 .collect(Collectors.toSet()), sectionText.getSectionAreas())));

 log.info("Store text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
 redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
 redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());

 log.info("Document structure analysis successful, took: {}", System.currentTimeMillis() - startTime);
-}
-
-
-public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
-
-long startTime = System.currentTimeMillis();
-
-var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
-nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-}
-
-dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
-KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
-long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
-Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
-
-PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
-
-dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
-
-List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
-
-var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
-var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
-.getDossierTemplateVersion(), dictionary.getVersion()
-.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
-
-var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
-redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
-
-return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
+
+return AnalyzeResult.builder()
+.dossierId(analyzeRequest.getDossierId())
+.fileId(analyzeRequest.getFileId())
+.duration(System.currentTimeMillis() - startTime)
+.numberOfPages(text.getNumberOfPages())
+.analysisVersion(redactionServiceSettings.getAnalysisVersion())
+.build();
 }

@@ -168,10 +140,11 @@ public class AnalyzeService {
 return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
 }

-var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
-nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+NerEntities nerEntities;
+if (redactionServiceSettings.isNerServiceEnabled()) {
+nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+} else {
+nerEntities = NerEntities.builder().build();
 }

 List<SectionText> reanalysisSections = text.getSectionTexts()
@@ -188,13 +161,49 @@ public class AnalyzeService {

 var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), newRedactionLogEntries, false);

-redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
+redactionLog.getRedactionLogEntry()
+.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType()
+.equals(IMPORTED_REDACTION_TYPE));
 redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);

 return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
 }


+public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
+
+long startTime = System.currentTimeMillis();
+var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+
+NerEntities nerEntities;
+if (redactionServiceSettings.isNerServiceEnabled()) {
+nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+} else {
+nerEntities = NerEntities.builder().build();
+}
+dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
+long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
+Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+
+PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
+
+dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+
+List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
+
+var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
+var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
+.getDossierTemplateVersion(), dictionary.getVersion()
+.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
+
+var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
+redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
+
+return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
+}


 private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
 Text text, AnalyzeRequest analyzeRequest) {

@@ -1,5 +1,18 @@
 package com.iqser.red.service.redaction.v1.server.redaction.service;

+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.commons.lang3.StringUtils;
+import org.kie.api.runtime.KieContainer;
+import org.springframework.stereotype.Service;
+
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
@@ -8,20 +21,21 @@ import com.iqser.red.service.redaction.v1.model.Engine;
 import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
 import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
 import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
-import com.iqser.red.service.redaction.v1.server.redaction.model.*;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
+import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
+import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
+import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
+import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
 import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
 import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
 import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;

 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.lang3.StringUtils;
-import org.kie.api.runtime.KieContainer;
-import org.springframework.stereotype.Service;
-
-import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;

 @Slf4j
 @Service
@@ -46,7 +60,7 @@ public class EntityRedactionService {
 EntitySearchUtils.removeEntitiesContainedInLarger(entities);
 }

-Map<Integer, List<Entity>> entitiesPerPage = convertToEnititesPerPage(entities);
+Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(entities);
 return new PageEntities(entitiesPerPage, imagesPerPage);
 }

@@ -145,7 +159,7 @@ public class EntityRedactionService {
 }


-private Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities) {
+private Map<Integer, List<Entity>> convertToEntitiesPerPage(Set<Entity> entities) {

 Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
 for (Entity entity : entities) {
@@ -209,7 +223,7 @@ public class EntityRedactionService {

 private Entities findEntities(SearchableText searchableText, String headline, int sectionNumber,
 Dictionary dictionary, boolean local, NerEntities nerEntities,
-List<Integer> cellstarts) {
+List<Integer> cellStarts) {

 Set<Entity> found = new HashSet<>();
 String searchableString = searchableText.toString();
@@ -230,32 +244,32 @@ public class EntityRedactionService {

 Set<Entity> nerFound = new HashSet<>();
 if (!local) {
-nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellstarts, headline));
+nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
 }

 return new Entities(EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary), nerFound);
 }


-private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities,
-List<Integer> cellstarts, String headline) {
+private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts,
+String headline) {

 Set<Entity> entities = new HashSet<>();

-if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
-.containsKey(sectionNumber)) {
+if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getResult().containsKey(sectionNumber)) {
 nerEntities.getResult().get(sectionNumber).forEach(res -> {
-if (cellstarts == null || cellstarts.isEmpty()) {
-entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
+if (cellStarts == null || cellStarts.isEmpty()) {
+entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
 } else {
 boolean intersectsCellStart = false;
-for (Integer cellStart : cellstarts) {
+for (Integer cellStart : cellStarts) {
 if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
 intersectsCellStart = true;
 break;
 }
 }
 if (!intersectsCellStart) {
-entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
+entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
 }
 }
 });
@@ -1,9 +1,17 @@
 package com.iqser.red.service.redaction.v1.server.redaction.service;

+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.springframework.stereotype.Service;
+
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
+import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
 import com.iqser.red.service.redaction.v1.model.Engine;
 import com.iqser.red.service.redaction.v1.model.SectionArea;
 import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
@@ -13,14 +21,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
 import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
 import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
 import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;

 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.lang3.tuple.Pair;
-import org.springframework.stereotype.Service;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;

 @Slf4j
 @Service
@@ -31,8 +34,9 @@ public class ManualRedactionSurroundingTextService {
 private final SurroundingWordsService surroundingWordsService;


-public ManualRedactions addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
+public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {

+long startTime = System.currentTimeMillis();
 Text text = redactionStorageService.getText(dossierId, fileId);
 List<ManualRedactionEntry> processedAddRedactions = new ArrayList<>();
 List<ManualResizeRedaction> processedResizeRedactions = new ArrayList<>();
@@ -49,8 +53,7 @@ public class ManualRedactionSurroundingTextService {
 while (addItty.hasNext()) {
 var manualAddRedaction = addItty.next();
 if (sectionContainsEntry(sectionArea, manualAddRedaction.getPositions())) {
-var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction
-.getPositions());
+var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction.getPositions());
 manualAddRedaction.setTextBefore(surroundingText.getLeft());
 manualAddRedaction.setTextAfter(surroundingText.getRight());
 processedAddRedactions.add(manualAddRedaction);
@@ -62,8 +65,7 @@ public class ManualRedactionSurroundingTextService {
 while (resizeItty.hasNext()) {
 var manualResizeRedaction = resizeItty.next();
 if (sectionContainsEntry(sectionArea, manualResizeRedaction.getPositions())) {
-var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction
-.getPositions());
+var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction.getPositions());
 manualResizeRedaction.setTextBefore(surroundingText.getLeft());
 manualResizeRedaction.setTextAfter(surroundingText.getRight());
 processedResizeRedactions.add(manualResizeRedaction);
@@ -75,15 +77,20 @@ public class ManualRedactionSurroundingTextService {

 manualRedactions.getEntriesToAdd().addAll(processedAddRedactions);
 manualRedactions.getResizeRedactions().addAll(processedResizeRedactions);
-return manualRedactions;
+
+return AnalyzeResult.builder()
+.dossierId(dossierId)
+.fileId(fileId)
+.manualRedactions(manualRedactions)
+.duration(System.currentTimeMillis() - startTime)
+.build();
 }


 private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
 List<Rectangle> toFindPositions) {

-Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText
-.getSectionNumber(), false, false, Engine.DICTIONARY, false);
+Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false);
 Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);

 Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
@@ -94,8 +101,7 @@ public class ManualRedactionSurroundingTextService {
 }

 if (sectionText.getCellStarts() != null && !sectionText.getCellStarts().isEmpty()) {
-surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText
-.getCellStarts());
+surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText.getCellStarts());
 } else {
 surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null);
 }
@@ -1,53 +0,0 @@
-package com.iqser.red.service.redaction.v1.server.redaction.service;
-
-import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
-import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
-import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
-import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
-import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
-import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.codec.binary.Base64;
-import org.springframework.stereotype.Service;
-
-import java.util.stream.Collectors;
-
-@Slf4j
-@Service
-@RequiredArgsConstructor
-public class NerAnalyserService {
-
-private final RedactionStorageService redactionStorageService;
-private final EntityRecognitionClient entityRecognitionClient;
-private final RedactionServiceSettings redactionServiceSettings;
-
-public void computeNerEntities(String dossierId, String fileId) {
-
-if (redactionServiceSettings.isEnableEntityRecognition()) {
-var text = redactionStorageService.getText(dossierId, fileId);
-
-long start = System.currentTimeMillis();
-
-if (text != null) {
-var nerRequest = EntityRecognitionRequest.builder()
-.data(text.getSectionTexts()
-.stream()
-.map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), new String(Base64
-.encodeBase64(sectionText
-.getText().getBytes()))))
-.collect(Collectors.toList()))
-.build();
-
-var nerResponse = entityRecognitionClient.findAuthors(nerRequest);
-
-log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId);
-
-redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse);
-} else {
-log.warn("Warning, text for file: {} in dossier: {} is null", fileId, dossierId);
-}
-}
-}
-}
@@ -1,5 +1,15 @@
 package com.iqser.red.service.redaction.v1.server.redaction.service;

+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.stereotype.Service;
+
 import com.iqser.red.service.redaction.v1.model.Point;
 import com.iqser.red.service.redaction.v1.model.Rectangle;
 import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
@@ -10,13 +20,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionS
 import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
 import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
 import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;

 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections4.CollectionUtils;
-import org.springframework.stereotype.Service;
-
-import java.util.*;
-import java.util.stream.Collectors;

 @Service
 @Slf4j
@@ -85,7 +91,7 @@ public class RedactionLogCreatorService {

 List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();

-// Duplicates can exist due table extraction colums over multiple rows.
+// Duplicates can exist due table extraction columns over multiple rows.
 Set<String> processedIds = new HashSet<>();

 entityLoop:

@@ -1,8 +1,9 @@
 package com.iqser.red.service.redaction.v1.server.settings;

-import lombok.Data;
 import org.springframework.boot.context.properties.ConfigurationProperties;

+import lombok.Data;
+
 @Data
 @ConfigurationProperties("redaction-service")
 public class RedactionServiceSettings {
@@ -17,6 +18,6 @@ public class RedactionServiceSettings {

 private int analysisVersion = 1;

-private boolean enableEntityRecognition = true;
+private boolean nerServiceEnabled = true;

 }
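Because this class is bound with @ConfigurationProperties("redaction-service"), renaming the field also changes the externally visible property key (with Spring Boot's relaxed binding, redaction-service.ner-service-enabled replaces redaction-service.enable-entity-recognition). A small caller-side sketch of how the renamed flag is consumed after this commit, mirroring the pattern in AnalyzeService; the class and method names below are illustrative, not from the diff:

// Hypothetical caller sketch; mirrors the NER gating seen in AnalyzeService.
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;

public class NerLookupExample {

    private final RedactionServiceSettings settings;
    private final RedactionStorageService redactionStorageService;

    public NerLookupExample(RedactionServiceSettings settings,
                            RedactionStorageService redactionStorageService) {
        this.settings = settings;
        this.redactionStorageService = redactionStorageService;
    }

    public NerEntities loadNerEntities(String dossierId, String fileId) {
        if (!settings.isNerServiceEnabled()) {
            return NerEntities.builder().build(); // empty result when NER is off
        }
        // After this commit, a missing stored object throws NotFoundException
        // instead of returning null (see RedactionStorageService below).
        return redactionStorageService.getNerEntities(dossierId, fileId);
    }
}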
@@ -105,14 +105,13 @@ public class RedactionStorageService {
 try {
 inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
 } catch (StorageObjectDoesNotExist e) {
-log.debug("NER Entities not available.");
-return null;
+throw new NotFoundException("NER Entities are not available.");
 }

 try {
 return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
 } catch (IOException e) {
-throw new RuntimeException("Could not convert NerEntities", e);
+throw new RuntimeException("Could not convert NER Entities", e);
 }
 }
@@ -146,8 +145,6 @@ public class RedactionStorageService {
 return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
 }
-
-
 }

 }
@@ -23,9 +23,6 @@ import java.util.Set;
 import java.util.UUID;
 import java.util.stream.Collectors;

-import com.fasterxml.jackson.core.type.TypeReference;
-import com.iqser.red.service.persistence.service.v1.api.model.annotations.ImportedAnnotation;
-import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
 import org.apache.commons.io.IOUtils;
 import org.junit.After;
 import org.junit.Before;
@@ -56,6 +53,12 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Annota
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
 import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
 import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
 import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
 import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
@@ -215,6 +218,7 @@ public class RedactionIntegrationTest {

 loadDictionaryForTest();
 loadTypeForTest();
+loadNerForTest();
 when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
 when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getTypeResponse());
@@ -256,293 +260,6 @@ public class RedactionIntegrationTest {
 }


-private void loadDictionaryForTest() {
-
-dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/hint_only.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/published_information.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/test_method.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/PII.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/false_positive.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/purity.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/empty.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/empty.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/empty.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/empty.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/empty.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
-.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
-.stream()
-.map(this::cleanDictionaryEntry)
-.collect(Collectors.toSet()));
-dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
-}
-
-
-private String cleanDictionaryEntry(String entry) {
-
-return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
-}
-
-
private void loadTypeForTest() {
|
||||
|
||||
typeColorMap.put(VERTEBRATE, "#ff85f7");
|
||||
typeColorMap.put(ADDRESS, "#ffe187");
|
||||
typeColorMap.put(AUTHOR, "#ffe187");
|
||||
typeColorMap.put(SPONSOR, "#85ebff");
|
||||
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
|
||||
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
|
||||
typeColorMap.put(HINT_ONLY, "#abc0c4");
|
||||
typeColorMap.put(MUST_REDACT, "#fab4c0");
|
||||
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
|
||||
typeColorMap.put(TEST_METHOD, "#91fae8");
|
||||
typeColorMap.put(PII, "#66ccff");
|
||||
typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
|
||||
typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
|
||||
typeColorMap.put(FALSE_POSITIVE, "#ffffff");
|
||||
typeColorMap.put(PURITY, "#ffe187");
|
||||
typeColorMap.put(IMAGE, "#fcc5fb");
|
||||
typeColorMap.put(OCR, "#fcc5fb");
|
||||
typeColorMap.put(LOGO, "#ffe187");
|
||||
typeColorMap.put(FORMULA, "#ffe187");
|
||||
typeColorMap.put(SIGNATURE, "#ffe187");
|
||||
typeColorMap.put(IMPORTED_REDACTION, "#32a852");
|
||||
|
||||
hintTypeMap.put(VERTEBRATE, true);
|
||||
hintTypeMap.put(ADDRESS, false);
|
||||
hintTypeMap.put(AUTHOR, false);
|
||||
hintTypeMap.put(SPONSOR, false);
|
||||
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
|
||||
hintTypeMap.put(REDACTION_INDICATOR, true);
|
||||
hintTypeMap.put(HINT_ONLY, true);
|
||||
hintTypeMap.put(MUST_REDACT, true);
|
||||
hintTypeMap.put(PUBLISHED_INFORMATION, true);
|
||||
hintTypeMap.put(TEST_METHOD, true);
|
||||
hintTypeMap.put(PII, false);
|
||||
hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
|
hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
hintTypeMap.put(FALSE_POSITIVE, true);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
hintTypeMap.put(DOSSIER_REDACTIONS, false);
hintTypeMap.put(IMPORTED_REDACTION, false);

caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
caseInSensitiveMap.put(AUTHOR, false);
caseInSensitiveMap.put(SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);
caseInSensitiveMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
caseInSensitiveMap.put(FALSE_POSITIVE, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(IMPORTED_REDACTION, false);

recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
recommendationTypeMap.put(AUTHOR, false);
recommendationTypeMap.put(SPONSOR, false);
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
recommendationTypeMap.put(REDACTION_INDICATOR, false);
recommendationTypeMap.put(HINT_ONLY, false);
recommendationTypeMap.put(MUST_REDACT, false);
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
recommendationTypeMap.put(TEST_METHOD, false);
recommendationTypeMap.put(PII, false);
recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
recommendationTypeMap.put(FALSE_POSITIVE, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(IMPORTED_REDACTION, false);

rankTypeMap.put(FALSE_POSITIVE, 160);
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
rankTypeMap.put(AUTHOR, 130);
rankTypeMap.put(SPONSOR, 120);
rankTypeMap.put(VERTEBRATE, 110);
rankTypeMap.put(MUST_REDACT, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
rankTypeMap.put(IMPORTED_REDACTION, 200);

colors.setDefaultColor("#acfc00");
colors.setNotRedacted("#cccccc");
colors.setRequestAdd("#04b093");
colors.setRequestRemove("#04b093");
}
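
// The rank values above act as a precedence order when findings of different
// types overlap: DOSSIER_REDACTIONS and IMPORTED_REDACTION (200) outrank
// FALSE_POSITIVE (160), which in turn outranks AUTHOR (130), and so on down
// to FORMULA (26). A minimal sketch of such a resolution step, assuming
// overlaps keep the highest-ranked type; this helper is illustrative and is
// not part of the service code in this diff:
private String resolveOverlap(String typeA, String typeB) {
    // Unknown types default to rank 0 and therefore always lose.
    return rankTypeMap.getOrDefault(typeA, 0) >= rankTypeMap.getOrDefault(typeB, 0) ? typeA : typeB;
}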

private List<Type> getTypeResponse() {

    return typeColorMap.entrySet()
            .stream()
            .map(typeColor -> Type.builder()
                    .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
                    .type(typeColor.getKey())
                    .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
                    .hexColor(typeColor.getValue())
                    .isHint(hintTypeMap.get(typeColor.getKey()))
                    .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
                    .isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
                    .rank(rankTypeMap.get(typeColor.getKey()))
                    .build())
            .collect(Collectors.toList());
}
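
// getTypeResponse() assembles the complete type catalogue from the maps seeded
// in loadTypeForTest(), so a mocked client can serve it verbatim. Hedged
// stubbing sketch: getTypes(...) is an assumed method name on dictionaryClient;
// only dictionaryClient.getVersion(...) is visible in this diff.
// when(dictionaryClient.getTypes(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getTypeResponse());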

private Type getDictionaryResponse(String type, boolean isDossierDictionary) {

    return Type.builder()
            .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .hexColor(typeColorMap.get(type))
            .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
            .isHint(hintTypeMap.get(type))
            .isCaseInsensitive(caseInSensitiveMap.get(type))
            .isRecommendation(recommendationTypeMap.get(type))
            .rank(rankTypeMap.get(type))
            .build();
}

private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {

    List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
    entries.forEach(entry -> {
        dictionaryEntries.add(DictionaryEntry.builder()
                .value(entry)
                .version(reanlysisVersions.getOrDefault(entry, 0L))
                .deleted(deleted.contains(entry))
                .build());
    });
    return dictionaryEntries;
}
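
// toDictionaryEntry above can be written as a single stream pipeline; a
// behavior-preserving alternative sketch (the existing field name
// reanlysisVersions is kept as-is, typo and all, for consistency with the
// rest of the test class):
private List<DictionaryEntry> toDictionaryEntryStreamed(List<String> entries) {
    return entries.stream()
            .map(entry -> DictionaryEntry.builder()
                    .value(entry)
                    .version(reanlysisVersions.getOrDefault(entry, 0L))
                    .deleted(deleted.contains(entry))
                    .build())
            .collect(Collectors.toList());
}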

@Test
public void test270Rotated() {

@ -584,9 +301,7 @@ public class RedactionIntegrationTest {
    duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});

duplicates.entrySet().forEach(entry -> {
    assertThat(entry.getValue().size()).isEqualTo(1);
});
duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1));

dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
@ -1047,8 +762,6 @@ public class RedactionIntegrationTest {
@Test
public void testManualRedaction() throws IOException {

    // 675eba69b0c2917de55462c817adaa05

    System.out.println("testManualRedaction");
    long start = System.currentTimeMillis();
    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
@ -1099,8 +812,6 @@ public class RedactionIntegrationTest {
        .page(1)
        .build()));

    // manualRedactions.getEntriesToAdd().add(manualRedactionEntry);

    AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
    request.setManualRedactions(manualRedactions);
    analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
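
// The structure-analysis step is now triggered explicitly with a request that
// carries just the dossier and file ids, matching the constructor call above.
// Sketch of the assumed DTO shape; the actual StructureAnalyzeRequest class in
// the model package may differ (e.g. use Lombok or carry extra fields):
public class StructureAnalyzeRequest {

    private final String dossierId;
    private final String fileId;

    public StructureAnalyzeRequest(String dossierId, String fileId) {
        this.dossierId = dossierId;
        this.fileId = fileId;
    }

    public String getDossierId() { return dossierId; }
    public String getFileId() { return fileId; }
}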
@ -1246,34 +957,6 @@ public class RedactionIntegrationTest {
}

@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {

    ClassPathResource pdfFileResource = new ClassPathResource(file);

    return prepareStorage(pdfFileResource.getInputStream());
}


@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {

    AnalyzeRequest request = AnalyzeRequest.builder()
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .lastProcessed(OffsetDateTime.now())
            .build();

    var bytes = IOUtils.toByteArray(stream);

    storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);

    return request;
}
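
// Every blob in these tests is addressed by (dossierId, fileId, FileType).
// One plausible key scheme is sketched below; the actual implementation of
// RedactionStorageService.StorageIdUtils.getStorageId is not part of this
// diff and may differ:
static String getStorageIdSketch(String dossierId, String fileId, FileType fileType) {
    // Assumed layout: one storage object per dossier/file/type triple.
    return dossierId + "/" + fileId + "/" + fileType.name();
}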

@Test
public void sponsorCompanyTest() throws IOException {

@ -1527,7 +1210,8 @@ public class RedactionIntegrationTest {
    fileOutputStream.write(annotateResponse.getDocument());
}

var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions);
var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions)
        .getManualRedactions();
surroundingTextResult.getEntriesToAdd().forEach(addEntry -> {
    assertThat(addEntry.getTextAfter()).isNotEmpty();
});
@ -1535,6 +1219,117 @@ public class RedactionIntegrationTest {
}
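
// addSurroundingText now returns a wrapper, so the enriched entries are
// reached via getManualRedactions(). Only getTextAfter() is asserted above;
// if the entries also carry leading context, a symmetric check could read as
// follows (getTextBefore() is an assumption — it does not appear in this diff):
// surroundingTextResult.getEntriesToAdd().forEach(addEntry -> {
//     assertThat(addEntry.getTextBefore()).isNotEmpty();
//     assertThat(addEntry.getTextAfter()).isNotEmpty();
// });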

private void loadDictionaryForTest() {

    dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
}
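
// The stanzas above differ only in target map, type key, and resource path.
// A behavior-preserving helper could collapse them; this sketch assumes
// ResourceLoader.load(String) yields a List<String>, as its usage above implies:
private void loadInto(Map<String, List<String>> target, String type, String resource) {
    target.computeIfAbsent(type, v -> new ArrayList<>())
            .addAll(ResourceLoader.load(resource)
                    .stream()
                    .map(this::cleanDictionaryEntry)
                    .collect(Collectors.toSet()));
}
// Usage: loadInto(dictionary, AUTHOR, "dictionaries/CBI_author.txt");
//        loadInto(dossierDictionary, DOSSIER_REDACTIONS, "dictionaries/dossier_redactions.txt");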

private static String loadFromClassPath(String path) {

    URL resource = ResourceLoader.class.getClassLoader().getResource(path);
@ -1572,6 +1367,191 @@ public class RedactionIntegrationTest {
}

private void loadTypeForTest() {

    typeColorMap.put(VERTEBRATE, "#ff85f7");
    typeColorMap.put(ADDRESS, "#ffe187");
    typeColorMap.put(AUTHOR, "#ffe187");
    typeColorMap.put(SPONSOR, "#85ebff");
    typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
    typeColorMap.put(REDACTION_INDICATOR, "#caff85");
    typeColorMap.put(HINT_ONLY, "#abc0c4");
    typeColorMap.put(MUST_REDACT, "#fab4c0");
    typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
    typeColorMap.put(TEST_METHOD, "#91fae8");
    typeColorMap.put(PII, "#66ccff");
    typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
    typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
    typeColorMap.put(FALSE_POSITIVE, "#ffffff");
    typeColorMap.put(PURITY, "#ffe187");
    typeColorMap.put(IMAGE, "#fcc5fb");
    typeColorMap.put(OCR, "#fcc5fb");
    typeColorMap.put(LOGO, "#ffe187");
    typeColorMap.put(FORMULA, "#ffe187");
    typeColorMap.put(SIGNATURE, "#ffe187");
    typeColorMap.put(IMPORTED_REDACTION, "#32a852");

    hintTypeMap.put(VERTEBRATE, true);
    hintTypeMap.put(ADDRESS, false);
    hintTypeMap.put(AUTHOR, false);
    hintTypeMap.put(SPONSOR, false);
    hintTypeMap.put(NO_REDACTION_INDICATOR, true);
    hintTypeMap.put(REDACTION_INDICATOR, true);
    hintTypeMap.put(HINT_ONLY, true);
    hintTypeMap.put(MUST_REDACT, true);
    hintTypeMap.put(PUBLISHED_INFORMATION, true);
    hintTypeMap.put(TEST_METHOD, true);
    hintTypeMap.put(PII, false);
    hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
    hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
    hintTypeMap.put(FALSE_POSITIVE, true);
    hintTypeMap.put(PURITY, false);
    hintTypeMap.put(IMAGE, true);
    hintTypeMap.put(OCR, true);
    hintTypeMap.put(FORMULA, false);
    hintTypeMap.put(LOGO, false);
    hintTypeMap.put(SIGNATURE, false);
    hintTypeMap.put(DOSSIER_REDACTIONS, false);
    hintTypeMap.put(IMPORTED_REDACTION, false);

    caseInSensitiveMap.put(VERTEBRATE, true);
    caseInSensitiveMap.put(ADDRESS, false);
    caseInSensitiveMap.put(AUTHOR, false);
    caseInSensitiveMap.put(SPONSOR, false);
    caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
    caseInSensitiveMap.put(REDACTION_INDICATOR, true);
    caseInSensitiveMap.put(HINT_ONLY, true);
    caseInSensitiveMap.put(MUST_REDACT, true);
    caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
    caseInSensitiveMap.put(TEST_METHOD, false);
    caseInSensitiveMap.put(PII, false);
    caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
    caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
    caseInSensitiveMap.put(FALSE_POSITIVE, false);
    caseInSensitiveMap.put(PURITY, false);
    caseInSensitiveMap.put(IMAGE, true);
    caseInSensitiveMap.put(OCR, true);
    caseInSensitiveMap.put(SIGNATURE, true);
    caseInSensitiveMap.put(LOGO, true);
    caseInSensitiveMap.put(FORMULA, true);
    caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
    caseInSensitiveMap.put(IMPORTED_REDACTION, false);

    recommendationTypeMap.put(VERTEBRATE, false);
    recommendationTypeMap.put(ADDRESS, false);
    recommendationTypeMap.put(AUTHOR, false);
    recommendationTypeMap.put(SPONSOR, false);
    recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
    recommendationTypeMap.put(REDACTION_INDICATOR, false);
    recommendationTypeMap.put(HINT_ONLY, false);
    recommendationTypeMap.put(MUST_REDACT, false);
    recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
    recommendationTypeMap.put(TEST_METHOD, false);
    recommendationTypeMap.put(PII, false);
    recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
    recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
    recommendationTypeMap.put(FALSE_POSITIVE, false);
    recommendationTypeMap.put(PURITY, false);
    recommendationTypeMap.put(IMAGE, false);
    recommendationTypeMap.put(OCR, false);
    recommendationTypeMap.put(FORMULA, false);
    recommendationTypeMap.put(SIGNATURE, false);
    recommendationTypeMap.put(LOGO, false);
    recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
    recommendationTypeMap.put(IMPORTED_REDACTION, false);

    rankTypeMap.put(FALSE_POSITIVE, 160);
    rankTypeMap.put(PURITY, 155);
    rankTypeMap.put(PII, 150);
    rankTypeMap.put(ADDRESS, 140);
    rankTypeMap.put(AUTHOR, 130);
    rankTypeMap.put(SPONSOR, 120);
    rankTypeMap.put(VERTEBRATE, 110);
    rankTypeMap.put(MUST_REDACT, 100);
    rankTypeMap.put(REDACTION_INDICATOR, 90);
    rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
    rankTypeMap.put(PUBLISHED_INFORMATION, 70);
    rankTypeMap.put(TEST_METHOD, 60);
    rankTypeMap.put(HINT_ONLY, 50);
    rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
    rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
    rankTypeMap.put(IMAGE, 30);
    rankTypeMap.put(OCR, 29);
    rankTypeMap.put(LOGO, 28);
    rankTypeMap.put(SIGNATURE, 27);
    rankTypeMap.put(FORMULA, 26);
    rankTypeMap.put(DOSSIER_REDACTIONS, 200);
    rankTypeMap.put(IMPORTED_REDACTION, 200);

    colors.setDefaultColor("#acfc00");
    colors.setNotRedacted("#cccccc");
    colors.setRequestAdd("#04b093");
    colors.setRequestRemove("#04b093");
}

@SneakyThrows
private void loadNerForTest() {

    ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
    var bytes = IOUtils.toByteArray(responseJson.getInputStream());
    storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes);
}
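
// loadNerForTest seeds storage with files/ner_response.json, the per-page
// entity payload this commit introduces (the fixture itself is added at the
// bottom of this diff). A sketch of a matching binding model; class and field
// names are illustrative, not the service's actual NER model:
public class NerResponse {

    // page number (as a string key) -> entities recognised on that page
    private Map<String, List<NerEntity>> result;

    public Map<String, List<NerEntity>> getResult() { return result; }
    public void setResult(Map<String, List<NerEntity>> result) { this.result = result; }
}

public class NerEntity {

    private String value;    // e.g. "Mannheim"
    private int startOffset; // e.g. 0
    private int endOffset;   // e.g. 8
    private String type;     // e.g. "CITY"
    // getters and setters omitted for brevity
}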

private List<Type> getTypeResponse() {

    return typeColorMap.entrySet()
            .stream()
            .map(typeColor -> Type.builder()
                    .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
                    .type(typeColor.getKey())
                    .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
                    .hexColor(typeColor.getValue())
                    .isHint(hintTypeMap.get(typeColor.getKey()))
                    .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
                    .isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
                    .rank(rankTypeMap.get(typeColor.getKey()))
                    .build())
            .collect(Collectors.toList());
}

private Type getDictionaryResponse(String type, boolean isDossierDictionary) {

    return Type.builder()
            .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .hexColor(typeColorMap.get(type))
            .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
            .isHint(hintTypeMap.get(type))
            .isCaseInsensitive(caseInSensitiveMap.get(type))
            .isRecommendation(recommendationTypeMap.get(type))
            .rank(rankTypeMap.get(type))
            .build();
}

private String cleanDictionaryEntry(String entry) {

    return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
}
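
// cleanDictionaryEntry flattens multi-line dictionary entries before matching.
// A self-contained approximation of the cleaning step; the first regex is an
// assumption about what TextNormalizationUtilities.removeHyphenLineBreaks
// does, since that utility is not shown in this diff:
static String cleanApprox(String entry) {
    return entry.replaceAll("-\\n", "").replaceAll("\\n", " ");
}
// cleanApprox("Drinking wa-\nter")    -> "Drinking water"
// cleanApprox("Bayer AG\nLeverkusen") -> "Bayer AG Leverkusen"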

private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {

    List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
    entries.forEach(entry -> {
        dictionaryEntries.add(DictionaryEntry.builder()
                .value(entry)
                .version(reanlysisVersions.getOrDefault(entry, 0L))
                .deleted(deleted.contains(entry))
                .build());
    });
    return dictionaryEntries;
}

@Test
public void testImportedRedactions() throws IOException {

@ -1579,7 +1559,6 @@ public class RedactionIntegrationTest {
    ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.pdf");
    ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.json");

    AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
    storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), IOUtils.toByteArray(importedRedactions.getInputStream()));

@ -1598,4 +1577,32 @@ public class RedactionIntegrationTest {
}
}

@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {

    AnalyzeRequest request = AnalyzeRequest.builder()
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .lastProcessed(OffsetDateTime.now())
            .build();

    var bytes = IOUtils.toByteArray(stream);

    storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);

    return request;
}


@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {

    ClassPathResource pdfFileResource = new ClassPathResource(file);

    return prepareStorage(pdfFileResource.getInputStream());
}

}

@ -0,0 +1,12 @@
{
  "result": {
    "1": [
      {
        "value": "Mannheim",
        "startOffset": 0,
        "endOffset": 8,
        "type": "CITY"
      }
    ]
  }
}