diff --git a/redaction-service-v1/redaction-service-api-v1/pom.xml b/redaction-service-v1/redaction-service-api-v1/pom.xml
index 0b808449..816402b9 100644
--- a/redaction-service-v1/redaction-service-api-v1/pom.xml
+++ b/redaction-service-v1/redaction-service-api-v1/pom.xml
@@ -12,7 +12,7 @@
redaction-service-api-v1
- 1.32.0
+ 1.39.0
diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java
index 55e9666e..2e3ba7a2 100644
--- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java
+++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java
@@ -2,6 +2,6 @@ package com.iqser.red.service.redaction.v1.model;
public enum MessageType {
- FULL_ANALYSE, REANALYSE, SURROUNDING_TEXT
+ ANALYSE, REANALYSE, STRUCTURE_ANALYSE, SURROUNDING_TEXT // FULL_ANALYSE is gone: structure analysis (STRUCTURE_ANALYSE) and entity analysis (ANALYSE) are now requested as separate message types. NOTE(review): constants were renamed and reordered - confirm no producer still sends FULL_ANALYSE or depends on ordinals.
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
index 155993ca..6a50510d 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
@@ -1,30 +1,42 @@
package com.iqser.red.service.redaction.v1.server.controller;
-import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
-import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
-import com.iqser.red.service.redaction.v1.model.*;
-import com.iqser.red.service.redaction.v1.resources.RedactionResource;
-import com.iqser.red.service.redaction.v1.server.classification.model.Document;
-import com.iqser.red.service.redaction.v1.server.classification.model.Page;
-import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
-import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
-import com.iqser.red.service.redaction.v1.server.redaction.service.*;
-import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
-import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
-import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.stream.Collectors;
+
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.stream.Collectors;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
+import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
+import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.model.RedactionLog;
+import com.iqser.red.service.redaction.v1.model.RedactionRequest;
+import com.iqser.red.service.redaction.v1.model.RedactionResult;
+import com.iqser.red.service.redaction.v1.model.SectionArea;
+import com.iqser.red.service.redaction.v1.model.SectionGrid;
+import com.iqser.red.service.redaction.v1.resources.RedactionResource;
+import com.iqser.red.service.redaction.v1.server.classification.model.Document;
+import com.iqser.red.service.redaction.v1.server.classification.model.Page;
+import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
+import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
+import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService;
+import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
+import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
+import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
@Slf4j
@RestController
@@ -43,8 +55,7 @@ public class RedactionController implements RedactionResource {
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
- var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest
- .getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
+ var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
var mergedRedactionLog = getRedactionLog(RedactionRequest.builder()
.fileId(annotateRequest.getFileId())
.manualRedactions(annotateRequest.getManualRedactions())
@@ -73,13 +84,11 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
- var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
- .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+ var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
- storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
- .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+ storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@@ -101,13 +110,11 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
- var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
- .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+ var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
- storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
- .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+ storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@@ -131,8 +138,7 @@ public class RedactionController implements RedactionResource {
Document classifiedDoc;
try {
- var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
- .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
+ var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
} catch (Exception e) {
throw new RedactionException(e);
@@ -174,8 +180,7 @@ public class RedactionController implements RedactionResource {
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
- SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest
- .getFileId());
+ SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId());
if (sectionGrid.getSections().isEmpty()) {
log.info("SectionGrid does not have headlines set. Computing headlines now!");
@@ -184,8 +189,7 @@ public class RedactionController implements RedactionResource {
// enhance section grid with headline data
for (var sectionText : text.getSectionTexts()) {
sectionGrid.getSections()
- .add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText
- .getSectionAreas()
+ .add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
.stream()
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas()));
@@ -194,8 +198,7 @@ public class RedactionController implements RedactionResource {
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
- return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest
- .getManualRedactions(), redactionRequest.getExcludedPages());
+ return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages());
}
@@ -217,11 +220,9 @@ public class RedactionController implements RedactionResource {
@PathVariable("fileId") String fileId,
@RequestBody ManualRedactions manualRedactions) {
- long start = System.currentTimeMillis();
var result = manualRedactionSurroundingTextService.addSurroundingText(dossierId, fileId, manualRedactions);
- log.info("add surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, System
- .currentTimeMillis() - start);
- return result;
+ log.info("Added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, result.getDuration());
+ return result.getManualRedactions();
}
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java
index aa0e2c74..d57a4e20 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java
@@ -1,5 +1,12 @@
package com.iqser.red.service.redaction.v1.server.queue;
+import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
+import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
+
+import org.springframework.amqp.rabbit.annotation.RabbitHandler;
+import org.springframework.amqp.rabbit.annotation.RabbitListener;
+import org.springframework.stereotype.Service;
+
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
@@ -8,18 +15,10 @@ import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
-import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
-import org.springframework.amqp.rabbit.annotation.RabbitHandler;
-import org.springframework.amqp.rabbit.annotation.RabbitListener;
-import org.springframework.stereotype.Service;
-
-import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
-import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
-
@Slf4j
@Service
@RequiredArgsConstructor
@@ -28,7 +27,6 @@ public class RedactionMessageReceiver {
private final ObjectMapper objectMapper;
private final AnalyzeService analyzeService;
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
- private final NerAnalyserService nerAnalyserService;
private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@@ -40,31 +38,26 @@ public class RedactionMessageReceiver {
log.info("Processing analyze request for file: {}", analyzeRequest.getFileId());
AnalyzeResult result = null;
- switch (analyzeRequest.getMessageType()){
+ switch (analyzeRequest.getMessageType()) {
case REANALYSE:
result = analyzeService.reanalyze(analyzeRequest);
log.info("Successfully reanalyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
- case FULL_ANALYSE:
- // TODO Seperate stucture analysis by other queue
- analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
-
- // TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a seperate queue after structure analysis
- nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
-
- result = analyzeService.analyze(analyzeRequest);
- log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result
- .getDuration());
+ case STRUCTURE_ANALYSE:
+ result = analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
+ log.info("Successfully analyzed structure dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
+
+ case ANALYSE:
+ result = analyzeService.analyze(analyzeRequest);
+ log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
+ break;
+
case SURROUNDING_TEXT:
- var manualRedactions = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
- result = AnalyzeResult.builder()
- .dossierId(analyzeRequest.getDossierId())
- .fileId(analyzeRequest.getFileId())
- .manualRedactions(manualRedactions)
- .build();
+ result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
+ log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java
index 5b3b26bd..3b9a439a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java
@@ -32,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
+import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
@@ -66,12 +67,11 @@ public class AnalyzeService {
private final RedactionServiceSettings redactionServiceSettings;
private final SectionTextBuilderService sectionTextBuilderService;
private final SectionGridCreatorService sectionGridCreatorService;
- private final NerAnalyserService nerAnalyserService;
private final ImageService imageService;
private final ImportedRedactionService importedRedactionService;
- public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
+ public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
@@ -79,7 +79,6 @@ public class AnalyzeService {
Document classifiedDoc;
try {
-
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
Map> pdfImages = null;
@@ -105,44 +104,17 @@ public class AnalyzeService {
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas())));
+ log.info("Store text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());
- log.info("Document structure analysis successful, took: {}", System.currentTimeMillis() - startTime);
- }
-
-
- public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
-
- long startTime = System.currentTimeMillis();
-
- var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
- var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
- if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
- nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
- nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
- }
-
- dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
- KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
- long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
- Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
-
- PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
-
- dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
-
- List redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
-
- var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
- var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
- .getDossierTemplateVersion(), dictionary.getVersion()
- .getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
-
- var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
- redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
-
- return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
+ return AnalyzeResult.builder()
+ .dossierId(analyzeRequest.getDossierId())
+ .fileId(analyzeRequest.getFileId())
+ .duration(System.currentTimeMillis() - startTime)
+ .numberOfPages(text.getNumberOfPages())
+ .analysisVersion(redactionServiceSettings.getAnalysisVersion())
+ .build();
}
@@ -168,10 +140,11 @@ public class AnalyzeService {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
}
- var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
- if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
- nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+ NerEntities nerEntities;
+ if (redactionServiceSettings.isNerServiceEnabled()) {
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+ } else {
+ nerEntities = NerEntities.builder().build();
}
List reanalysisSections = text.getSectionTexts()
@@ -188,13 +161,49 @@ public class AnalyzeService {
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), newRedactionLogEntries, false);
- redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
+ redactionLog.getRedactionLogEntry()
+ .removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType()
+ .equals(IMPORTED_REDACTION_TYPE));
redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
}
+ public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
+ // Runs the full analysis for one file from stored data: loads the stored text (and NER entities if enabled), refreshes dictionary and rules, finds entities, builds the redaction log, runs it through imported-redaction processing and hands it to finalizeAnalysis.
+ long startTime = System.currentTimeMillis();
+ var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+ // NER entities are only read from storage when the NER service is enabled; otherwise an empty NerEntities is used. NOTE(review): the loaded value is not null-checked here - confirm getNerEntities cannot return null when nothing has been stored yet.
+ NerEntities nerEntities;
+ if (redactionServiceSettings.isNerServiceEnabled()) {
+ nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+ } else {
+ nerEntities = NerEntities.builder().build();
+ }
+ dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+ KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
+ long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
+ Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+ // Entity search runs on the deep-copied dictionary, the rule container and the NER entities loaded above.
+ PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
+ // Hands the same dictionary copy back to the dictionary service after the search. NOTE(review): presumably persists entries added during rule execution - confirm.
+ dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
+ // Build the redaction log entries from the entities found per page.
+ List redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
+ // Assemble the RedactionLog from analysis version, analysis number, entries, legal basis mapping and the dictionary / rules / legal-basis versions.
+ var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
+ var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
+ .getDossierTemplateVersion(), dictionary.getVersion()
+ .getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
+ // The log entries are replaced by the result of imported-redaction processing; the last argument is true here, while another call in this class passes false.
+ var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
+ redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
+ // The final flag is false for this full analysis; the other finalizeAnalysis calls visible in this class pass true.
+ return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
+ }
+
+
private Set findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
Text text, AnalyzeRequest analyzeRequest) {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
index 4f1a3324..796d91e3 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
@@ -1,5 +1,18 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.commons.lang3.StringUtils;
+import org.kie.api.runtime.KieContainer;
+import org.springframework.stereotype.Service;
+
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
@@ -8,20 +21,21 @@ import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
-import com.iqser.red.service.redaction.v1.server.redaction.model.*;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
+import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
+import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
+import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
+import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
+
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.lang3.StringUtils;
-import org.kie.api.runtime.KieContainer;
-import org.springframework.stereotype.Service;
-
-import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
@Slf4j
@Service
@@ -46,7 +60,7 @@ public class EntityRedactionService {
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
- Map> entitiesPerPage = convertToEnititesPerPage(entities);
+ Map> entitiesPerPage = convertToEntitiesPerPage(entities);
return new PageEntities(entitiesPerPage, imagesPerPage);
}
@@ -145,7 +159,7 @@ public class EntityRedactionService {
}
- private Map> convertToEnititesPerPage(Set entities) {
+ private Map> convertToEntitiesPerPage(Set entities) {
Map> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
@@ -209,7 +223,7 @@ public class EntityRedactionService {
private Entities findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local, NerEntities nerEntities,
- List cellstarts) {
+ List cellStarts) {
Set found = new HashSet<>();
String searchableString = searchableText.toString();
@@ -230,32 +244,32 @@ public class EntityRedactionService {
Set nerFound = new HashSet<>();
if (!local) {
- nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellstarts, headline));
+ nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
}
return new Entities(EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary), nerFound);
}
- private Set getNerValues(int sectionNumber, NerEntities nerEntities,
- List cellstarts, String headline) {
+ private Set getNerValues(int sectionNumber, NerEntities nerEntities, List cellStarts,
+ String headline) {
Set entities = new HashSet<>();
- if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
- .containsKey(sectionNumber)) {
+ if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getResult().containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> {
- if (cellstarts == null || cellstarts.isEmpty()) {
- entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
+ if (cellStarts == null || cellStarts.isEmpty()) {
+ entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
} else {
boolean intersectsCellStart = false;
- for (Integer cellStart : cellstarts) {
+ for (Integer cellStart : cellStarts) {
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
intersectsCellStart = true;
+ break;
}
}
if (!intersectsCellStart) {
- entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
+ entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
}
}
});
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java
index f0983b2c..f8256b04 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java
@@ -1,9 +1,17 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.springframework.stereotype.Service;
+
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
+import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
@@ -13,14 +21,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.lang3.tuple.Pair;
-import org.springframework.stereotype.Service;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
@Slf4j
@Service
@@ -31,8 +34,9 @@ public class ManualRedactionSurroundingTextService {
private final SurroundingWordsService surroundingWordsService;
- public ManualRedactions addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
+ public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
+ long startTime = System.currentTimeMillis();
Text text = redactionStorageService.getText(dossierId, fileId);
List processedAddRedactions = new ArrayList<>();
List processedResizeRedactions = new ArrayList<>();
@@ -49,8 +53,7 @@ public class ManualRedactionSurroundingTextService {
while (addItty.hasNext()) {
var manualAddRedaction = addItty.next();
if (sectionContainsEntry(sectionArea, manualAddRedaction.getPositions())) {
- var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction
- .getPositions());
+ var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction.getPositions());
manualAddRedaction.setTextBefore(surroundingText.getLeft());
manualAddRedaction.setTextAfter(surroundingText.getRight());
processedAddRedactions.add(manualAddRedaction);
@@ -62,8 +65,7 @@ public class ManualRedactionSurroundingTextService {
while (resizeItty.hasNext()) {
var manualResizeRedaction = resizeItty.next();
if (sectionContainsEntry(sectionArea, manualResizeRedaction.getPositions())) {
- var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction
- .getPositions());
+ var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction.getPositions());
manualResizeRedaction.setTextBefore(surroundingText.getLeft());
manualResizeRedaction.setTextAfter(surroundingText.getRight());
processedResizeRedactions.add(manualResizeRedaction);
@@ -75,15 +77,20 @@ public class ManualRedactionSurroundingTextService {
manualRedactions.getEntriesToAdd().addAll(processedAddRedactions);
manualRedactions.getResizeRedactions().addAll(processedResizeRedactions);
- return manualRedactions;
+
+ return AnalyzeResult.builder()
+ .dossierId(dossierId)
+ .fileId(fileId)
+ .manualRedactions(manualRedactions)
+ .duration(System.currentTimeMillis() - startTime)
+ .build();
}
private Pair findSurroundingText(SectionText sectionText, String value,
List toFindPositions) {
- Set entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText
- .getSectionNumber(), false, false, Engine.DICTIONARY, false);
+ Set entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false);
Set entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
@@ -94,8 +101,7 @@ public class ManualRedactionSurroundingTextService {
}
if (sectionText.getCellStarts() != null && !sectionText.getCellStarts().isEmpty()) {
- surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText
- .getCellStarts());
+ surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null);
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java
deleted file mode 100644
index 2425fd9e..00000000
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package com.iqser.red.service.redaction.v1.server.redaction.service;
-
-import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
-import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
-import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
-import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
-import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
-import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.codec.binary.Base64;
-import org.springframework.stereotype.Service;
-
-import java.util.stream.Collectors;
-
-@Slf4j
-@Service
-@RequiredArgsConstructor
-public class NerAnalyserService {
-
- private final RedactionStorageService redactionStorageService;
- private final EntityRecognitionClient entityRecognitionClient;
- private final RedactionServiceSettings redactionServiceSettings;
-
- public void computeNerEntities(String dossierId, String fileId) {
-
- if (redactionServiceSettings.isEnableEntityRecognition()) {
- var text = redactionStorageService.getText(dossierId, fileId);
-
- long start = System.currentTimeMillis();
-
- if (text != null) {
- var nerRequest = EntityRecognitionRequest.builder()
- .data(text.getSectionTexts()
- .stream()
- .map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), new String(Base64
- .encodeBase64(sectionText
- .getText().getBytes()))))
- .collect(Collectors.toList()))
- .build();
-
- var nerResponse = entityRecognitionClient.findAuthors(nerRequest);
-
- log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId);
-
- redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse);
- } else {
- log.warn("Warning, text for file: {} in dossier: {} is null", fileId, dossierId);
- }
- }
- }
-
-}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java
index 9cce2ab9..e5f41214 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java
@@ -1,5 +1,15 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.stereotype.Service;
+
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
@@ -10,13 +20,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionS
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
+
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections4.CollectionUtils;
-import org.springframework.stereotype.Service;
-
-import java.util.*;
-import java.util.stream.Collectors;
@Service
@Slf4j
@@ -85,7 +91,7 @@ public class RedactionLogCreatorService {
List redactionLogEntities = new ArrayList<>();
- // Duplicates can exist due table extraction colums over multiple rows.
+ // Duplicates can exist due to table extraction columns spanning multiple rows.
Set processedIds = new HashSet<>();
entityLoop:
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java
index 901d8042..fd37e29c 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java
@@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.settings;
-import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
+import lombok.Data;
+
@Data
@ConfigurationProperties("redaction-service")
public class RedactionServiceSettings {
@@ -17,6 +18,6 @@ public class RedactionServiceSettings {
private int analysisVersion = 1;
- private boolean enableEntityRecognition = true;
+ private boolean nerServiceEnabled = true;
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java
index 388a550a..4ac59e31 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java
@@ -105,14 +105,13 @@ public class RedactionStorageService {
try {
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
} catch (StorageObjectDoesNotExist e) {
- log.debug("NER Entities not available.");
- return null;
+ throw new NotFoundException("NER Entities are not available.");
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
} catch (IOException e) {
- throw new RuntimeException("Could not convert NerEntities", e);
+ throw new RuntimeException("Could not convert NER Entities", e);
}
}
@@ -146,8 +145,6 @@ public class RedactionStorageService {
return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
}
-
-
}
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index 4b0133f0..8c2c5abe 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -23,9 +23,6 @@ import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.iqser.red.service.persistence.service.v1.api.model.annotations.ImportedAnnotation;
-import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
import org.apache.commons.io.IOUtils;
import org.junit.After;
import org.junit.Before;
@@ -56,6 +53,12 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Annota
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
@@ -215,6 +218,7 @@ public class RedactionIntegrationTest {
loadDictionaryForTest();
loadTypeForTest();
+ loadNerForTest();
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getTypeResponse());
@@ -256,293 +260,6 @@ public class RedactionIntegrationTest {
}
- private void loadDictionaryForTest() {
-
- dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/hint_only.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/must_redact.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/published_information.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/test_method.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/PII.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/false_positive.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/purity.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/empty.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/empty.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/empty.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/empty.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/empty.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
- .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
- .stream()
- .map(this::cleanDictionaryEntry)
- .collect(Collectors.toSet()));
- dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
- }
-
-
- private String cleanDictionaryEntry(String entry) {
-
- return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
- }
-
-
- private void loadTypeForTest() {
-
- typeColorMap.put(VERTEBRATE, "#ff85f7");
- typeColorMap.put(ADDRESS, "#ffe187");
- typeColorMap.put(AUTHOR, "#ffe187");
- typeColorMap.put(SPONSOR, "#85ebff");
- typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
- typeColorMap.put(REDACTION_INDICATOR, "#caff85");
- typeColorMap.put(HINT_ONLY, "#abc0c4");
- typeColorMap.put(MUST_REDACT, "#fab4c0");
- typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
- typeColorMap.put(TEST_METHOD, "#91fae8");
- typeColorMap.put(PII, "#66ccff");
- typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
- typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
- typeColorMap.put(FALSE_POSITIVE, "#ffffff");
- typeColorMap.put(PURITY, "#ffe187");
- typeColorMap.put(IMAGE, "#fcc5fb");
- typeColorMap.put(OCR, "#fcc5fb");
- typeColorMap.put(LOGO, "#ffe187");
- typeColorMap.put(FORMULA, "#ffe187");
- typeColorMap.put(SIGNATURE, "#ffe187");
- typeColorMap.put(IMPORTED_REDACTION, "#32a852");
-
- hintTypeMap.put(VERTEBRATE, true);
- hintTypeMap.put(ADDRESS, false);
- hintTypeMap.put(AUTHOR, false);
- hintTypeMap.put(SPONSOR, false);
- hintTypeMap.put(NO_REDACTION_INDICATOR, true);
- hintTypeMap.put(REDACTION_INDICATOR, true);
- hintTypeMap.put(HINT_ONLY, true);
- hintTypeMap.put(MUST_REDACT, true);
- hintTypeMap.put(PUBLISHED_INFORMATION, true);
- hintTypeMap.put(TEST_METHOD, true);
- hintTypeMap.put(PII, false);
- hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
- hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
- hintTypeMap.put(FALSE_POSITIVE, true);
- hintTypeMap.put(PURITY, false);
- hintTypeMap.put(IMAGE, true);
- hintTypeMap.put(OCR, true);
- hintTypeMap.put(FORMULA, false);
- hintTypeMap.put(LOGO, false);
- hintTypeMap.put(SIGNATURE, false);
- hintTypeMap.put(DOSSIER_REDACTIONS, false);
- hintTypeMap.put(IMPORTED_REDACTION, false);
-
- caseInSensitiveMap.put(VERTEBRATE, true);
- caseInSensitiveMap.put(ADDRESS, false);
- caseInSensitiveMap.put(AUTHOR, false);
- caseInSensitiveMap.put(SPONSOR, false);
- caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
- caseInSensitiveMap.put(REDACTION_INDICATOR, true);
- caseInSensitiveMap.put(HINT_ONLY, true);
- caseInSensitiveMap.put(MUST_REDACT, true);
- caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
- caseInSensitiveMap.put(TEST_METHOD, false);
- caseInSensitiveMap.put(PII, false);
- caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
- caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
- caseInSensitiveMap.put(FALSE_POSITIVE, false);
- caseInSensitiveMap.put(PURITY, false);
- caseInSensitiveMap.put(IMAGE, true);
- caseInSensitiveMap.put(OCR, true);
- caseInSensitiveMap.put(SIGNATURE, true);
- caseInSensitiveMap.put(LOGO, true);
- caseInSensitiveMap.put(FORMULA, true);
- caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
- caseInSensitiveMap.put(IMPORTED_REDACTION, false);
-
- recommendationTypeMap.put(VERTEBRATE, false);
- recommendationTypeMap.put(ADDRESS, false);
- recommendationTypeMap.put(AUTHOR, false);
- recommendationTypeMap.put(SPONSOR, false);
- recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
- recommendationTypeMap.put(REDACTION_INDICATOR, false);
- recommendationTypeMap.put(HINT_ONLY, false);
- recommendationTypeMap.put(MUST_REDACT, false);
- recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
- recommendationTypeMap.put(TEST_METHOD, false);
- recommendationTypeMap.put(PII, false);
- recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
- recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
- recommendationTypeMap.put(FALSE_POSITIVE, false);
- recommendationTypeMap.put(PURITY, false);
- recommendationTypeMap.put(IMAGE, false);
- recommendationTypeMap.put(OCR, false);
- recommendationTypeMap.put(FORMULA, false);
- recommendationTypeMap.put(SIGNATURE, false);
- recommendationTypeMap.put(LOGO, false);
- recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
- recommendationTypeMap.put(IMPORTED_REDACTION, false);
-
- rankTypeMap.put(FALSE_POSITIVE, 160);
- rankTypeMap.put(PURITY, 155);
- rankTypeMap.put(PII, 150);
- rankTypeMap.put(ADDRESS, 140);
- rankTypeMap.put(AUTHOR, 130);
- rankTypeMap.put(SPONSOR, 120);
- rankTypeMap.put(VERTEBRATE, 110);
- rankTypeMap.put(MUST_REDACT, 100);
- rankTypeMap.put(REDACTION_INDICATOR, 90);
- rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
- rankTypeMap.put(PUBLISHED_INFORMATION, 70);
- rankTypeMap.put(TEST_METHOD, 60);
- rankTypeMap.put(HINT_ONLY, 50);
- rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
- rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
- rankTypeMap.put(IMAGE, 30);
- rankTypeMap.put(OCR, 29);
- rankTypeMap.put(LOGO, 28);
- rankTypeMap.put(SIGNATURE, 27);
- rankTypeMap.put(FORMULA, 26);
- rankTypeMap.put(DOSSIER_REDACTIONS, 200);
- rankTypeMap.put(IMPORTED_REDACTION, 200);
-
- colors.setDefaultColor("#acfc00");
- colors.setNotRedacted("#cccccc");
- colors.setRequestAdd("#04b093");
- colors.setRequestRemove("#04b093");
- }
-
-
- private List getTypeResponse() {
-
- return typeColorMap.entrySet()
- .stream()
- .map(typeColor -> Type.builder()
- .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
- .type(typeColor.getKey())
- .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
- .hexColor(typeColor.getValue())
- .isHint(hintTypeMap.get(typeColor.getKey()))
- .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
- .isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
- .rank(rankTypeMap.get(typeColor.getKey()))
- .build())
-
- .collect(Collectors.toList());
- }
-
-
- private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
-
- return Type.builder()
- .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
- .hexColor(typeColorMap.get(type))
- .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
- .isHint(hintTypeMap.get(type))
- .isCaseInsensitive(caseInSensitiveMap.get(type))
- .isRecommendation(recommendationTypeMap.get(type))
- .rank(rankTypeMap.get(type))
- .build();
- }
-
-
- private List toDictionaryEntry(List entries) {
-
- List dictionaryEntries = new ArrayList<>();
- entries.forEach(entry -> {
- dictionaryEntries.add(DictionaryEntry.builder()
- .value(entry)
- .version(reanlysisVersions.getOrDefault(entry, 0L))
- .deleted(deleted.contains(entry))
- .build());
- });
- return dictionaryEntries;
- }
-
-
@Test
public void test270Rotated() {
@@ -584,9 +301,7 @@ public class RedactionIntegrationTest {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
- duplicates.entrySet().forEach(entry -> {
- assertThat(entry.getValue().size()).isEqualTo(1);
- });
+ duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1));
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
@@ -1047,8 +762,6 @@ public class RedactionIntegrationTest {
@Test
public void testManualRedaction() throws IOException {
-// 675eba69b0c2917de55462c817adaa05
-
System.out.println("testManualRedaction");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
@@ -1099,8 +812,6 @@ public class RedactionIntegrationTest {
.page(1)
.build()));
-// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
-
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@@ -1246,34 +957,6 @@ public class RedactionIntegrationTest {
}
- @SneakyThrows
- private AnalyzeRequest prepareStorage(String file) {
-
- ClassPathResource pdfFileResource = new ClassPathResource(file);
-
- return prepareStorage(pdfFileResource.getInputStream());
- }
-
-
- @SneakyThrows
- private AnalyzeRequest prepareStorage(InputStream stream) {
-
- AnalyzeRequest request = AnalyzeRequest.builder()
- .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
- .dossierId(TEST_DOSSIER_ID)
- .fileId(TEST_FILE_ID)
- .lastProcessed(OffsetDateTime.now())
- .build();
-
- var bytes = IOUtils.toByteArray(stream);
-
- storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
-
- return request;
-
- }
-
-
@Test
public void sponsorCompanyTest() throws IOException {
@@ -1527,7 +1210,8 @@ public class RedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
- var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions);
+ var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions)
+ .getManualRedactions();
surroundingTextResult.getEntriesToAdd().forEach(addEntry -> {
assertThat(addEntry.getTextAfter()).isNotEmpty();
});
@@ -1535,6 +1219,117 @@ public class RedactionIntegrationTest {
}
+ private void loadDictionaryForTest() {
+
+ dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/hint_only.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/must_redact.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/published_information.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/test_method.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/PII.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/false_positive.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/purity.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/empty.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/empty.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/empty.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/empty.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/empty.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
+ }
+
+
private static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
@@ -1572,6 +1367,191 @@ public class RedactionIntegrationTest {
}
+ private void loadTypeForTest() {
+ // Registers the per-type test fixture: hex color first, then hint / case-insensitivity / recommendation flags, rank, and finally the default colors.
+ typeColorMap.put(VERTEBRATE, "#ff85f7");
+ typeColorMap.put(ADDRESS, "#ffe187");
+ typeColorMap.put(AUTHOR, "#ffe187");
+ typeColorMap.put(SPONSOR, "#85ebff");
+ typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
+ typeColorMap.put(REDACTION_INDICATOR, "#caff85");
+ typeColorMap.put(HINT_ONLY, "#abc0c4");
+ typeColorMap.put(MUST_REDACT, "#fab4c0");
+ typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
+ typeColorMap.put(TEST_METHOD, "#91fae8");
+ typeColorMap.put(PII, "#66ccff");
+ typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
+ typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
+ typeColorMap.put(FALSE_POSITIVE, "#ffffff");
+ typeColorMap.put(PURITY, "#ffe187");
+ typeColorMap.put(IMAGE, "#fcc5fb");
+ typeColorMap.put(OCR, "#fcc5fb");
+ typeColorMap.put(LOGO, "#ffe187");
+ typeColorMap.put(FORMULA, "#ffe187");
+ typeColorMap.put(SIGNATURE, "#ffe187");
+ typeColorMap.put(IMPORTED_REDACTION, "#32a852"); // NOTE(review): no color is registered for DOSSIER_REDACTIONS here, although the maps below include it — confirm intended
+ // Hint flag per type; read by getTypeResponse / getDictionaryResponse and passed to Type.isHint.
+ hintTypeMap.put(VERTEBRATE, true);
+ hintTypeMap.put(ADDRESS, false);
+ hintTypeMap.put(AUTHOR, false);
+ hintTypeMap.put(SPONSOR, false);
+ hintTypeMap.put(NO_REDACTION_INDICATOR, true);
+ hintTypeMap.put(REDACTION_INDICATOR, true);
+ hintTypeMap.put(HINT_ONLY, true);
+ hintTypeMap.put(MUST_REDACT, true);
+ hintTypeMap.put(PUBLISHED_INFORMATION, true);
+ hintTypeMap.put(TEST_METHOD, true);
+ hintTypeMap.put(PII, false);
+ hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
+ hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
+ hintTypeMap.put(FALSE_POSITIVE, true);
+ hintTypeMap.put(PURITY, false);
+ hintTypeMap.put(IMAGE, true);
+ hintTypeMap.put(OCR, true);
+ hintTypeMap.put(FORMULA, false);
+ hintTypeMap.put(LOGO, false);
+ hintTypeMap.put(SIGNATURE, false);
+ hintTypeMap.put(DOSSIER_REDACTIONS, false);
+ hintTypeMap.put(IMPORTED_REDACTION, false);
+ // Case-insensitivity flag per type; passed to Type.isCaseInsensitive when the responses are built.
+ caseInSensitiveMap.put(VERTEBRATE, true);
+ caseInSensitiveMap.put(ADDRESS, false);
+ caseInSensitiveMap.put(AUTHOR, false);
+ caseInSensitiveMap.put(SPONSOR, false);
+ caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
+ caseInSensitiveMap.put(REDACTION_INDICATOR, true);
+ caseInSensitiveMap.put(HINT_ONLY, true);
+ caseInSensitiveMap.put(MUST_REDACT, true);
+ caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
+ caseInSensitiveMap.put(TEST_METHOD, false);
+ caseInSensitiveMap.put(PII, false);
+ caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
+ caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
+ caseInSensitiveMap.put(FALSE_POSITIVE, false);
+ caseInSensitiveMap.put(PURITY, false);
+ caseInSensitiveMap.put(IMAGE, true);
+ caseInSensitiveMap.put(OCR, true);
+ caseInSensitiveMap.put(SIGNATURE, true);
+ caseInSensitiveMap.put(LOGO, true);
+ caseInSensitiveMap.put(FORMULA, true);
+ caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
+ caseInSensitiveMap.put(IMPORTED_REDACTION, false);
+ // Recommendation flag per type; true only for RECOMMENDATION_AUTHOR and RECOMMENDATION_ADDRESS.
+ recommendationTypeMap.put(VERTEBRATE, false);
+ recommendationTypeMap.put(ADDRESS, false);
+ recommendationTypeMap.put(AUTHOR, false);
+ recommendationTypeMap.put(SPONSOR, false);
+ recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
+ recommendationTypeMap.put(REDACTION_INDICATOR, false);
+ recommendationTypeMap.put(HINT_ONLY, false);
+ recommendationTypeMap.put(MUST_REDACT, false);
+ recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
+ recommendationTypeMap.put(TEST_METHOD, false);
+ recommendationTypeMap.put(PII, false);
+ recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
+ recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
+ recommendationTypeMap.put(FALSE_POSITIVE, false);
+ recommendationTypeMap.put(PURITY, false);
+ recommendationTypeMap.put(IMAGE, false);
+ recommendationTypeMap.put(OCR, false);
+ recommendationTypeMap.put(FORMULA, false);
+ recommendationTypeMap.put(SIGNATURE, false);
+ recommendationTypeMap.put(LOGO, false);
+ recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
+ recommendationTypeMap.put(IMPORTED_REDACTION, false);
+ // Rank per type. NOTE(review): RECOMMENDATION_ADDRESS and IMAGE share 30, DOSSIER_REDACTIONS and IMPORTED_REDACTION share 200 — confirm the ties are intended.
+ rankTypeMap.put(FALSE_POSITIVE, 160);
+ rankTypeMap.put(PURITY, 155);
+ rankTypeMap.put(PII, 150);
+ rankTypeMap.put(ADDRESS, 140);
+ rankTypeMap.put(AUTHOR, 130);
+ rankTypeMap.put(SPONSOR, 120);
+ rankTypeMap.put(VERTEBRATE, 110);
+ rankTypeMap.put(MUST_REDACT, 100);
+ rankTypeMap.put(REDACTION_INDICATOR, 90);
+ rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
+ rankTypeMap.put(PUBLISHED_INFORMATION, 70);
+ rankTypeMap.put(TEST_METHOD, 60);
+ rankTypeMap.put(HINT_ONLY, 50);
+ rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
+ rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
+ rankTypeMap.put(IMAGE, 30);
+ rankTypeMap.put(OCR, 29);
+ rankTypeMap.put(LOGO, 28);
+ rankTypeMap.put(SIGNATURE, 27);
+ rankTypeMap.put(FORMULA, 26);
+ rankTypeMap.put(DOSSIER_REDACTIONS, 200);
+ rankTypeMap.put(IMPORTED_REDACTION, 200);
+ // Colors set on the colors object: default, not-redacted, and add/remove requests (both requests use the same value).
+ colors.setDefaultColor("#acfc00");
+ colors.setNotRedacted("#cccccc");
+ colors.setRequestAdd("#04b093");
+ colors.setRequestRemove("#04b093");
+ }
+
+
+ @SneakyThrows
+ private void loadNerForTest() {
+ // Stores the canned NER response under NER_ENTITIES for the test file; try-with-resources closes the classpath stream after reading.
+ try (InputStream nerStream = new ClassPathResource("files/ner_response.json").getInputStream()) {
+ storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), IOUtils.toByteArray(nerStream));
+ }
+ }
+
+
+ private List getTypeResponse() {
+ // Builds one Type per typeColorMap entry; types without a registered color are therefore not part of the result.
+ return typeColorMap.entrySet()
+ .stream()
+ .map(typeColor -> Type.builder()
+ .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) // id format: <type>:<dossier template id>
+ .type(typeColor.getKey())
+ .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
+ .hexColor(typeColor.getValue())
+ .isHint(hintTypeMap.get(typeColor.getKey()))
+ .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
+ .isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
+ .rank(rankTypeMap.get(typeColor.getKey()))
+ .build())
+ // Flags and rank above are looked up by the same type key in their respective maps.
+ .collect(Collectors.toList());
+ }
+
+
+ private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
+ // Pick the dossier-level or the template-level entry list for this type, then wrap it with the type's metadata.
+ var rawEntries = isDossierDictionary ? dossierDictionary.get(type) : dictionary.get(type);
+ return Type.builder().id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
+ .hexColor(typeColorMap.get(type))
+ .entries(toDictionaryEntry(rawEntries))
+ .isHint(hintTypeMap.get(type))
+ .isCaseInsensitive(caseInSensitiveMap.get(type))
+ .isRecommendation(recommendationTypeMap.get(type))
+ .rank(rankTypeMap.get(type))
+ .build();
+ }
+
+
+ private String cleanDictionaryEntry(String entry) {
+ String withoutHyphenBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(entry); // project helper, applied first
+ return withoutHyphenBreaks.replaceAll("\\n", " "); // regex \n: every remaining line feed becomes a single space
+ }
+
+
+ private List toDictionaryEntry(List entries) {
+ // Wraps each raw dictionary value in a DictionaryEntry, in the iteration order of the input list.
+ List dictionaryEntries = new ArrayList<>();
+ entries.forEach(entry -> {
+ dictionaryEntries.add(DictionaryEntry.builder()
+ .value(entry)
+ .version(reanlysisVersions.getOrDefault(entry, 0L)) // version 0 when no version is recorded for the entry
+ .deleted(deleted.contains(entry)) // flagged deleted when the entry is present in the deleted collection
+ .build());
+ });
+ return dictionaryEntries;
+ }
+
+
@Test
public void testImportedRedactions() throws IOException {
@@ -1579,7 +1559,6 @@ public class RedactionIntegrationTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.pdf");
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.json");
-
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), IOUtils.toByteArray(importedRedactions.getInputStream()));
@@ -1598,4 +1577,32 @@ public class RedactionIntegrationTest {
}
}
+
+ @SneakyThrows
+ private AnalyzeRequest prepareStorage(InputStream stream) {
+ // Describe the fixed test dossier/file; the timestamp is taken before the upload below.
+ AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
+ .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
+ .dossierId(TEST_DOSSIER_ID)
+ .fileId(TEST_FILE_ID)
+ .lastProcessed(OffsetDateTime.now())
+ .build();
+
+ // Read the whole stream and store it as the ORIGIN file of the test dossier/file.
+ var originBytes = IOUtils.toByteArray(stream);
+ var originStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN);
+ storageService.storeObject(originStorageId, originBytes);
+
+ return analyzeRequest;
+ }
+
+
+ @SneakyThrows
+ private AnalyzeRequest prepareStorage(String file) {
+ // Opens the classpath resource and delegates to the stream overload; the stream is closed once its bytes have been stored.
+ try (InputStream pdfStream = new ClassPathResource(file).getInputStream()) {
+ return prepareStorage(pdfStream);
+ }
+ }
+
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/ner_response.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/ner_response.json
new file mode 100644
index 00000000..64ae1dcd
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/ner_response.json
@@ -0,0 +1,12 @@
+{
+ "result": {
+ "1": [
+ {
+ "value": "Mannheim",
+ "startOffset": 0,
+ "endOffset": 8,
+ "type": "CITY"
+ }
+ ]
+ }
+}
\ No newline at end of file