diff --git a/redaction-service-v1/redaction-service-api-v1/pom.xml b/redaction-service-v1/redaction-service-api-v1/pom.xml index 0b808449..816402b9 100644 --- a/redaction-service-v1/redaction-service-api-v1/pom.xml +++ b/redaction-service-v1/redaction-service-api-v1/pom.xml @@ -12,7 +12,7 @@ redaction-service-api-v1 - 1.32.0 + 1.39.0 diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java index 55e9666e..2e3ba7a2 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/MessageType.java @@ -2,6 +2,6 @@ package com.iqser.red.service.redaction.v1.model; public enum MessageType { - FULL_ANALYSE, REANALYSE, SURROUNDING_TEXT + ANALYSE, REANALYSE, STRUCTURE_ANALYSE, SURROUNDING_TEXT } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index 155993ca..6a50510d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -1,30 +1,42 @@ package com.iqser.red.service.redaction.v1.server.controller; -import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; -import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; -import 
com.iqser.red.service.redaction.v1.model.*; -import com.iqser.red.service.redaction.v1.resources.RedactionResource; -import com.iqser.red.service.redaction.v1.server.classification.model.Document; -import com.iqser.red.service.redaction.v1.server.classification.model.Page; -import com.iqser.red.service.redaction.v1.server.exception.NotFoundException; -import com.iqser.red.service.redaction.v1.server.exception.RedactionException; -import com.iqser.red.service.redaction.v1.server.redaction.service.*; -import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; -import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; -import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.stream.Collectors; + import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RestController; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.stream.Collectors; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.redaction.v1.model.AnnotateRequest; +import com.iqser.red.service.redaction.v1.model.AnnotateResponse; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.RedactionRequest; +import 
com.iqser.red.service.redaction.v1.model.RedactionResult; +import com.iqser.red.service.redaction.v1.model.SectionArea; +import com.iqser.red.service.redaction.v1.model.SectionGrid; +import com.iqser.red.service.redaction.v1.resources.RedactionResource; +import com.iqser.red.service.redaction.v1.server.classification.model.Document; +import com.iqser.red.service.redaction.v1.server.classification.model.Page; +import com.iqser.red.service.redaction.v1.server.exception.NotFoundException; +import com.iqser.red.service.redaction.v1.server.exception.RedactionException; +import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService; +import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; +import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; +import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; +import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService; +import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; @Slf4j @RestController @@ -43,8 +55,7 @@ public class RedactionController implements RedactionResource { public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest - .getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN)); + var storedObjectStream = 
redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN)); var mergedRedactionLog = getRedactionLog(RedactionRequest.builder() .fileId(annotateRequest.getFileId()) .manualRedactions(annotateRequest.getManualRedactions()) @@ -73,13 +84,11 @@ public class RedactionController implements RedactionResource { @Override public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest - .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try { Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null); - storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest - .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); @@ -101,13 +110,11 @@ public class RedactionController implements RedactionResource { @Override public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest - .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + var storedObjectStream = 
redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try { Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null); - storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest - .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); @@ -131,8 +138,7 @@ public class RedactionController implements RedactionResource { Document classifiedDoc; try { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest - .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null); } catch (Exception e) { throw new RedactionException(e); @@ -174,8 +180,7 @@ public class RedactionController implements RedactionResource { log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion()); - SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest - .getFileId()); + SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId()); if (sectionGrid.getSections().isEmpty()) { log.info("SectionGrid does not have headlines set. 
Computing headlines now!"); @@ -184,8 +189,7 @@ public class RedactionController implements RedactionResource { // enhance section grid with headline data for (var sectionText : text.getSectionTexts()) { sectionGrid.getSections() - .add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText - .getSectionAreas() + .add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas() .stream() .map(SectionArea::getPage) .collect(Collectors.toSet()), sectionText.getSectionAreas())); @@ -194,8 +198,7 @@ public class RedactionController implements RedactionResource { } log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion()); - return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest - .getManualRedactions(), redactionRequest.getExcludedPages()); + return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages()); } @@ -217,11 +220,9 @@ public class RedactionController implements RedactionResource { @PathVariable("fileId") String fileId, @RequestBody ManualRedactions manualRedactions) { - long start = System.currentTimeMillis(); var result = manualRedactionSurroundingTextService.addSurroundingText(dossierId, fileId, manualRedactions); - log.info("add surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, System - .currentTimeMillis() - start); - return result; + log.info("Added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, result.getDuration()); + return result.getManualRedactions(); } } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java index aa0e2c74..d57a4e20 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java @@ -1,5 +1,12 @@ package com.iqser.red.service.redaction.v1.server.queue; +import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL; +import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE; + +import org.springframework.amqp.rabbit.annotation.RabbitHandler; +import org.springframework.amqp.rabbit.annotation.RabbitListener; +import org.springframework.stereotype.Service; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; @@ -8,18 +15,10 @@ import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient; import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService; import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; -import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.amqp.rabbit.annotation.RabbitHandler; -import org.springframework.amqp.rabbit.annotation.RabbitListener; -import org.springframework.stereotype.Service; - -import static 
com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL; -import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE; - @Slf4j @Service @RequiredArgsConstructor @@ -28,7 +27,6 @@ public class RedactionMessageReceiver { private final ObjectMapper objectMapper; private final AnalyzeService analyzeService; private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient; - private final NerAnalyserService nerAnalyserService; private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; @@ -40,31 +38,26 @@ public class RedactionMessageReceiver { log.info("Processing analyze request for file: {}", analyzeRequest.getFileId()); AnalyzeResult result = null; - switch (analyzeRequest.getMessageType()){ + switch (analyzeRequest.getMessageType()) { case REANALYSE: result = analyzeService.reanalyze(analyzeRequest); log.info("Successfully reanalyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration()); break; - case FULL_ANALYSE: - // TODO Seperate stucture analysis by other queue - analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); - - // TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a seperate queue after structure analysis - nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - - result = analyzeService.analyze(analyzeRequest); - log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result - .getDuration()); + case STRUCTURE_ANALYSE: + result = analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); + log.info("Successfully analyzed structure dossier {} file {} took: {}", 
analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration()); break; + + case ANALYSE: + result = analyzeService.analyze(analyzeRequest); + log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration()); + break; + case SURROUNDING_TEXT: - var manualRedactions = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions()); - result = AnalyzeResult.builder() - .dossierId(analyzeRequest.getDossierId()) - .fileId(analyzeRequest.getFileId()) - .manualRedactions(manualRedactions) - .build(); + result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions()); + log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration()); break; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 5b3b26bd..3b9a439a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -32,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.classification.model.Text; import 
com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; +import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.service.redaction.v1.server.exception.RedactionException; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement; @@ -66,12 +67,11 @@ public class AnalyzeService { private final RedactionServiceSettings redactionServiceSettings; private final SectionTextBuilderService sectionTextBuilderService; private final SectionGridCreatorService sectionGridCreatorService; - private final NerAnalyserService nerAnalyserService; private final ImageService imageService; private final ImportedRedactionService importedRedactionService; - public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) { + public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) { long startTime = System.currentTimeMillis(); @@ -79,7 +79,6 @@ public class AnalyzeService { Document classifiedDoc; try { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN)); Map> pdfImages = null; @@ -105,44 +104,17 @@ public class AnalyzeService { .map(SectionArea::getPage) .collect(Collectors.toSet()), sectionText.getSectionAreas()))); + log.info("Store text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid()); - log.info("Document structure analysis successful, took: {}", System.currentTimeMillis() - startTime); - } - - - public AnalyzeResult 
analyze(AnalyzeRequest analyzeRequest) { - - long startTime = System.currentTimeMillis(); - - var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) { - nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - } - - dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); - KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId()); - long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId()); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); - - PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities); - - dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); - - List redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId()); - - var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); - var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion() - .getDossierTemplateVersion(), dictionary.getVersion() - .getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); - - var importedRedactionFilteredEntries = 
importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true); - redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries); - - return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false); + return AnalyzeResult.builder() + .dossierId(analyzeRequest.getDossierId()) + .fileId(analyzeRequest.getFileId()) + .duration(System.currentTimeMillis() - startTime) + .numberOfPages(text.getNumberOfPages()) + .analysisVersion(redactionServiceSettings.getAnalysisVersion()) + .build(); } @@ -168,10 +140,11 @@ public class AnalyzeService { return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true); } - var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); - if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) { - nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + NerEntities nerEntities; + if (redactionServiceSettings.isNerServiceEnabled()) { nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + } else { + nerEntities = NerEntities.builder().build(); } List reanalysisSections = text.getSectionTexts() @@ -188,13 +161,49 @@ public class AnalyzeService { var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), newRedactionLogEntries, false); - redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE)); + redactionLog.getRedactionLogEntry() + .removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && 
!entry.getType() + .equals(IMPORTED_REDACTION_TYPE)); redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries); return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true); } + public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) { + + long startTime = System.currentTimeMillis(); + var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + + NerEntities nerEntities; + if (redactionServiceSettings.isNerServiceEnabled()) { + nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + } else { + nerEntities = NerEntities.builder().build(); + } + dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId()); + long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId()); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + + PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities); + + dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); + + List redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId()); + + var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId()); + var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion() + .getDossierTemplateVersion(), dictionary.getVersion() + .getDossierVersion(), rulesVersion, 
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId())); + + var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true); + redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries); + + return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false); + } + + private Set findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 4f1a3324..796d91e3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,5 +1,18 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.lang3.StringUtils; +import org.kie.api.runtime.KieContainer; +import org.springframework.stereotype.Service; + import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus; import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval; import 
com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization; @@ -8,20 +21,21 @@ import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; -import com.iqser.red.service.redaction.v1.server.redaction.model.*; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; +import com.iqser.red.service.redaction.v1.server.redaction.model.Entities; +import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.Image; +import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities; +import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; +import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang3.StringUtils; -import org.kie.api.runtime.KieContainer; -import org.springframework.stereotype.Service; - -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; @Slf4j @Service @@ -46,7 +60,7 @@ public class EntityRedactionService { EntitySearchUtils.removeEntitiesContainedInLarger(entities); } - Map> entitiesPerPage = 
convertToEnititesPerPage(entities); + Map> entitiesPerPage = convertToEntitiesPerPage(entities); return new PageEntities(entitiesPerPage, imagesPerPage); } @@ -145,7 +159,7 @@ public class EntityRedactionService { } - private Map> convertToEnititesPerPage(Set entities) { + private Map> convertToEntitiesPerPage(Set entities) { Map> entitiesPerPage = new HashMap<>(); for (Entity entity : entities) { @@ -209,7 +223,7 @@ public class EntityRedactionService { private Entities findEntities(SearchableText searchableText, String headline, int sectionNumber, Dictionary dictionary, boolean local, NerEntities nerEntities, - List cellstarts) { + List cellStarts) { Set found = new HashSet<>(); String searchableString = searchableText.toString(); @@ -230,32 +244,32 @@ public class EntityRedactionService { Set nerFound = new HashSet<>(); if (!local) { - nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellstarts, headline)); + nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline)); } return new Entities(EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary), nerFound); } - private Set getNerValues(int sectionNumber, NerEntities nerEntities, - List cellstarts, String headline) { + private Set getNerValues(int sectionNumber, NerEntities nerEntities, List cellStarts, + String headline) { Set entities = new HashSet<>(); - if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult() - .containsKey(sectionNumber)) { + if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getResult().containsKey(sectionNumber)) { nerEntities.getResult().get(sectionNumber).forEach(res -> { - if (cellstarts == null || cellstarts.isEmpty()) { - entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER)); + if (cellStarts == null || cellStarts.isEmpty()) { + entities.add(new 
Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER)); } else { boolean intersectsCellStart = false; - for (Integer cellStart : cellstarts) { + for (Integer cellStart : cellStarts) { if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) { intersectsCellStart = true; + break; } } if (!intersectsCellStart) { - entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER)); + entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER)); } } }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java index f0983b2c..f8256b04 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java @@ -1,9 +1,17 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang3.tuple.Pair; +import org.springframework.stereotype.Service; + import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle; import 
com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.redaction.v1.model.AnalyzeResult; import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.model.SectionArea; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; @@ -13,14 +21,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.tuple.Pair; -import org.springframework.stereotype.Service; - -import java.util.ArrayList; -import java.util.List; -import java.util.Set; @Slf4j @Service @@ -31,8 +34,9 @@ public class ManualRedactionSurroundingTextService { private final SurroundingWordsService surroundingWordsService; - public ManualRedactions addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) { + public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) { + long startTime = System.currentTimeMillis(); Text text = redactionStorageService.getText(dossierId, fileId); List processedAddRedactions = new ArrayList<>(); List processedResizeRedactions = new ArrayList<>(); @@ -49,8 +53,7 @@ public class ManualRedactionSurroundingTextService { while (addItty.hasNext()) { var manualAddRedaction = addItty.next(); if (sectionContainsEntry(sectionArea, manualAddRedaction.getPositions())) { - var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction - 
.getPositions()); + var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction.getPositions()); manualAddRedaction.setTextBefore(surroundingText.getLeft()); manualAddRedaction.setTextAfter(surroundingText.getRight()); processedAddRedactions.add(manualAddRedaction); @@ -62,8 +65,7 @@ public class ManualRedactionSurroundingTextService { while (resizeItty.hasNext()) { var manualResizeRedaction = resizeItty.next(); if (sectionContainsEntry(sectionArea, manualResizeRedaction.getPositions())) { - var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction - .getPositions()); + var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction.getPositions()); manualResizeRedaction.setTextBefore(surroundingText.getLeft()); manualResizeRedaction.setTextAfter(surroundingText.getRight()); processedResizeRedactions.add(manualResizeRedaction); @@ -75,15 +77,20 @@ public class ManualRedactionSurroundingTextService { manualRedactions.getEntriesToAdd().addAll(processedAddRedactions); manualRedactions.getResizeRedactions().addAll(processedResizeRedactions); - return manualRedactions; + + return AnalyzeResult.builder() + .dossierId(dossierId) + .fileId(fileId) + .manualRedactions(manualRedactions) + .duration(System.currentTimeMillis() - startTime) + .build(); } private Pair findSurroundingText(SectionText sectionText, String value, List toFindPositions) { - Set entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText - .getSectionNumber(), false, false, Engine.DICTIONARY, false); + Set entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false); Set entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null); 
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions); @@ -94,8 +101,7 @@ public class ManualRedactionSurroundingTextService { } if (sectionText.getCellStarts() != null && !sectionText.getCellStarts().isEmpty()) { - surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText - .getCellStarts()); + surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText.getCellStarts()); } else { surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java deleted file mode 100644 index 2425fd9e..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.redaction.service; - -import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection; -import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; -import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.codec.binary.Base64; -import org.springframework.stereotype.Service; - -import java.util.stream.Collectors; 
- -@Slf4j -@Service -@RequiredArgsConstructor -public class NerAnalyserService { - - private final RedactionStorageService redactionStorageService; - private final EntityRecognitionClient entityRecognitionClient; - private final RedactionServiceSettings redactionServiceSettings; - - public void computeNerEntities(String dossierId, String fileId) { - - if (redactionServiceSettings.isEnableEntityRecognition()) { - var text = redactionStorageService.getText(dossierId, fileId); - - long start = System.currentTimeMillis(); - - if (text != null) { - var nerRequest = EntityRecognitionRequest.builder() - .data(text.getSectionTexts() - .stream() - .map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), new String(Base64 - .encodeBase64(sectionText - .getText().getBytes())))) - .collect(Collectors.toList())) - .build(); - - var nerResponse = entityRecognitionClient.findAuthors(nerRequest); - - log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId); - - redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse); - } else { - log.warn("Warning, text for file: {} in dossier: {} is null", fileId, dossierId); - } - } - } - -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 9cce2ab9..e5f41214 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -1,5 +1,15 @@ package 
com.iqser.red.service.redaction.v1.server.redaction.service; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.stereotype.Service; + import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; @@ -10,13 +20,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionS import com.iqser.red.service.redaction.v1.server.redaction.model.Image; import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.collections4.CollectionUtils; -import org.springframework.stereotype.Service; - -import java.util.*; -import java.util.stream.Collectors; @Service @Slf4j @@ -85,7 +91,7 @@ public class RedactionLogCreatorService { List redactionLogEntities = new ArrayList<>(); - // Duplicates can exist due table extraction colums over multiple rows. + // Duplicates can exist due table extraction columns over multiple rows. 
Set processedIds = new HashSet<>(); entityLoop: diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java index 901d8042..fd37e29c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java @@ -1,8 +1,9 @@ package com.iqser.red.service.redaction.v1.server.settings; -import lombok.Data; import org.springframework.boot.context.properties.ConfigurationProperties; +import lombok.Data; + @Data @ConfigurationProperties("redaction-service") public class RedactionServiceSettings { @@ -17,6 +18,6 @@ public class RedactionServiceSettings { private int analysisVersion = 1; - private boolean enableEntityRecognition = true; + private boolean nerServiceEnabled = true; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java index 388a550a..4ac59e31 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -105,14 +105,13 @@ public class RedactionStorageService { try { inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES)); } catch 
(StorageObjectDoesNotExist e) { - log.debug("NER Entities not available."); - return null; + throw new NotFoundException("NER Entities are not available."); } try { return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class); } catch (IOException e) { - throw new RuntimeException("Could not convert NerEntities", e); + throw new RuntimeException("Could not convert NER Entities", e); } } @@ -146,8 +145,6 @@ public class RedactionStorageService { return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension(); } - - } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 4b0133f0..8c2c5abe 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -23,9 +23,6 @@ import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; -import com.fasterxml.jackson.core.type.TypeReference; -import com.iqser.red.service.persistence.service.v1.api.model.annotations.ImportedAnnotation; -import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*; import org.apache.commons.io.IOUtils; import org.junit.After; import org.junit.Before; @@ -56,6 +53,12 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Annota import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment; import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle; +import 
com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction; import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive; import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors; import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; @@ -215,6 +218,7 @@ public class RedactionIntegrationTest { loadDictionaryForTest(); loadTypeForTest(); + loadNerForTest(); when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getTypeResponse()); @@ -256,293 +260,6 @@ public class RedactionIntegrationTest { } - private void loadDictionaryForTest() { - - dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_author.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/vertebrate.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - 
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_address.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/hint_only.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/must_redact.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/published_information.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/test_method.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new 
ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/false_positive.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/purity.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); - } - - - private String cleanDictionaryEntry(String entry) { - - return 
TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); - } - - - private void loadTypeForTest() { - - typeColorMap.put(VERTEBRATE, "#ff85f7"); - typeColorMap.put(ADDRESS, "#ffe187"); - typeColorMap.put(AUTHOR, "#ffe187"); - typeColorMap.put(SPONSOR, "#85ebff"); - typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); - typeColorMap.put(REDACTION_INDICATOR, "#caff85"); - typeColorMap.put(HINT_ONLY, "#abc0c4"); - typeColorMap.put(MUST_REDACT, "#fab4c0"); - typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); - typeColorMap.put(TEST_METHOD, "#91fae8"); - typeColorMap.put(PII, "#66ccff"); - typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c"); - typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c"); - typeColorMap.put(FALSE_POSITIVE, "#ffffff"); - typeColorMap.put(PURITY, "#ffe187"); - typeColorMap.put(IMAGE, "#fcc5fb"); - typeColorMap.put(OCR, "#fcc5fb"); - typeColorMap.put(LOGO, "#ffe187"); - typeColorMap.put(FORMULA, "#ffe187"); - typeColorMap.put(SIGNATURE, "#ffe187"); - typeColorMap.put(IMPORTED_REDACTION, "#32a852"); - - hintTypeMap.put(VERTEBRATE, true); - hintTypeMap.put(ADDRESS, false); - hintTypeMap.put(AUTHOR, false); - hintTypeMap.put(SPONSOR, false); - hintTypeMap.put(NO_REDACTION_INDICATOR, true); - hintTypeMap.put(REDACTION_INDICATOR, true); - hintTypeMap.put(HINT_ONLY, true); - hintTypeMap.put(MUST_REDACT, true); - hintTypeMap.put(PUBLISHED_INFORMATION, true); - hintTypeMap.put(TEST_METHOD, true); - hintTypeMap.put(PII, false); - hintTypeMap.put(RECOMMENDATION_AUTHOR, false); - hintTypeMap.put(RECOMMENDATION_ADDRESS, false); - hintTypeMap.put(FALSE_POSITIVE, true); - hintTypeMap.put(PURITY, false); - hintTypeMap.put(IMAGE, true); - hintTypeMap.put(OCR, true); - hintTypeMap.put(FORMULA, false); - hintTypeMap.put(LOGO, false); - hintTypeMap.put(SIGNATURE, false); - hintTypeMap.put(DOSSIER_REDACTIONS, false); - hintTypeMap.put(IMPORTED_REDACTION, false); - - caseInSensitiveMap.put(VERTEBRATE, true); - caseInSensitiveMap.put(ADDRESS, 
false); - caseInSensitiveMap.put(AUTHOR, false); - caseInSensitiveMap.put(SPONSOR, false); - caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); - caseInSensitiveMap.put(REDACTION_INDICATOR, true); - caseInSensitiveMap.put(HINT_ONLY, true); - caseInSensitiveMap.put(MUST_REDACT, true); - caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); - caseInSensitiveMap.put(TEST_METHOD, false); - caseInSensitiveMap.put(PII, false); - caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false); - caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false); - caseInSensitiveMap.put(FALSE_POSITIVE, false); - caseInSensitiveMap.put(PURITY, false); - caseInSensitiveMap.put(IMAGE, true); - caseInSensitiveMap.put(OCR, true); - caseInSensitiveMap.put(SIGNATURE, true); - caseInSensitiveMap.put(LOGO, true); - caseInSensitiveMap.put(FORMULA, true); - caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); - caseInSensitiveMap.put(IMPORTED_REDACTION, false); - - recommendationTypeMap.put(VERTEBRATE, false); - recommendationTypeMap.put(ADDRESS, false); - recommendationTypeMap.put(AUTHOR, false); - recommendationTypeMap.put(SPONSOR, false); - recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); - recommendationTypeMap.put(REDACTION_INDICATOR, false); - recommendationTypeMap.put(HINT_ONLY, false); - recommendationTypeMap.put(MUST_REDACT, false); - recommendationTypeMap.put(PUBLISHED_INFORMATION, false); - recommendationTypeMap.put(TEST_METHOD, false); - recommendationTypeMap.put(PII, false); - recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true); - recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true); - recommendationTypeMap.put(FALSE_POSITIVE, false); - recommendationTypeMap.put(PURITY, false); - recommendationTypeMap.put(IMAGE, false); - recommendationTypeMap.put(OCR, false); - recommendationTypeMap.put(FORMULA, false); - recommendationTypeMap.put(SIGNATURE, false); - recommendationTypeMap.put(LOGO, false); - recommendationTypeMap.put(DOSSIER_REDACTIONS, false); - 
recommendationTypeMap.put(IMPORTED_REDACTION, false); - - rankTypeMap.put(FALSE_POSITIVE, 160); - rankTypeMap.put(PURITY, 155); - rankTypeMap.put(PII, 150); - rankTypeMap.put(ADDRESS, 140); - rankTypeMap.put(AUTHOR, 130); - rankTypeMap.put(SPONSOR, 120); - rankTypeMap.put(VERTEBRATE, 110); - rankTypeMap.put(MUST_REDACT, 100); - rankTypeMap.put(REDACTION_INDICATOR, 90); - rankTypeMap.put(NO_REDACTION_INDICATOR, 80); - rankTypeMap.put(PUBLISHED_INFORMATION, 70); - rankTypeMap.put(TEST_METHOD, 60); - rankTypeMap.put(HINT_ONLY, 50); - rankTypeMap.put(RECOMMENDATION_AUTHOR, 40); - rankTypeMap.put(RECOMMENDATION_ADDRESS, 30); - rankTypeMap.put(IMAGE, 30); - rankTypeMap.put(OCR, 29); - rankTypeMap.put(LOGO, 28); - rankTypeMap.put(SIGNATURE, 27); - rankTypeMap.put(FORMULA, 26); - rankTypeMap.put(DOSSIER_REDACTIONS, 200); - rankTypeMap.put(IMPORTED_REDACTION, 200); - - colors.setDefaultColor("#acfc00"); - colors.setNotRedacted("#cccccc"); - colors.setRequestAdd("#04b093"); - colors.setRequestRemove("#04b093"); - } - - - private List getTypeResponse() { - - return typeColorMap.entrySet() - .stream() - .map(typeColor -> Type.builder() - .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(typeColor.getKey()) - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColor.getValue()) - .isHint(hintTypeMap.get(typeColor.getKey())) - .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) - .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) - .rank(rankTypeMap.get(typeColor.getKey())) - .build()) - - .collect(Collectors.toList()); - } - - - private Type getDictionaryResponse(String type, boolean isDossierDictionary) { - - return Type.builder() - .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColorMap.get(type)) - .entries(isDossierDictionary ? 
toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) - .isHint(hintTypeMap.get(type)) - .isCaseInsensitive(caseInSensitiveMap.get(type)) - .isRecommendation(recommendationTypeMap.get(type)) - .rank(rankTypeMap.get(type)) - .build(); - } - - - private List toDictionaryEntry(List entries) { - - List dictionaryEntries = new ArrayList<>(); - entries.forEach(entry -> { - dictionaryEntries.add(DictionaryEntry.builder() - .value(entry) - .version(reanlysisVersions.getOrDefault(entry, 0L)) - .deleted(deleted.contains(entry)) - .build()); - }); - return dictionaryEntries; - } - - @Test public void test270Rotated() { @@ -584,9 +301,7 @@ public class RedactionIntegrationTest { duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); }); - duplicates.entrySet().forEach(entry -> { - assertThat(entry.getValue().size()).isEqualTo(1); - }); + duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1)); dictionary.get(AUTHOR).add("Drinking water"); when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L); @@ -1047,8 +762,6 @@ public class RedactionIntegrationTest { @Test public void testManualRedaction() throws IOException { -// 675eba69b0c2917de55462c817adaa05 - System.out.println("testManualRedaction"); long start = System.currentTimeMillis(); ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf"); @@ -1099,8 +812,6 @@ public class RedactionIntegrationTest { .page(1) .build())); -// manualRedactions.getEntriesToAdd().add(manualRedactionEntry); - AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); request.setManualRedactions(manualRedactions); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); @@ -1246,34 +957,6 @@ public class RedactionIntegrationTest { } - @SneakyThrows - private AnalyzeRequest prepareStorage(String file) { - - ClassPathResource 
pdfFileResource = new ClassPathResource(file); - - return prepareStorage(pdfFileResource.getInputStream()); - } - - - @SneakyThrows - private AnalyzeRequest prepareStorage(InputStream stream) { - - AnalyzeRequest request = AnalyzeRequest.builder() - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .lastProcessed(OffsetDateTime.now()) - .build(); - - var bytes = IOUtils.toByteArray(stream); - - storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes); - - return request; - - } - - @Test public void sponsorCompanyTest() throws IOException { @@ -1527,7 +1210,8 @@ public class RedactionIntegrationTest { fileOutputStream.write(annotateResponse.getDocument()); } - var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions); + var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions) + .getManualRedactions(); surroundingTextResult.getEntriesToAdd().forEach(addEntry -> { assertThat(addEntry.getTextAfter()).isNotEmpty(); }); @@ -1535,6 +1219,117 @@ public class RedactionIntegrationTest { } + private void loadDictionaryForTest() { + + dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_author.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/vertebrate.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/hint_only.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/must_redact.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/published_information.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/test_method.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/false_positive.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/purity.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); + } + + private static String loadFromClassPath(String path) { URL resource = ResourceLoader.class.getClassLoader().getResource(path); @@ -1572,6 +1367,191 
@@ public class RedactionIntegrationTest { } + /** Fills the per-type test fixture maps (typeColorMap, hintTypeMap, caseInSensitiveMap, recommendationTypeMap, rankTypeMap) and sets four colours on {@code colors}. */ private void loadTypeForTest() { + + /* Hex colour per type. NOTE(review): DOSSIER_REDACTIONS gets no colour here although it is present in the other four maps; getTypeResponse() iterates this map, so that type is not part of its result - confirm intended. */ typeColorMap.put(VERTEBRATE, "#ff85f7"); + typeColorMap.put(ADDRESS, "#ffe187"); + typeColorMap.put(AUTHOR, "#ffe187"); + typeColorMap.put(SPONSOR, "#85ebff"); + typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); + typeColorMap.put(REDACTION_INDICATOR, "#caff85"); + typeColorMap.put(HINT_ONLY, "#abc0c4"); + typeColorMap.put(MUST_REDACT, "#fab4c0"); + typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); + typeColorMap.put(TEST_METHOD, "#91fae8"); + typeColorMap.put(PII, "#66ccff"); + typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c"); + typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c"); + typeColorMap.put(FALSE_POSITIVE, "#ffffff"); + typeColorMap.put(PURITY, "#ffe187"); + typeColorMap.put(IMAGE, "#fcc5fb"); + typeColorMap.put(OCR, "#fcc5fb"); + typeColorMap.put(LOGO, "#ffe187"); + typeColorMap.put(FORMULA, "#ffe187"); + typeColorMap.put(SIGNATURE, "#ffe187"); + typeColorMap.put(IMPORTED_REDACTION, "#32a852"); + + /* Value later passed to Type.builder().isHint(...) for each type. */ hintTypeMap.put(VERTEBRATE, true); + hintTypeMap.put(ADDRESS, false); + hintTypeMap.put(AUTHOR, false); + hintTypeMap.put(SPONSOR, false); + hintTypeMap.put(NO_REDACTION_INDICATOR, true); + hintTypeMap.put(REDACTION_INDICATOR, true); + hintTypeMap.put(HINT_ONLY, true); + hintTypeMap.put(MUST_REDACT, true); + hintTypeMap.put(PUBLISHED_INFORMATION, true); + hintTypeMap.put(TEST_METHOD, true); + hintTypeMap.put(PII, false); + hintTypeMap.put(RECOMMENDATION_AUTHOR, false); + hintTypeMap.put(RECOMMENDATION_ADDRESS, false); + hintTypeMap.put(FALSE_POSITIVE, true); + hintTypeMap.put(PURITY, false); + hintTypeMap.put(IMAGE, true); + hintTypeMap.put(OCR, true); + hintTypeMap.put(FORMULA, false); + hintTypeMap.put(LOGO, false); + hintTypeMap.put(SIGNATURE, false); + hintTypeMap.put(DOSSIER_REDACTIONS, false); + hintTypeMap.put(IMPORTED_REDACTION, false); + + /* Value later passed to Type.builder().isCaseInsensitive(...) for each type. */ caseInSensitiveMap.put(VERTEBRATE, true); + caseInSensitiveMap.put(ADDRESS, false); + caseInSensitiveMap.put(AUTHOR, false); 
+ caseInSensitiveMap.put(SPONSOR, false); + caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); + caseInSensitiveMap.put(REDACTION_INDICATOR, true); + caseInSensitiveMap.put(HINT_ONLY, true); + caseInSensitiveMap.put(MUST_REDACT, true); + caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); + caseInSensitiveMap.put(TEST_METHOD, false); + caseInSensitiveMap.put(PII, false); + caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false); + caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false); + caseInSensitiveMap.put(FALSE_POSITIVE, false); + caseInSensitiveMap.put(PURITY, false); + caseInSensitiveMap.put(IMAGE, true); + caseInSensitiveMap.put(OCR, true); + caseInSensitiveMap.put(SIGNATURE, true); + caseInSensitiveMap.put(LOGO, true); + caseInSensitiveMap.put(FORMULA, true); + caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); + caseInSensitiveMap.put(IMPORTED_REDACTION, false); + + /* Value later passed to Type.builder().isRecommendation(...); only the two RECOMMENDATION_* types are set to true. */ recommendationTypeMap.put(VERTEBRATE, false); + recommendationTypeMap.put(ADDRESS, false); + recommendationTypeMap.put(AUTHOR, false); + recommendationTypeMap.put(SPONSOR, false); + recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); + recommendationTypeMap.put(REDACTION_INDICATOR, false); + recommendationTypeMap.put(HINT_ONLY, false); + recommendationTypeMap.put(MUST_REDACT, false); + recommendationTypeMap.put(PUBLISHED_INFORMATION, false); + recommendationTypeMap.put(TEST_METHOD, false); + recommendationTypeMap.put(PII, false); + recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true); + recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true); + recommendationTypeMap.put(FALSE_POSITIVE, false); + recommendationTypeMap.put(PURITY, false); + recommendationTypeMap.put(IMAGE, false); + recommendationTypeMap.put(OCR, false); + recommendationTypeMap.put(FORMULA, false); + recommendationTypeMap.put(SIGNATURE, false); + recommendationTypeMap.put(LOGO, false); + recommendationTypeMap.put(DOSSIER_REDACTIONS, false); + recommendationTypeMap.put(IMPORTED_REDACTION, false); + + 
/* Value later passed to Type.builder().rank(...). NOTE(review): RECOMMENDATION_ADDRESS and IMAGE both get 30, and DOSSIER_REDACTIONS and IMPORTED_REDACTION both get 200 - confirm the ties are intended. */ rankTypeMap.put(FALSE_POSITIVE, 160); + rankTypeMap.put(PURITY, 155); + rankTypeMap.put(PII, 150); + rankTypeMap.put(ADDRESS, 140); + rankTypeMap.put(AUTHOR, 130); + rankTypeMap.put(SPONSOR, 120); + rankTypeMap.put(VERTEBRATE, 110); + rankTypeMap.put(MUST_REDACT, 100); + rankTypeMap.put(REDACTION_INDICATOR, 90); + rankTypeMap.put(NO_REDACTION_INDICATOR, 80); + rankTypeMap.put(PUBLISHED_INFORMATION, 70); + rankTypeMap.put(TEST_METHOD, 60); + rankTypeMap.put(HINT_ONLY, 50); + rankTypeMap.put(RECOMMENDATION_AUTHOR, 40); + rankTypeMap.put(RECOMMENDATION_ADDRESS, 30); + rankTypeMap.put(IMAGE, 30); + rankTypeMap.put(OCR, 29); + rankTypeMap.put(LOGO, 28); + rankTypeMap.put(SIGNATURE, 27); + rankTypeMap.put(FORMULA, 26); + rankTypeMap.put(DOSSIER_REDACTIONS, 200); + rankTypeMap.put(IMPORTED_REDACTION, 200); + + /* Default, not-redacted, request-add and request-remove colours set on the colors object. */ colors.setDefaultColor("#acfc00"); + colors.setNotRedacted("#cccccc"); + colors.setRequestAdd("#04b093"); + colors.setRequestRemove("#04b093"); + } + + + /** Reads the classpath resource files/ner_response.json and stores its bytes via storageService under the NER_ENTITIES storage id built from TEST_DOSSIER_ID and TEST_FILE_ID. */ @SneakyThrows + private void loadNerForTest() { + + ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); + var bytes = IOUtils.toByteArray(responseJson.getInputStream()); + storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes); + } + + + /** Builds one Type per entry of typeColorMap; id is "type:TEST_DOSSIER_TEMPLATE_ID", and hint, case-insensitivity, recommendation and rank values are looked up in the corresponding maps by the same key. */ private List getTypeResponse() { + + return typeColorMap.entrySet() + .stream() + .map(typeColor -> Type.builder() + .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(typeColor.getKey()) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColor.getValue()) + .isHint(hintTypeMap.get(typeColor.getKey())) + .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) + .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) + .rank(rankTypeMap.get(typeColor.getKey())) + .build()) + + .collect(Collectors.toList()); + } + + + /** Builds a Type for the given type name including its entries; the entries are taken from dossierDictionary when isDossierDictionary is true, otherwise from dictionary. */ private Type getDictionaryResponse(String type, boolean isDossierDictionary) { + + return Type.builder() + 
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColorMap.get(type)) + .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) + .isHint(hintTypeMap.get(type)) + .isCaseInsensitive(caseInSensitiveMap.get(type)) + .isRecommendation(recommendationTypeMap.get(type)) + .rank(rankTypeMap.get(type)) + .build(); + } + + + /** Passes the entry through TextNormalizationUtilities.removeHyphenLineBreaks and then replaces every newline character with a single space. */ private String cleanDictionaryEntry(String entry) { + + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); + } + + + /** Wraps each given value in a DictionaryEntry: version comes from reanlysisVersions (0 when absent) and the deleted flag is true when the value is contained in deleted. */ private List toDictionaryEntry(List entries) { + + List dictionaryEntries = new ArrayList<>(); + entries.forEach(entry -> { + dictionaryEntries.add(DictionaryEntry.builder() + .value(entry) + .version(reanlysisVersions.getOrDefault(entry, 0L)) + .deleted(deleted.contains(entry)) + .build()); + }); + return dictionaryEntries; + } + + @Test public void testImportedRedactions() throws IOException { @@ -1579,7 +1559,6 @@ public class RedactionIntegrationTest { ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.pdf"); ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.json"); - AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), IOUtils.toByteArray(importedRedactions.getInputStream())); @@ -1598,4 +1577,32 @@ public class RedactionIntegrationTest { } } + + /** Builds an AnalyzeRequest for the test dossier template, dossier and file ids (lastProcessed set to the current time), stores the bytes of the given stream under the ORIGIN storage id, and returns the request. */ @SneakyThrows + private AnalyzeRequest prepareStorage(InputStream stream) { + + AnalyzeRequest request = AnalyzeRequest.builder() + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .lastProcessed(OffsetDateTime.now()) + .build(); + + var bytes = IOUtils.toByteArray(stream); + + 
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes); + + return request; + + } + + + /** Overload taking a classpath location: opens it as a ClassPathResource and delegates to prepareStorage(InputStream). */ @SneakyThrows + private AnalyzeRequest prepareStorage(String file) { + + ClassPathResource pdfFileResource = new ClassPathResource(file); + + return prepareStorage(pdfFileResource.getInputStream()); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/ner_response.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/ner_response.json new file mode 100644 index 00000000..64ae1dcd --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/ner_response.json @@ -0,0 +1,12 @@ +{ + "result": { + "1": [ + { + "value": "Mannheim", + "startOffset": 0, + "endOffset": 8, + "type": "CITY" + } + ] + } +} \ No newline at end of file