RED-3350 Communication with entity recognition via queues

This commit is contained in:
Philipp Schramm 2022-02-03 13:00:23 +01:00
parent 90fe282313
commit 68727178e7
13 changed files with 534 additions and 541 deletions

View File

@ -12,7 +12,7 @@
<artifactId>redaction-service-api-v1</artifactId>
<properties>
<persistence-service.version>1.32.0</persistence-service.version>
<persistence-service.version>1.39.0</persistence-service.version>
</properties>
<dependencies>

View File

@ -2,6 +2,6 @@ package com.iqser.red.service.redaction.v1.model;
public enum MessageType {
FULL_ANALYSE, REANALYSE, SURROUNDING_TEXT
ANALYSE, REANALYSE, STRUCTURE_ANALYSE, SURROUNDING_TEXT
}

View File

@ -1,30 +1,42 @@
package com.iqser.red.service.redaction.v1.server.controller;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.*;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.stream.Collectors;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.stream.Collectors;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@RestController
@ -43,8 +55,7 @@ public class RedactionController implements RedactionResource {
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest
.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
var mergedRedactionLog = getRedactionLog(RedactionRequest.builder()
.fileId(annotateRequest.getFileId())
.manualRedactions(annotateRequest.getManualRedactions())
@ -73,13 +84,11 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@ -101,13 +110,11 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@ -131,8 +138,7 @@ public class RedactionController implements RedactionResource {
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest
.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
} catch (Exception e) {
throw new RedactionException(e);
@ -174,8 +180,7 @@ public class RedactionController implements RedactionResource {
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest
.getFileId());
SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId());
if (sectionGrid.getSections().isEmpty()) {
log.info("SectionGrid does not have headlines set. Computing headlines now!");
@ -184,8 +189,7 @@ public class RedactionController implements RedactionResource {
// enhance section grid with headline data
for (var sectionText : text.getSectionTexts()) {
sectionGrid.getSections()
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText
.getSectionAreas()
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
.stream()
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas()));
@ -194,8 +198,7 @@ public class RedactionController implements RedactionResource {
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest
.getManualRedactions(), redactionRequest.getExcludedPages());
return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages());
}
@ -217,11 +220,9 @@ public class RedactionController implements RedactionResource {
@PathVariable("fileId") String fileId,
@RequestBody ManualRedactions manualRedactions) {
long start = System.currentTimeMillis();
var result = manualRedactionSurroundingTextService.addSurroundingText(dossierId, fileId, manualRedactions);
log.info("add surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, System
.currentTimeMillis() - start);
return result;
log.info("Added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, result.getDuration());
return result.getManualRedactions();
}
}

View File

@ -1,5 +1,12 @@
package com.iqser.red.service.redaction.v1.server.queue;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
@ -8,18 +15,10 @@ import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
@Slf4j
@Service
@RequiredArgsConstructor
@ -28,7 +27,6 @@ public class RedactionMessageReceiver {
private final ObjectMapper objectMapper;
private final AnalyzeService analyzeService;
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
private final NerAnalyserService nerAnalyserService;
private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@ -40,31 +38,26 @@ public class RedactionMessageReceiver {
log.info("Processing analyze request for file: {}", analyzeRequest.getFileId());
AnalyzeResult result = null;
switch (analyzeRequest.getMessageType()){
switch (analyzeRequest.getMessageType()) {
case REANALYSE:
result = analyzeService.reanalyze(analyzeRequest);
log.info("Successfully reanalyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
case FULL_ANALYSE:
// TODO Separate structure analysis by other queue
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
// TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a separate queue after structure analysis
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
result = analyzeService.analyze(analyzeRequest);
log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result
.getDuration());
case STRUCTURE_ANALYSE:
result = analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
log.info("Successfully analyzed structure dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
case ANALYSE:
result = analyzeService.analyze(analyzeRequest);
log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
case SURROUNDING_TEXT:
var manualRedactions = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
result = AnalyzeResult.builder()
.dossierId(analyzeRequest.getDossierId())
.fileId(analyzeRequest.getFileId())
.manualRedactions(manualRedactions)
.build();
result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
break;
}

View File

@ -32,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
@ -66,12 +67,11 @@ public class AnalyzeService {
private final RedactionServiceSettings redactionServiceSettings;
private final SectionTextBuilderService sectionTextBuilderService;
private final SectionGridCreatorService sectionGridCreatorService;
private final NerAnalyserService nerAnalyserService;
private final ImageService imageService;
private final ImportedRedactionService importedRedactionService;
public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
public AnalyzeResult analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
@ -79,7 +79,6 @@ public class AnalyzeService {
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
Map<Integer, List<PdfImage>> pdfImages = null;
@ -105,44 +104,17 @@ public class AnalyzeService {
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas())));
log.info("Store text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());
log.info("Document structure analysis successful, took: {}", System.currentTimeMillis() - startTime);
}
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
.getDossierTemplateVersion(), dictionary.getVersion()
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
return AnalyzeResult.builder()
.dossierId(analyzeRequest.getDossierId())
.fileId(analyzeRequest.getFileId())
.duration(System.currentTimeMillis() - startTime)
.numberOfPages(text.getNumberOfPages())
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
.build();
}
@ -168,10 +140,11 @@ public class AnalyzeService {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
}
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
NerEntities nerEntities;
if (redactionServiceSettings.isNerServiceEnabled()) {
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
} else {
nerEntities = NerEntities.builder().build();
}
List<SectionText> reanalysisSections = text.getSectionTexts()
@ -188,13 +161,49 @@ public class AnalyzeService {
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), newRedactionLogEntries, false);
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
redactionLog.getRedactionLogEntry()
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType()
.equals(IMPORTED_REDACTION_TYPE));
redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
}
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
NerEntities nerEntities;
if (redactionServiceSettings.isNerServiceEnabled()) {
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
} else {
nerEntities = NerEntities.builder().build();
}
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
.getDossierTemplateVersion(), dictionary.getVersion()
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
}
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
Text text, AnalyzeRequest analyzeRequest) {

View File

@ -1,5 +1,18 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
@ -8,20 +21,21 @@ import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@ -46,7 +60,7 @@ public class EntityRedactionService {
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
Map<Integer, List<Entity>> entitiesPerPage = convertToEnititesPerPage(entities);
Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(entities);
return new PageEntities(entitiesPerPage, imagesPerPage);
}
@ -145,7 +159,7 @@ public class EntityRedactionService {
}
private Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities) {
private Map<Integer, List<Entity>> convertToEntitiesPerPage(Set<Entity> entities) {
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
@ -209,7 +223,7 @@ public class EntityRedactionService {
private Entities findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local, NerEntities nerEntities,
List<Integer> cellstarts) {
List<Integer> cellStarts) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString();
@ -230,32 +244,32 @@ public class EntityRedactionService {
Set<Entity> nerFound = new HashSet<>();
if (!local) {
nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellstarts, headline));
nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
}
return new Entities(EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary), nerFound);
}
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities,
List<Integer> cellstarts, String headline) {
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts,
String headline) {
Set<Entity> entities = new HashSet<>();
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
.containsKey(sectionNumber)) {
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getResult().containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> {
if (cellstarts == null || cellstarts.isEmpty()) {
entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
if (cellStarts == null || cellStarts.isEmpty()) {
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
} else {
boolean intersectsCellStart = false;
for (Integer cellStart : cellstarts) {
for (Integer cellStart : cellStarts) {
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
intersectsCellStart = true;
break;
}
}
if (!intersectsCellStart) {
entities.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
}
}
});

View File

@ -1,9 +1,17 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
@ -13,14 +21,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
@Slf4j
@Service
@ -31,8 +34,9 @@ public class ManualRedactionSurroundingTextService {
private final SurroundingWordsService surroundingWordsService;
public ManualRedactions addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
long startTime = System.currentTimeMillis();
Text text = redactionStorageService.getText(dossierId, fileId);
List<ManualRedactionEntry> processedAddRedactions = new ArrayList<>();
List<ManualResizeRedaction> processedResizeRedactions = new ArrayList<>();
@ -49,8 +53,7 @@ public class ManualRedactionSurroundingTextService {
while (addItty.hasNext()) {
var manualAddRedaction = addItty.next();
if (sectionContainsEntry(sectionArea, manualAddRedaction.getPositions())) {
var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction
.getPositions());
var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction.getPositions());
manualAddRedaction.setTextBefore(surroundingText.getLeft());
manualAddRedaction.setTextAfter(surroundingText.getRight());
processedAddRedactions.add(manualAddRedaction);
@ -62,8 +65,7 @@ public class ManualRedactionSurroundingTextService {
while (resizeItty.hasNext()) {
var manualResizeRedaction = resizeItty.next();
if (sectionContainsEntry(sectionArea, manualResizeRedaction.getPositions())) {
var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction
.getPositions());
var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction.getPositions());
manualResizeRedaction.setTextBefore(surroundingText.getLeft());
manualResizeRedaction.setTextAfter(surroundingText.getRight());
processedResizeRedactions.add(manualResizeRedaction);
@ -75,15 +77,20 @@ public class ManualRedactionSurroundingTextService {
manualRedactions.getEntriesToAdd().addAll(processedAddRedactions);
manualRedactions.getResizeRedactions().addAll(processedResizeRedactions);
return manualRedactions;
return AnalyzeResult.builder()
.dossierId(dossierId)
.fileId(fileId)
.manualRedactions(manualRedactions)
.duration(System.currentTimeMillis() - startTime)
.build();
}
private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
List<Rectangle> toFindPositions) {
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText
.getSectionNumber(), false, false, Engine.DICTIONARY, false);
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false);
Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
@ -94,8 +101,7 @@ public class ManualRedactionSurroundingTextService {
}
if (sectionText.getCellStarts() != null && !sectionText.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText
.getCellStarts());
surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null);
}

View File

@ -1,53 +0,0 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.binary.Base64;
import org.springframework.stereotype.Service;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;

/**
 * Runs named-entity recognition (NER) for a file's extracted text and stores
 * the raw response under {@link FileType#NER_ENTITIES}.
 *
 * <p>Does nothing when entity recognition is disabled via
 * {@link RedactionServiceSettings#isEnableEntityRecognition()}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class NerAnalyserService {

	private final RedactionStorageService redactionStorageService;
	private final EntityRecognitionClient entityRecognitionClient;
	private final RedactionServiceSettings redactionServiceSettings;

	/**
	 * Sends the file's section texts to the entity-recognition service and
	 * persists the response.
	 *
	 * @param dossierId id of the dossier the file belongs to
	 * @param fileId    id of the file whose text is analysed
	 */
	public void computeNerEntities(String dossierId, String fileId) {
		// Feature toggle: skip entirely when entity recognition is disabled.
		if (!redactionServiceSettings.isEnableEntityRecognition()) {
			return;
		}
		var text = redactionStorageService.getText(dossierId, fileId);
		if (text == null) {
			log.warn("Warning, text for file: {} in dossier: {} is null", fileId, dossierId);
			return;
		}
		long start = System.currentTimeMillis();
		var nerRequest = EntityRecognitionRequest.builder()
				.data(text.getSectionTexts()
						.stream()
						// Base64-encode each section. Charset is pinned to UTF-8:
						// the previous getBytes() used the platform default charset,
						// which made the payload environment-dependent.
						.map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(),
								Base64.encodeBase64String(sectionText.getText().getBytes(StandardCharsets.UTF_8))))
						.collect(Collectors.toList()))
				.build();
		var nerResponse = entityRecognitionClient.findAuthors(nerRequest);
		log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId);
		redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse);
	}
}

View File

@ -1,5 +1,15 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
@ -10,13 +20,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionS
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
@Service
@Slf4j
@ -85,7 +91,7 @@ public class RedactionLogCreatorService {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
// Duplicates can exist due table extraction colums over multiple rows.
// Duplicates can exist due to table extraction columns spanning multiple rows.
Set<String> processedIds = new HashSet<>();
entityLoop:

View File

@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.settings;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import lombok.Data;
@Data
@ConfigurationProperties("redaction-service")
public class RedactionServiceSettings {
@ -17,6 +18,6 @@ public class RedactionServiceSettings {
private int analysisVersion = 1;
private boolean enableEntityRecognition = true;
private boolean nerServiceEnabled = true;
}

View File

@ -105,14 +105,13 @@ public class RedactionStorageService {
try {
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
} catch (StorageObjectDoesNotExist e) {
log.debug("NER Entities not available.");
return null;
throw new NotFoundException("NER Entities are not available.");
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
} catch (IOException e) {
throw new RuntimeException("Could not convert NerEntities", e);
throw new RuntimeException("Could not convert NER Entities", e);
}
}
@ -146,8 +145,6 @@ public class RedactionStorageService {
return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
}
}
}

View File

@ -23,9 +23,6 @@ import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import com.fasterxml.jackson.core.type.TypeReference;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ImportedAnnotation;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
import org.apache.commons.io.IOUtils;
import org.junit.After;
import org.junit.Before;
@ -56,6 +53,12 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Annota
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
@ -215,6 +218,7 @@ public class RedactionIntegrationTest {
loadDictionaryForTest();
loadTypeForTest();
loadNerForTest();
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getTypeResponse());
@ -256,293 +260,6 @@ public class RedactionIntegrationTest {
}
/**
 * Populates the per-type test dictionaries from classpath resource files.
 * Each raw line is normalized via {@link #cleanDictionaryEntry(String)} and
 * de-duplicated by collecting to a Set before being appended to the type's list.
 */
private void loadDictionaryForTest() {
	dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/vertebrate.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/hint_only.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/published_information.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/test_method.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/PII.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/false_positive.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/purity.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	// Image-like types have no dictionary content; they share the empty resource.
	dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/empty.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/empty.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/empty.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/empty.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/empty.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	// Dossier-level dictionary is kept separately from the file-level one.
	dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
			.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
					.stream()
					.map(this::cleanDictionaryEntry)
					.collect(Collectors.toSet()));
	// Imported redactions start empty; individual tests add entries as needed.
	dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
}
/**
 * Normalizes a raw dictionary line: joins hyphenated line breaks and
 * flattens remaining newlines into single spaces.
 */
private String cleanDictionaryEntry(String entry) {
	return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
}
/**
 * Configures the per-type test fixture maps consumed by getTypeResponse()
 * and getDictionaryResponse(): color, hint flag, case-insensitivity,
 * recommendation flag and rank per entity type, plus default annotation colors.
 */
private void loadTypeForTest() {
	// Highlight color per type.
	typeColorMap.put(VERTEBRATE, "#ff85f7");
	typeColorMap.put(ADDRESS, "#ffe187");
	typeColorMap.put(AUTHOR, "#ffe187");
	typeColorMap.put(SPONSOR, "#85ebff");
	typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
	typeColorMap.put(REDACTION_INDICATOR, "#caff85");
	typeColorMap.put(HINT_ONLY, "#abc0c4");
	typeColorMap.put(MUST_REDACT, "#fab4c0");
	typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
	typeColorMap.put(TEST_METHOD, "#91fae8");
	typeColorMap.put(PII, "#66ccff");
	typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
	typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
	typeColorMap.put(FALSE_POSITIVE, "#ffffff");
	typeColorMap.put(PURITY, "#ffe187");
	typeColorMap.put(IMAGE, "#fcc5fb");
	typeColorMap.put(OCR, "#fcc5fb");
	typeColorMap.put(LOGO, "#ffe187");
	typeColorMap.put(FORMULA, "#ffe187");
	typeColorMap.put(SIGNATURE, "#ffe187");
	typeColorMap.put(IMPORTED_REDACTION, "#32a852");
	// isHint flag per type (fed into Type.isHint).
	hintTypeMap.put(VERTEBRATE, true);
	hintTypeMap.put(ADDRESS, false);
	hintTypeMap.put(AUTHOR, false);
	hintTypeMap.put(SPONSOR, false);
	hintTypeMap.put(NO_REDACTION_INDICATOR, true);
	hintTypeMap.put(REDACTION_INDICATOR, true);
	hintTypeMap.put(HINT_ONLY, true);
	hintTypeMap.put(MUST_REDACT, true);
	hintTypeMap.put(PUBLISHED_INFORMATION, true);
	hintTypeMap.put(TEST_METHOD, true);
	hintTypeMap.put(PII, false);
	hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
	hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
	hintTypeMap.put(FALSE_POSITIVE, true);
	hintTypeMap.put(PURITY, false);
	hintTypeMap.put(IMAGE, true);
	hintTypeMap.put(OCR, true);
	hintTypeMap.put(FORMULA, false);
	hintTypeMap.put(LOGO, false);
	hintTypeMap.put(SIGNATURE, false);
	hintTypeMap.put(DOSSIER_REDACTIONS, false);
	hintTypeMap.put(IMPORTED_REDACTION, false);
	// Case-insensitivity flag per type (fed into Type.isCaseInsensitive).
	caseInSensitiveMap.put(VERTEBRATE, true);
	caseInSensitiveMap.put(ADDRESS, false);
	caseInSensitiveMap.put(AUTHOR, false);
	caseInSensitiveMap.put(SPONSOR, false);
	caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
	caseInSensitiveMap.put(REDACTION_INDICATOR, true);
	caseInSensitiveMap.put(HINT_ONLY, true);
	caseInSensitiveMap.put(MUST_REDACT, true);
	caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
	caseInSensitiveMap.put(TEST_METHOD, false);
	caseInSensitiveMap.put(PII, false);
	caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
	caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
	caseInSensitiveMap.put(FALSE_POSITIVE, false);
	caseInSensitiveMap.put(PURITY, false);
	caseInSensitiveMap.put(IMAGE, true);
	caseInSensitiveMap.put(OCR, true);
	caseInSensitiveMap.put(SIGNATURE, true);
	caseInSensitiveMap.put(LOGO, true);
	caseInSensitiveMap.put(FORMULA, true);
	caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
	caseInSensitiveMap.put(IMPORTED_REDACTION, false);
	// Recommendation flag per type; only the RECOMMENDATION_* types are true.
	recommendationTypeMap.put(VERTEBRATE, false);
	recommendationTypeMap.put(ADDRESS, false);
	recommendationTypeMap.put(AUTHOR, false);
	recommendationTypeMap.put(SPONSOR, false);
	recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
	recommendationTypeMap.put(REDACTION_INDICATOR, false);
	recommendationTypeMap.put(HINT_ONLY, false);
	recommendationTypeMap.put(MUST_REDACT, false);
	recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
	recommendationTypeMap.put(TEST_METHOD, false);
	recommendationTypeMap.put(PII, false);
	recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
	recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
	recommendationTypeMap.put(FALSE_POSITIVE, false);
	recommendationTypeMap.put(PURITY, false);
	recommendationTypeMap.put(IMAGE, false);
	recommendationTypeMap.put(OCR, false);
	recommendationTypeMap.put(FORMULA, false);
	recommendationTypeMap.put(SIGNATURE, false);
	recommendationTypeMap.put(LOGO, false);
	recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
	recommendationTypeMap.put(IMPORTED_REDACTION, false);
	// Rank per type (fed into Type.rank); presumably decides precedence
	// between overlapping entities — TODO confirm against production code.
	rankTypeMap.put(FALSE_POSITIVE, 160);
	rankTypeMap.put(PURITY, 155);
	rankTypeMap.put(PII, 150);
	rankTypeMap.put(ADDRESS, 140);
	rankTypeMap.put(AUTHOR, 130);
	rankTypeMap.put(SPONSOR, 120);
	rankTypeMap.put(VERTEBRATE, 110);
	rankTypeMap.put(MUST_REDACT, 100);
	rankTypeMap.put(REDACTION_INDICATOR, 90);
	rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
	rankTypeMap.put(PUBLISHED_INFORMATION, 70);
	rankTypeMap.put(TEST_METHOD, 60);
	rankTypeMap.put(HINT_ONLY, 50);
	rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
	rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
	rankTypeMap.put(IMAGE, 30);
	rankTypeMap.put(OCR, 29);
	rankTypeMap.put(LOGO, 28);
	rankTypeMap.put(SIGNATURE, 27);
	rankTypeMap.put(FORMULA, 26);
	rankTypeMap.put(DOSSIER_REDACTIONS, 200);
	rankTypeMap.put(IMPORTED_REDACTION, 200);
	// Default annotation colors used when no type-specific color applies.
	colors.setDefaultColor("#acfc00");
	colors.setNotRedacted("#cccccc");
	colors.setRequestAdd("#04b093");
	colors.setRequestRemove("#04b093");
}
/**
 * Builds the mocked type-service response: one Type per entry of
 * typeColorMap, enriched from the hint/case/recommendation/rank fixture maps.
 */
private List<Type> getTypeResponse() {
	return typeColorMap.entrySet()
			.stream()
			.map(typeColor -> Type.builder()
					.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
					.type(typeColor.getKey())
					.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
					.hexColor(typeColor.getValue())
					.isHint(hintTypeMap.get(typeColor.getKey()))
					.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
					.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
					.rank(rankTypeMap.get(typeColor.getKey()))
					.build())
			.collect(Collectors.toList());
}
/**
 * Builds a mocked dictionary response for one type.
 *
 * @param isDossierDictionary when true, entries come from the dossier-level
 *                            dictionary instead of the file-level one
 */
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
	return Type.builder()
			.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
			.hexColor(typeColorMap.get(type))
			.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
			.isHint(hintTypeMap.get(type))
			.isCaseInsensitive(caseInSensitiveMap.get(type))
			.isRecommendation(recommendationTypeMap.get(type))
			.rank(rankTypeMap.get(type))
			.build();
}
/**
 * Wraps raw dictionary values into DictionaryEntry objects, attaching the
 * reanalysis version and deletion flag tracked by the test fixture.
 */
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
	List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
	entries.forEach(entry -> {
		dictionaryEntries.add(DictionaryEntry.builder()
				.value(entry)
				.version(reanlysisVersions.getOrDefault(entry, 0L))
				.deleted(deleted.contains(entry))
				.build());
	});
	return dictionaryEntries;
}
@Test
public void test270Rotated() {
@ -584,9 +301,7 @@ public class RedactionIntegrationTest {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
duplicates.entrySet().forEach(entry -> {
assertThat(entry.getValue().size()).isEqualTo(1);
});
duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1));
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
@ -1047,8 +762,6 @@ public class RedactionIntegrationTest {
@Test
public void testManualRedaction() throws IOException {
// 675eba69b0c2917de55462c817adaa05
System.out.println("testManualRedaction");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
@ -1099,8 +812,6 @@ public class RedactionIntegrationTest {
.page(1)
.build()));
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@ -1246,34 +957,6 @@ public class RedactionIntegrationTest {
}
/** Convenience overload: resolves a classpath resource path and delegates to the stream variant. */
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
	ClassPathResource pdfFileResource = new ClassPathResource(file);
	return prepareStorage(pdfFileResource.getInputStream());
}
/**
 * Stores the given PDF bytes as the ORIGIN object for the fixed test
 * dossier/file ids and returns a matching AnalyzeRequest.
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {
	AnalyzeRequest request = AnalyzeRequest.builder()
			.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
			.dossierId(TEST_DOSSIER_ID)
			.fileId(TEST_FILE_ID)
			.lastProcessed(OffsetDateTime.now())
			.build();
	var bytes = IOUtils.toByteArray(stream);
	storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
	return request;
}
@Test
public void sponsorCompanyTest() throws IOException {
@ -1527,7 +1210,8 @@ public class RedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions);
var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions)
.getManualRedactions();
surroundingTextResult.getEntriesToAdd().forEach(addEntry -> {
assertThat(addEntry.getTextAfter()).isNotEmpty();
});
@ -1535,6 +1219,117 @@ public class RedactionIntegrationTest {
}
/**
 * Populates the per-type test dictionaries from classpath resource files.
 * Each raw line is normalized via {@link #cleanDictionaryEntry(String)} and
 * de-duplicated (collected to a Set) before being appended to the type's list.
 */
private void loadDictionaryForTest() {
	addDictionary(AUTHOR, "dictionaries/CBI_author.txt");
	addDictionary(SPONSOR, "dictionaries/CBI_sponsor.txt");
	addDictionary(VERTEBRATE, "dictionaries/vertebrate.txt");
	addDictionary(ADDRESS, "dictionaries/CBI_address.txt");
	addDictionary(NO_REDACTION_INDICATOR, "dictionaries/no_redaction_indicator.txt");
	addDictionary(REDACTION_INDICATOR, "dictionaries/redaction_indicator.txt");
	addDictionary(HINT_ONLY, "dictionaries/hint_only.txt");
	addDictionary(MUST_REDACT, "dictionaries/must_redact.txt");
	addDictionary(PUBLISHED_INFORMATION, "dictionaries/published_information.txt");
	addDictionary(TEST_METHOD, "dictionaries/test_method.txt");
	addDictionary(PII, "dictionaries/PII.txt");
	addDictionary(RECOMMENDATION_AUTHOR, "dictionaries/recommendation_CBI_author.txt");
	addDictionary(RECOMMENDATION_ADDRESS, "dictionaries/recommendation_CBI_address.txt");
	addDictionary(FALSE_POSITIVE, "dictionaries/false_positive.txt");
	addDictionary(PURITY, "dictionaries/purity.txt");
	// Image-like types have no dictionary content; they share the empty resource.
	addDictionary(IMAGE, "dictionaries/empty.txt");
	addDictionary(OCR, "dictionaries/empty.txt");
	addDictionary(LOGO, "dictionaries/empty.txt");
	addDictionary(SIGNATURE, "dictionaries/empty.txt");
	addDictionary(FORMULA, "dictionaries/empty.txt");
	// Dossier-level dictionary is kept separately from the file-level one.
	dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
			.addAll(loadCleanedEntries("dictionaries/dossier_redactions.txt"));
	// Imported redactions start empty; individual tests add entries as needed.
	dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
}

/** Loads one resource file into the file-level dictionary under the given type key. */
private void addDictionary(String type, String resource) {
	dictionary.computeIfAbsent(type, v -> new ArrayList<>()).addAll(loadCleanedEntries(resource));
}

/** Reads a dictionary resource and returns its normalized, de-duplicated entries. */
private Set<String> loadCleanedEntries(String resource) {
	return ResourceLoader.load(resource)
			.stream()
			.map(this::cleanDictionaryEntry)
			.collect(Collectors.toSet());
}
private static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
@ -1572,6 +1367,191 @@ public class RedactionIntegrationTest {
}
/**
 * Configures the per-type test fixture maps consumed by getTypeResponse()
 * and getDictionaryResponse(): color, hint flag, case-insensitivity,
 * recommendation flag and rank per entity type, plus default annotation colors.
 */
private void loadTypeForTest() {
	// Highlight color per type.
	typeColorMap.put(VERTEBRATE, "#ff85f7");
	typeColorMap.put(ADDRESS, "#ffe187");
	typeColorMap.put(AUTHOR, "#ffe187");
	typeColorMap.put(SPONSOR, "#85ebff");
	typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
	typeColorMap.put(REDACTION_INDICATOR, "#caff85");
	typeColorMap.put(HINT_ONLY, "#abc0c4");
	typeColorMap.put(MUST_REDACT, "#fab4c0");
	typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
	typeColorMap.put(TEST_METHOD, "#91fae8");
	typeColorMap.put(PII, "#66ccff");
	typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
	typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
	typeColorMap.put(FALSE_POSITIVE, "#ffffff");
	typeColorMap.put(PURITY, "#ffe187");
	typeColorMap.put(IMAGE, "#fcc5fb");
	typeColorMap.put(OCR, "#fcc5fb");
	typeColorMap.put(LOGO, "#ffe187");
	typeColorMap.put(FORMULA, "#ffe187");
	typeColorMap.put(SIGNATURE, "#ffe187");
	typeColorMap.put(IMPORTED_REDACTION, "#32a852");
	// isHint flag per type (fed into Type.isHint).
	hintTypeMap.put(VERTEBRATE, true);
	hintTypeMap.put(ADDRESS, false);
	hintTypeMap.put(AUTHOR, false);
	hintTypeMap.put(SPONSOR, false);
	hintTypeMap.put(NO_REDACTION_INDICATOR, true);
	hintTypeMap.put(REDACTION_INDICATOR, true);
	hintTypeMap.put(HINT_ONLY, true);
	hintTypeMap.put(MUST_REDACT, true);
	hintTypeMap.put(PUBLISHED_INFORMATION, true);
	hintTypeMap.put(TEST_METHOD, true);
	hintTypeMap.put(PII, false);
	hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
	hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
	hintTypeMap.put(FALSE_POSITIVE, true);
	hintTypeMap.put(PURITY, false);
	hintTypeMap.put(IMAGE, true);
	hintTypeMap.put(OCR, true);
	hintTypeMap.put(FORMULA, false);
	hintTypeMap.put(LOGO, false);
	hintTypeMap.put(SIGNATURE, false);
	hintTypeMap.put(DOSSIER_REDACTIONS, false);
	hintTypeMap.put(IMPORTED_REDACTION, false);
	// Case-insensitivity flag per type (fed into Type.isCaseInsensitive).
	caseInSensitiveMap.put(VERTEBRATE, true);
	caseInSensitiveMap.put(ADDRESS, false);
	caseInSensitiveMap.put(AUTHOR, false);
	caseInSensitiveMap.put(SPONSOR, false);
	caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
	caseInSensitiveMap.put(REDACTION_INDICATOR, true);
	caseInSensitiveMap.put(HINT_ONLY, true);
	caseInSensitiveMap.put(MUST_REDACT, true);
	caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
	caseInSensitiveMap.put(TEST_METHOD, false);
	caseInSensitiveMap.put(PII, false);
	caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
	caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
	caseInSensitiveMap.put(FALSE_POSITIVE, false);
	caseInSensitiveMap.put(PURITY, false);
	caseInSensitiveMap.put(IMAGE, true);
	caseInSensitiveMap.put(OCR, true);
	caseInSensitiveMap.put(SIGNATURE, true);
	caseInSensitiveMap.put(LOGO, true);
	caseInSensitiveMap.put(FORMULA, true);
	caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
	caseInSensitiveMap.put(IMPORTED_REDACTION, false);
	// Recommendation flag per type; only the RECOMMENDATION_* types are true.
	recommendationTypeMap.put(VERTEBRATE, false);
	recommendationTypeMap.put(ADDRESS, false);
	recommendationTypeMap.put(AUTHOR, false);
	recommendationTypeMap.put(SPONSOR, false);
	recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
	recommendationTypeMap.put(REDACTION_INDICATOR, false);
	recommendationTypeMap.put(HINT_ONLY, false);
	recommendationTypeMap.put(MUST_REDACT, false);
	recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
	recommendationTypeMap.put(TEST_METHOD, false);
	recommendationTypeMap.put(PII, false);
	recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
	recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
	recommendationTypeMap.put(FALSE_POSITIVE, false);
	recommendationTypeMap.put(PURITY, false);
	recommendationTypeMap.put(IMAGE, false);
	recommendationTypeMap.put(OCR, false);
	recommendationTypeMap.put(FORMULA, false);
	recommendationTypeMap.put(SIGNATURE, false);
	recommendationTypeMap.put(LOGO, false);
	recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
	recommendationTypeMap.put(IMPORTED_REDACTION, false);
	// Rank per type (fed into Type.rank); presumably decides precedence
	// between overlapping entities — TODO confirm against production code.
	rankTypeMap.put(FALSE_POSITIVE, 160);
	rankTypeMap.put(PURITY, 155);
	rankTypeMap.put(PII, 150);
	rankTypeMap.put(ADDRESS, 140);
	rankTypeMap.put(AUTHOR, 130);
	rankTypeMap.put(SPONSOR, 120);
	rankTypeMap.put(VERTEBRATE, 110);
	rankTypeMap.put(MUST_REDACT, 100);
	rankTypeMap.put(REDACTION_INDICATOR, 90);
	rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
	rankTypeMap.put(PUBLISHED_INFORMATION, 70);
	rankTypeMap.put(TEST_METHOD, 60);
	rankTypeMap.put(HINT_ONLY, 50);
	rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
	rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
	rankTypeMap.put(IMAGE, 30);
	rankTypeMap.put(OCR, 29);
	rankTypeMap.put(LOGO, 28);
	rankTypeMap.put(SIGNATURE, 27);
	rankTypeMap.put(FORMULA, 26);
	rankTypeMap.put(DOSSIER_REDACTIONS, 200);
	rankTypeMap.put(IMPORTED_REDACTION, 200);
	// Default annotation colors used when no type-specific color applies.
	colors.setDefaultColor("#acfc00");
	colors.setNotRedacted("#cccccc");
	colors.setRequestAdd("#04b093");
	colors.setRequestRemove("#04b093");
}
/**
 * Pre-loads a canned NER response from the classpath into storage so the
 * analysis under test finds NER_ENTITIES for the fixed test file.
 */
@SneakyThrows
private void loadNerForTest() {
	ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
	var bytes = IOUtils.toByteArray(responseJson.getInputStream());
	storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes);
}
/**
 * Builds the mocked type-service response: one Type per entry of
 * typeColorMap, enriched from the hint/case/recommendation/rank fixture maps.
 */
private List<Type> getTypeResponse() {
	List<Type> types = new ArrayList<>();
	for (var typeColor : typeColorMap.entrySet()) {
		String typeKey = typeColor.getKey();
		types.add(Type.builder()
				.id(typeKey + ":" + TEST_DOSSIER_TEMPLATE_ID)
				.type(typeKey)
				.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
				.hexColor(typeColor.getValue())
				.isHint(hintTypeMap.get(typeKey))
				.isCaseInsensitive(caseInSensitiveMap.get(typeKey))
				.isRecommendation(recommendationTypeMap.get(typeKey))
				.rank(rankTypeMap.get(typeKey))
				.build());
	}
	return types;
}
/**
 * Builds a single {@code Type} carrying the dictionary entries for the given
 * type name, sourced either from the dossier-level or the template-level
 * dictionary.
 */
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
	var rawValues = isDossierDictionary ? dossierDictionary.get(type) : dictionary.get(type);
	var dictionaryEntries = toDictionaryEntry(rawValues);
	return Type.builder()
			.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
			.hexColor(typeColorMap.get(type))
			.entries(dictionaryEntries)
			.isHint(hintTypeMap.get(type))
			.isCaseInsensitive(caseInSensitiveMap.get(type))
			.isRecommendation(recommendationTypeMap.get(type))
			.rank(rankTypeMap.get(type))
			.build();
}
/**
 * Normalises a raw dictionary entry: removes hyphenated line breaks and
 * flattens any remaining newline characters into single spaces.
 */
private String cleanDictionaryEntry(String entry) {
	String dehyphenated = TextNormalizationUtilities.removeHyphenLineBreaks(entry);
	// plain char replacement — no regex needed for a literal newline
	return dehyphenated.replace('\n', ' ');
}
/**
 * Maps raw dictionary values to {@code DictionaryEntry} objects, attaching
 * the reanalysis version (0 when the value was never reanalysed) and the
 * deleted flag for each value.
 *
 * @param entries raw dictionary values
 * @return one {@code DictionaryEntry} per value, in input order
 */
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
	// Stream pipeline instead of forEach-with-side-effects: the result list
	// is produced by the collector rather than by mutating a captured list.
	return entries.stream()
			.map(entry -> DictionaryEntry.builder()
					.value(entry)
					.version(reanlysisVersions.getOrDefault(entry, 0L))
					.deleted(deleted.contains(entry))
					.build())
			.collect(Collectors.toList());
}
@Test
public void testImportedRedactions() throws IOException {
@ -1579,7 +1559,6 @@ public class RedactionIntegrationTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.pdf");
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/ImportedRedactions.json");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), IOUtils.toByteArray(importedRedactions.getInputStream()));
@ -1598,4 +1577,32 @@ public class RedactionIntegrationTest {
}
}
/**
 * Stores the given PDF content as the ORIGIN file of the test dossier and
 * returns a matching {@code AnalyzeRequest} pointing at it.
 *
 * @param stream PDF content to store; fully consumed, not closed here
 * @return request referencing the test dossier template, dossier and file ids
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {
	AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
			.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
			.dossierId(TEST_DOSSIER_ID)
			.fileId(TEST_FILE_ID)
			.lastProcessed(OffsetDateTime.now())
			.build();
	byte[] content = IOUtils.toByteArray(stream);
	String originStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN);
	storageService.storeObject(originStorageId, content);
	return analyzeRequest;
}
/**
 * Convenience overload: loads the given classpath resource and delegates to
 * {@link #prepareStorage(InputStream)}.
 *
 * @param file classpath location of the PDF fixture
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
	var resource = new ClassPathResource(file);
	return prepareStorage(resource.getInputStream());
}
}

View File

@ -0,0 +1,12 @@
{
"result": {
"1": [
{
"value": "Mannheim",
"startOffset": 0,
"endOffset": 8,
"type": "CITY"
}
]
}
}