diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java index 3875b00d..b30db8cb 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java @@ -21,7 +21,9 @@ public class AnalyzeResult { private boolean hasImages; private boolean hasUpdates; private long dictionaryVersion; + private long dossierDictionaryVersion; private long rulesVersion; + } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLog.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLog.java index 886f26e1..b6c13b93 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLog.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLog.java @@ -16,13 +16,16 @@ public class RedactionLog { private String ruleSetId; + private long dossierDictionaryVersion = -1; - public RedactionLog(List redactionLogEntry, long dictionaryVersion, long rulesVersion, String ruleSetId) { + + public RedactionLog(List redactionLogEntry, long dictionaryVersion, long rulesVersion, String ruleSetId, long dossierDictionaryVersion) { this.redactionLogEntry = redactionLogEntry; this.dictionaryVersion = dictionaryVersion; this.rulesVersion = rulesVersion; this.ruleSetId = ruleSetId; + this.dossierDictionaryVersion = dossierDictionaryVersion; } diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index 0a1a501e..1975a136 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -24,7 +24,7 @@ com.iqser.red.service configuration-service-api-v1 - 2.5.0 + 2.5.6 com.iqser.red.service diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java index c78ebccd..d312bd8c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java @@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model; import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; import com.iqser.red.service.redaction.v1.model.SectionGrid; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.Image; import lombok.Data; @@ -31,7 +32,7 @@ public class Document { private List redactionLogEntities = new ArrayList<>(); private SectionGrid sectionGrid = new SectionGrid(); - private long dictionaryVersion; + private DictionaryVersion dictionaryVersion; private long rulesVersion; private List sectionText = new ArrayList<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index 71102747..31f8583b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -50,7 +50,7 @@ public class RedactionController implements RedactionResource { try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) { pdDocument.setAllSecurityToBeRemoved(true); - dictionaryService.updateDictionary(redactionLog.getRuleSetId()); + dictionaryService.updateDictionary(redactionLog.getRuleSetId(), annotateRequest.getProjectId()); annotationService.annotate(pdDocument, redactionLog, sectionsGrid); try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java index c8d514d1..3879bb80 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java @@ -18,13 +18,13 @@ public class Dictionary { private Map localAccessMap = new HashMap<>(); @Getter - private long version; + private DictionaryVersion version; - public Dictionary(List dictionaryModels, long dictionaryVersion) { + public Dictionary(List dictionaryModels, DictionaryVersion version) { this.dictionaryModels = dictionaryModels; this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm)); - this.version = dictionaryVersion; + this.version = version; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java index c17862cb..29a71403 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java @@ -10,6 +10,6 @@ import java.util.Set; public class DictionaryIncrement { private Set values; - private long dictionaryVersion; + private DictionaryVersion dictionaryVersion; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryVersion.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryVersion.java new file mode 100644 index 00000000..6a69bb60 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryVersion.java @@ -0,0 +1,16 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class DictionaryVersion { + + long rulesetVersion; + long dossierVersion; +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java index dbf75a52..ca2962c2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java @@ -42,6 +42,7 @@ public class AnalyzeResponseService { .hasUpdates(hasUpdates) .rulesVersion(redactionLog.getRulesVersion()) .dictionaryVersion(redactionLog.getDictionaryVersion()) + .dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion()) .build(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index 4da063a7..2d5ae830 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import static com.iqser.red.service.configuration.v1.api.resource.DictionaryResource.GLOBAL_DOSSIER; + import com.iqser.red.service.configuration.v1.api.model.Colors; import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry; import com.iqser.red.service.configuration.v1.api.model.TypeResponse; @@ -10,6 +12,8 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncre import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion; + import feign.FeignException; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -29,52 +33,68 @@ public class DictionaryService { private final DictionaryClient dictionaryClient; private final Map dictionariesByRuleSets = new HashMap<>(); + private final Map dictionariesByDossier = new HashMap<>(); - public long updateDictionary(String ruleSetId) { + public DictionaryVersion updateDictionary(String ruleSetId, String dossierId) { - long version = dictionaryClient.getVersion(ruleSetId); - - var foundDictionary = dictionariesByRuleSets.get(ruleSetId); - - if (foundDictionary == null || version > foundDictionary.getDictionaryVersion()) { - updateDictionaryEntry(ruleSetId, version); + long rulesetDictionaryVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER); + var rulesetDictionary = dictionariesByRuleSets.get(ruleSetId); + if (rulesetDictionary == null || rulesetDictionaryVersion > rulesetDictionary.getDictionaryVersion()) { + updateDictionaryEntry(ruleSetId, rulesetDictionaryVersion, GLOBAL_DOSSIER); } - return version; + long dossierDictionaryVersion = dictionaryClient.getVersion(ruleSetId, dossierId); + var dossierDictionary = dictionariesByDossier.get(dossierId); + if (dossierDictionary == null || dossierDictionaryVersion > dossierDictionary.getDictionaryVersion()) { + updateDictionaryEntry(ruleSetId, dossierDictionaryVersion, dossierId); + } + + return DictionaryVersion.builder().rulesetVersion(rulesetDictionaryVersion).dossierVersion(dossierDictionaryVersion).build(); } - public DictionaryIncrement getDictionaryIncrements(String ruleSetId, long fromVersion) { + public DictionaryIncrement getDictionaryIncrements(String ruleSetId, DictionaryVersion fromVersion, String dossierId) { - long version = updateDictionary(ruleSetId); + DictionaryVersion version = updateDictionary(ruleSetId, dossierId); Set newValues = new HashSet<>(); List dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary(); dictionaryModels.forEach(dictionaryModel -> { dictionaryModel.getEntries().forEach(dictionaryEntry -> { - if (dictionaryEntry.getVersion() > fromVersion) { + if (dictionaryEntry.getVersion() > fromVersion.getRulesetVersion()) { newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive())); } }); }); + if(dictionariesByDossier.containsKey(dossierId)) { + dictionaryModels = dictionariesByDossier.get(dossierId).getDictionary(); + dictionaryModels.forEach(dictionaryModel -> { + dictionaryModel.getEntries().forEach(dictionaryEntry -> { + if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) { + newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive())); + } + }); + }); + } + return new DictionaryIncrement(newValues, version); } - private void updateDictionaryEntry(String ruleSetId, long version) { + private void updateDictionaryEntry(String ruleSetId, long version, String dossierId) { try { DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation(); - TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId); + TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId, dossierId); if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) { List dictionary = typeResponse.getTypes() .stream() .map(t -> new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t - .isHint(), t.isRecommendation(), convertEntries(t), new HashSet<>())) + .isHint(), t.isRecommendation(), convertEntries(t, dossierId), new HashSet<>())) .sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()) .collect(Collectors.toList()); @@ -90,7 +110,11 @@ public class DictionaryService { dictionaryRepresentation.setDictionaryVersion(version); dictionaryRepresentation.setDictionary(dictionary); - dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation); + if(dossierId.equals(GLOBAL_DOSSIER)) { + dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation); + } else { + dictionariesByDossier.put(dossierId, dictionaryRepresentation); + } } } catch (FeignException e) { log.warn("Got some unknown feignException", e); @@ -103,19 +127,19 @@ public class DictionaryService { dictionary.getDictionaryModels().forEach(dm -> { if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) { - dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false); - long externalVersion = dictionaryClient.getVersion(ruleSetId); - if (externalVersion == dictionary.getVersion() + 1) { - dictionary.setVersion(externalVersion); + dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false, GLOBAL_DOSSIER); + long externalVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER); + if (externalVersion == dictionary.getVersion().getRulesetVersion() + 1) { + dictionary.getVersion().setRulesetVersion(externalVersion); } } }); } - private Set convertEntries(TypeResult t) { + private Set convertEntries(TypeResult t, String dossierId) { - Set entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId()) + Set entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId(), dossierId) .getEntries()); if (t.isCaseInsensitive()) { @@ -172,17 +196,26 @@ public class DictionaryService { } - public Dictionary getDeepCopyDictionary(String ruleSetId) { + public Dictionary getDeepCopyDictionary(String ruleSetId, String dossierId) { List copy = new ArrayList<>(); - var representation = dictionariesByRuleSets.get(ruleSetId); - var dictionary = dictionariesByRuleSets.get(ruleSetId).getDictionary(); - dictionary.forEach(dm -> { + var rulesetRepresentation = dictionariesByRuleSets.get(ruleSetId); + rulesetRepresentation.getDictionary().forEach(dm -> { copy.add(SerializationUtils.clone(dm)); }); - return new Dictionary(copy, representation.getDictionaryVersion()); + //TODO merge dictionaries if they have same names + long dossierDictionaryVersion = -1; + if(dictionariesByDossier.containsKey(dossierId)) { + var dossierRepresentation = dictionariesByDossier.get(dossierId); + dossierRepresentation.getDictionary().forEach(dm -> { + copy.add(SerializationUtils.clone(dm)); + }); + dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion(); + } + + return new Dictionary(copy, DictionaryVersion.builder().rulesetVersion(rulesetRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 5ee4cb3f..24dc4e5e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -33,13 +33,13 @@ public class EntityRedactionService { private final SurroundingWordsService surroundingWordsService; - public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions) { + public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions, String dossierId) { - dictionaryService.updateDictionary(ruleSetId); + dictionaryService.updateDictionary(ruleSetId, dossierId); KieContainer container = droolsExecutionService.updateRules(ruleSetId); long rulesVersion = droolsExecutionService.getRulesVersion(ruleSetId); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId, dossierId); Set documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java index 0a3e46f6..abfc8e75 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java @@ -11,9 +11,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.*; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; + import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; + import org.kie.api.runtime.KieContainer; import org.springframework.stereotype.Service; import org.springframework.web.bind.annotation.RequestBody; @@ -37,14 +39,17 @@ public class ReanalyzeService { private final RedactionChangeLogService redactionChangeLogService; private final AnalyzeResponseService analyzeResponseService; + public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) { + long startTime = System.currentTimeMillis(); var pageCount = 0; Document classifiedDoc; try { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN)); + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest + .getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN)); classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream); pageCount = classifiedDoc.getPages().size(); } catch (Exception e) { @@ -52,24 +57,28 @@ public class ReanalyzeService { } log.info("Document structure analysis successful, starting redaction analysis..."); - entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions()); + entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions(), analyzeRequest + .getProjectId()); redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest .getRuleSetId()); log.info("Redaction analysis successful..."); + var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion() + .getRulesetVersion(), classifiedDoc.getRulesVersion(), analyzeRequest.getRuleSetId(), classifiedDoc.getDictionaryVersion() + .getDossierVersion()); - var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc - .getRulesVersion(), analyzeRequest.getRuleSetId()); - - log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc.getDictionaryVersion(), analyzeRequest.getRuleSetId()); + log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc + .getDictionaryVersion(), analyzeRequest.getRuleSetId()); // first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to loose changeLog data var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog); // store redactionLog redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog); - redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc.getSectionText())); - redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid()); + redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc + .getSectionText())); + redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc + .getSectionGrid()); long duration = System.currentTimeMillis() - startTime; return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, changeLog); @@ -78,6 +87,7 @@ public class ReanalyzeService { @SneakyThrows public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) { + long startTime = System.currentTimeMillis(); var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId()); @@ -88,7 +98,8 @@ public class ReanalyzeService { return analyze(analyzeRequest); } - DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getRuleSetId(), redactionLog.getDictionaryVersion()); + DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getRuleSetId(), new DictionaryVersion(redactionLog + .getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getProjectId()); Set manualForceAndRemoveIds = getForceAndRemoveIds(analyzeRequest.getManualRedactions()); Map> comments = null; @@ -146,12 +157,11 @@ public class ReanalyzeService { } } - //-- KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getRuleSetId()); - Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getRuleSetId()); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getRuleSetId(), analyzeRequest.getRuleSetId()); List sectionSearchableTextPairs = new ArrayList<>(); for (SectionText reanalysisSection : reanalysisSections) { @@ -183,8 +193,7 @@ public class ReanalyzeService { Set entities = new HashSet<>(); Map> imagesPerPage = new HashMap<>(); sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> { - Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair - .getSection()); + Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection()); entities.addAll(analysedRowSection.getEntities()); EntitySearchUtils.removeEntitiesContainedInLarger(entities); @@ -214,36 +223,42 @@ public class ReanalyzeService { List newRedactionLogEntries = new ArrayList<>(); for (int page = 1; page <= text.getNumberOfPages(); page++) { if (entitiesPerPage.get(page) != null) { - newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, analyzeRequest - .getManualRedactions(), page, analyzeRequest.getRuleSetId())); + newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest + .getRuleSetId())); } if (imagesPerPage.get(page) != null) { - newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, analyzeRequest - .getManualRedactions(), page, analyzeRequest.getRuleSetId())); + newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest + .getRuleSetId())); } newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, analyzeRequest .getRuleSetId())); } - - redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage()); + redactionLog.getRedactionLogEntry() + .removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry + .isImage()); redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries); - return finalizeAnalysis - (analyzeRequest, startTime, redactionLog, text, dictionaryIncrement); + return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement); } - private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime, RedactionLog redactionLog, Text text, DictionaryIncrement dictionaryIncrement) { - redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion()); + + private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime, + RedactionLog redactionLog, Text text, + DictionaryIncrement dictionaryIncrement) { + + redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getRulesetVersion()); + redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion()); var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog); redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog); long duration = System.currentTimeMillis() - startTime; - return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text.getNumberOfPages(), redactionLog, changeLog); + return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text + .getNumberOfPages(), redactionLog, changeLog); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 29852b95..8b21791f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server; import com.amazonaws.services.s3.AmazonS3; import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.configuration.v1.api.model.*; +import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource; import com.iqser.red.service.file.management.v1.api.model.FileType; import com.iqser.red.service.redaction.v1.model.*; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; @@ -73,6 +74,7 @@ public class RedactionIntegrationTest { private static final String SIGNATURE = "signature"; private static final String FORMULA = "formula"; private static final String OCR = "ocr"; + private static final String DOSSIER_REDACTIONS = "dossier_redactions"; private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author"; private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address"; @@ -81,6 +83,7 @@ public class RedactionIntegrationTest { private static final String PII = "PII"; + @Autowired private RedactionController redactionController; @@ -112,6 +115,7 @@ public class RedactionIntegrationTest { private RabbitTemplate rabbitTemplate; private final Map> dictionary = new HashMap<>(); + private final Map> dossierDictionary = new HashMap<>(); private final Map typeColorMap = new HashMap<>(); private final Map hintTypeMap = new HashMap<>(); private final Map caseInSensitiveMap = new HashMap<>(); @@ -170,30 +174,45 @@ public class RedactionIntegrationTest { loadDictionaryForTest(); loadTypeForTest(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(0L); - when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder() + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L); + when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder() .types(getTypeResponse()) .build()); - when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE)); - when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(ADDRESS)); - when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR)); - when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SPONSOR)); - when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR)); - when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR)); - when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(HINT_ONLY)); - when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(MUST_REDACT)); - when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION)); - when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(TEST_METHOD)); - when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PII)); - when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR)); - when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS)); - when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FALSE_POSITIVE)); - when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PURITY)); - when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(IMAGE)); - when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(OCR)); - when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(LOGO)); - when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SIGNATURE)); - when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FORMULA)); + + when(dictionaryClient.getVersion(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypes(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(TypeResponse.builder() + .types(List.of(TypeResult.builder() + .type(DOSSIER_REDACTIONS) + .ruleSetId(TEST_RULESET_ID) + .hexColor( "#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS)) + .build())) + .build()); + + when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false)); + when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false)); + when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false)); + when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false)); + when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false)); + when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false)); + when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false)); + when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false)); + when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false)); + when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false)); + when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false)); + when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false)); + when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false)); + when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false)); + when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false)); + when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false)); + when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false)); + when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true)); when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors); } @@ -300,6 +319,11 @@ public class RedactionIntegrationTest { .stream() .map(this::cleanDictionaryEntry) .collect(Collectors.toSet())); + dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); } @@ -352,6 +376,7 @@ public class RedactionIntegrationTest { hintTypeMap.put(FORMULA, false); hintTypeMap.put(LOGO, false); hintTypeMap.put(SIGNATURE, false); + hintTypeMap.put(DOSSIER_REDACTIONS, false); caseInSensitiveMap.put(VERTEBRATE, true); caseInSensitiveMap.put(ADDRESS, false); @@ -373,6 +398,7 @@ public class RedactionIntegrationTest { caseInSensitiveMap.put(SIGNATURE, true); caseInSensitiveMap.put(LOGO, true); caseInSensitiveMap.put(FORMULA, true); + caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); recommendationTypeMap.put(VERTEBRATE, false); recommendationTypeMap.put(ADDRESS, false); @@ -394,6 +420,7 @@ public class RedactionIntegrationTest { recommendationTypeMap.put(FORMULA, false); recommendationTypeMap.put(SIGNATURE, false); recommendationTypeMap.put(LOGO, false); + recommendationTypeMap.put(DOSSIER_REDACTIONS, false); rankTypeMap.put(FALSE_POSITIVE, 160); rankTypeMap.put(PURITY, 155); @@ -415,6 +442,7 @@ public class RedactionIntegrationTest { rankTypeMap.put(LOGO, 28); rankTypeMap.put(SIGNATURE, 27); rankTypeMap.put(FORMULA, 26); + rankTypeMap.put(DOSSIER_REDACTIONS, 200); colors.setDefaultColor("#acfc00"); colors.setNotRedacted("#cccccc"); @@ -441,11 +469,11 @@ public class RedactionIntegrationTest { } - private DictionaryResponse getDictionaryResponse(String type) { + private DictionaryResponse getDictionaryResponse(String type, boolean isDossierDictionary) { return DictionaryResponse.builder() .hexColor(typeColorMap.get(type)) - .entries(toDictionaryEntry(dictionary.get(type))) + .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) .isHint(hintTypeMap.get(type)) .isCaseInsensitive(caseInSensitiveMap.get(type)) .isRecommendation(recommendationTypeMap.get(type)) @@ -504,7 +532,7 @@ public class RedactionIntegrationTest { }); dictionary.get(AUTHOR).add("Drinking water"); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L); AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() .projectId(TEST_PROJECT_ID) @@ -560,7 +588,7 @@ public class RedactionIntegrationTest { }); dictionary.get(AUTHOR).add("Drinking water"); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L); long rstart = System.currentTimeMillis(); reanalyzeService.reanalyze(request); @@ -651,9 +679,9 @@ public class RedactionIntegrationTest { dictionary.get(VERTEBRATE).add("s-metolachlor"); reanlysisVersions.put("s-metolachlor", 3L); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(3L); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L); - when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE)); + when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); start = System.currentTimeMillis(); AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index 3d17be2f..f0298c14 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; import com.amazonaws.services.s3.AmazonS3; import com.iqser.red.service.configuration.v1.api.model.*; +import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource; import com.iqser.red.service.redaction.v1.server.Application; import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService; import com.iqser.red.service.redaction.v1.server.classification.model.Document; @@ -133,20 +134,20 @@ public class EntityRedactionServiceTest { DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))) .build(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities } @@ -160,19 +161,19 @@ public class EntityRedactionServiceTest { DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))) .build(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities } @@ -183,21 +184,21 @@ public class EntityRedactionServiceTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" + " Supplement - Identity of the active substance - Reference list.pdf"); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities() .entrySet() .stream() @@ -205,7 +206,7 @@ public class EntityRedactionServiceTest { pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " + "the plant protection product.pdf"); classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities() .entrySet() .stream() @@ -216,21 +217,21 @@ public class EntityRedactionServiceTest { public void testFalsePositiveInWrongCell() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf"); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream() .filter(entity -> entity.getMatchedRule() == 9) @@ -283,21 +284,21 @@ public class EntityRedactionServiceTest { droolsExecutionService.updateRules(TEST_RULESET_ID); ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf"); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream() .filter(entity -> entity.getMatchedRule() == 6) @@ -322,21 +323,21 @@ public class EntityRedactionServiceTest { droolsExecutionService.updateRules(TEST_RULESET_ID); ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf"); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse authorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(authorResponse); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authorResponse); DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream() .filter(entity -> entity.getMatchedRule() == 11) @@ -354,19 +355,19 @@ public class EntityRedactionServiceTest { .entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P."))) .build(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8); assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y @@ -377,15 +378,15 @@ public class EntityRedactionServiceTest { .entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C."))) .build(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3); assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9); @@ -402,19 +403,19 @@ public class EntityRedactionServiceTest { .entries(toDictionaryEntry(Collections.singletonList("Aldershof S."))) .build(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))) .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .entries(Collections.emptyList()) .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream()); - entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); + entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId"); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6); } @@ -453,19 +454,19 @@ public class EntityRedactionServiceTest { TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(), TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(SPONSOR_CODE).hexColor("#00ffff").build())) .build(); - when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); - when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(typeResponse); + when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(typeResponse); // Default empty return to prevent NPEs DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .build(); - when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse); + when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse); DictionaryResponse addressResponse = DictionaryResponse.builder() .build(); - when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse); DictionaryResponse sponsorResponse = DictionaryResponse.builder() .build(); - when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse); Colors colors = new Colors(); colors.setDefaultColor("#acfc00"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/dossier_redactions.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/dossier_redactions.txt new file mode 100644 index 00000000..3840a8ac --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/dossier_redactions.txt @@ -0,0 +1 @@ +Difenoconazole \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index a2a78200..5f7e24f2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -328,4 +328,12 @@ rule "28: Redact Logos" Section(matchesImageType("logo")) then section.redactImage("logo", 28, "Logo found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); - end \ No newline at end of file + end + + +rule "29: Redact Dossier Redactions" + when + Section(matchesType("dossier_redactions")) + then + section.redact("dossier_redactions", 29, "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002"); + end \ No newline at end of file