Pull request #151: RED-1334: Enabled to use dictionaries per dossier

Merge in RED/redaction-service from RED-1334 to master

* commit '48beb984ca02ab163a64af38907a05d70b048655':
  RED-1334: Enabled to use dictionaries per dossier
This commit is contained in:
Dominique Eiflaender 2021-04-28 15:20:50 +02:00
commit 01eceaaef4
16 changed files with 249 additions and 140 deletions

View File

@ -21,7 +21,9 @@ public class AnalyzeResult {
private boolean hasImages;
private boolean hasUpdates;
private long dictionaryVersion;
private long dossierDictionaryVersion;
private long rulesVersion;
}

View File

@ -16,13 +16,16 @@ public class RedactionLog {
private String ruleSetId;
private long dossierDictionaryVersion = -1;
public RedactionLog(List<RedactionLogEntry> redactionLogEntry, long dictionaryVersion, long rulesVersion, String ruleSetId) {
public RedactionLog(List<RedactionLogEntry> redactionLogEntry, long dictionaryVersion, long rulesVersion, String ruleSetId, long dossierDictionaryVersion) {
this.redactionLogEntry = redactionLogEntry;
this.dictionaryVersion = dictionaryVersion;
this.rulesVersion = rulesVersion;
this.ruleSetId = ruleSetId;
this.dossierDictionaryVersion = dossierDictionaryVersion;
}

View File

@ -24,7 +24,7 @@
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>configuration-service-api-v1</artifactId>
<version>2.5.0</version>
<version>2.5.6</version>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import lombok.Data;
@ -31,7 +32,7 @@ public class Document {
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
private SectionGrid sectionGrid = new SectionGrid();
private long dictionaryVersion;
private DictionaryVersion dictionaryVersion;
private long rulesVersion;
private List<SectionText> sectionText = new ArrayList<>();

View File

@ -50,7 +50,7 @@ public class RedactionController implements RedactionResource {
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(redactionLog.getRuleSetId());
dictionaryService.updateDictionary(redactionLog.getRuleSetId(), annotateRequest.getProjectId());
annotationService.annotate(pdDocument, redactionLog, sectionsGrid);
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {

View File

@ -18,13 +18,13 @@ public class Dictionary {
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
@Getter
private long version;
private DictionaryVersion version;
public Dictionary(List<DictionaryModel> dictionaryModels, long dictionaryVersion) {
public Dictionary(List<DictionaryModel> dictionaryModels, DictionaryVersion version) {
this.dictionaryModels = dictionaryModels;
this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm));
this.version = dictionaryVersion;
this.version = version;
}

View File

@ -10,6 +10,6 @@ import java.util.Set;
public class DictionaryIncrement {
private Set<DictionaryIncrementValue> values;
private long dictionaryVersion;
private DictionaryVersion dictionaryVersion;
}

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class DictionaryVersion {
long rulesetVersion;
long dossierVersion;
}

View File

@ -42,6 +42,7 @@ public class AnalyzeResponseService {
.hasUpdates(hasUpdates)
.rulesVersion(redactionLog.getRulesVersion())
.dictionaryVersion(redactionLog.getDictionaryVersion())
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
.build();
}
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import static com.iqser.red.service.configuration.v1.api.resource.DictionaryResource.GLOBAL_DOSSIER;
import com.iqser.red.service.configuration.v1.api.model.Colors;
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
@ -10,6 +12,8 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncre
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import feign.FeignException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -29,52 +33,68 @@ public class DictionaryService {
private final DictionaryClient dictionaryClient;
private final Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
private final Map<String, DictionaryRepresentation> dictionariesByDossier = new HashMap<>();
public long updateDictionary(String ruleSetId) {
public DictionaryVersion updateDictionary(String ruleSetId, String dossierId) {
long version = dictionaryClient.getVersion(ruleSetId);
var foundDictionary = dictionariesByRuleSets.get(ruleSetId);
if (foundDictionary == null || version > foundDictionary.getDictionaryVersion()) {
updateDictionaryEntry(ruleSetId, version);
long rulesetDictionaryVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER);
var rulesetDictionary = dictionariesByRuleSets.get(ruleSetId);
if (rulesetDictionary == null || rulesetDictionaryVersion > rulesetDictionary.getDictionaryVersion()) {
updateDictionaryEntry(ruleSetId, rulesetDictionaryVersion, GLOBAL_DOSSIER);
}
return version;
long dossierDictionaryVersion = dictionaryClient.getVersion(ruleSetId, dossierId);
var dossierDictionary = dictionariesByDossier.get(dossierId);
if (dossierDictionary == null || dossierDictionaryVersion > dossierDictionary.getDictionaryVersion()) {
updateDictionaryEntry(ruleSetId, dossierDictionaryVersion, dossierId);
}
return DictionaryVersion.builder().rulesetVersion(rulesetDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
}
public DictionaryIncrement getDictionaryIncrements(String ruleSetId, long fromVersion) {
public DictionaryIncrement getDictionaryIncrements(String ruleSetId, DictionaryVersion fromVersion, String dossierId) {
long version = updateDictionary(ruleSetId);
DictionaryVersion version = updateDictionary(ruleSetId, dossierId);
Set<DictionaryIncrementValue> newValues = new HashSet<>();
List<DictionaryModel> dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary();
dictionaryModels.forEach(dictionaryModel -> {
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion) {
if (dictionaryEntry.getVersion() > fromVersion.getRulesetVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
});
if(dictionariesByDossier.containsKey(dossierId)) {
dictionaryModels = dictionariesByDossier.get(dossierId).getDictionary();
dictionaryModels.forEach(dictionaryModel -> {
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getDossierVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
});
}
return new DictionaryIncrement(newValues, version);
}
private void updateDictionaryEntry(String ruleSetId, long version) {
private void updateDictionaryEntry(String ruleSetId, long version, String dossierId) {
try {
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId);
TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId, dossierId);
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
List<DictionaryModel> dictionary = typeResponse.getTypes()
.stream()
.map(t -> new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
.isHint(), t.isRecommendation(), convertEntries(t), new HashSet<>()))
.isHint(), t.isRecommendation(), convertEntries(t, dossierId), new HashSet<>()))
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
@ -90,7 +110,11 @@ public class DictionaryService {
dictionaryRepresentation.setDictionaryVersion(version);
dictionaryRepresentation.setDictionary(dictionary);
dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation);
if(dossierId.equals(GLOBAL_DOSSIER)) {
dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation);
} else {
dictionariesByDossier.put(dossierId, dictionaryRepresentation);
}
}
} catch (FeignException e) {
log.warn("Got some unknown feignException", e);
@ -103,19 +127,19 @@ public class DictionaryService {
dictionary.getDictionaryModels().forEach(dm -> {
if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) {
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false);
long externalVersion = dictionaryClient.getVersion(ruleSetId);
if (externalVersion == dictionary.getVersion() + 1) {
dictionary.setVersion(externalVersion);
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false, GLOBAL_DOSSIER);
long externalVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER);
if (externalVersion == dictionary.getVersion().getRulesetVersion() + 1) {
dictionary.getVersion().setRulesetVersion(externalVersion);
}
}
});
}
private Set<DictionaryEntry> convertEntries(TypeResult t) {
private Set<DictionaryEntry> convertEntries(TypeResult t, String dossierId) {
Set<DictionaryEntry> entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId())
Set<DictionaryEntry> entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId(), dossierId)
.getEntries());
if (t.isCaseInsensitive()) {
@ -172,17 +196,26 @@ public class DictionaryService {
}
public Dictionary getDeepCopyDictionary(String ruleSetId) {
public Dictionary getDeepCopyDictionary(String ruleSetId, String dossierId) {
List<DictionaryModel> copy = new ArrayList<>();
var representation = dictionariesByRuleSets.get(ruleSetId);
var dictionary = dictionariesByRuleSets.get(ruleSetId).getDictionary();
dictionary.forEach(dm -> {
var rulesetRepresentation = dictionariesByRuleSets.get(ruleSetId);
rulesetRepresentation.getDictionary().forEach(dm -> {
copy.add(SerializationUtils.clone(dm));
});
return new Dictionary(copy, representation.getDictionaryVersion());
//TODO merge dictionaries if they have same names
long dossierDictionaryVersion = -1;
if(dictionariesByDossier.containsKey(dossierId)) {
var dossierRepresentation = dictionariesByDossier.get(dossierId);
dossierRepresentation.getDictionary().forEach(dm -> {
copy.add(SerializationUtils.clone(dm));
});
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
}
return new Dictionary(copy, DictionaryVersion.builder().rulesetVersion(rulesetRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
}

View File

@ -33,13 +33,13 @@ public class EntityRedactionService {
private final SurroundingWordsService surroundingWordsService;
public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions) {
public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions, String dossierId) {
dictionaryService.updateDictionary(ruleSetId);
dictionaryService.updateDictionary(ruleSetId, dossierId);
KieContainer container = droolsExecutionService.updateRules(ruleSetId);
long rulesVersion = droolsExecutionService.getRulesVersion(ruleSetId);
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId);
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId, dossierId);
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null));

View File

@ -11,9 +11,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
@ -37,14 +39,17 @@ public class ReanalyzeService {
private final RedactionChangeLogService redactionChangeLogService;
private final AnalyzeResponseService analyzeResponseService;
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var pageCount = 0;
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
pageCount = classifiedDoc.getPages().size();
} catch (Exception e) {
@ -52,24 +57,28 @@ public class ReanalyzeService {
}
log.info("Document structure analysis successful, starting redaction analysis...");
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions(), analyzeRequest
.getProjectId());
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest
.getRuleSetId());
log.info("Redaction analysis successful...");
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion()
.getRulesetVersion(), classifiedDoc.getRulesVersion(), analyzeRequest.getRuleSetId(), classifiedDoc.getDictionaryVersion()
.getDossierVersion());
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
.getRulesVersion(), analyzeRequest.getRuleSetId());
log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc.getDictionaryVersion(), analyzeRequest.getRuleSetId());
log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc
.getDictionaryVersion(), analyzeRequest.getRuleSetId());
// first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to loose changeLog data
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
// store redactionLog
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc.getSectionText()));
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc
.getSectionText()));
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
.getSectionGrid());
long duration = System.currentTimeMillis() - startTime;
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, changeLog);
@ -78,6 +87,7 @@ public class ReanalyzeService {
@SneakyThrows
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
@ -88,7 +98,8 @@ public class ReanalyzeService {
return analyze(analyzeRequest);
}
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getRuleSetId(), redactionLog.getDictionaryVersion());
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getRuleSetId(), new DictionaryVersion(redactionLog
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getProjectId());
Set<String> manualForceAndRemoveIds = getForceAndRemoveIds(analyzeRequest.getManualRedactions());
Map<String, List<Comment>> comments = null;
@ -146,12 +157,11 @@ public class ReanalyzeService {
}
}
//--
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getRuleSetId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getRuleSetId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getRuleSetId(), analyzeRequest.getRuleSetId());
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
@ -183,8 +193,7 @@ public class ReanalyzeService {
Set<Entity> entities = new HashSet<>();
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair
.getSection());
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
entities.addAll(analysedRowSection.getEntities());
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
@ -214,36 +223,42 @@ public class ReanalyzeService {
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
for (int page = 1; page <= text.getNumberOfPages(); page++) {
if (entitiesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, analyzeRequest
.getManualRedactions(), page, analyzeRequest.getRuleSetId()));
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest
.getRuleSetId()));
}
if (imagesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, analyzeRequest
.getManualRedactions(), page, analyzeRequest.getRuleSetId()));
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest
.getRuleSetId()));
}
newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, analyzeRequest
.getRuleSetId()));
}
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage());
redactionLog.getRedactionLogEntry()
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry
.isImage());
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
return finalizeAnalysis
(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime, RedactionLog redactionLog, Text text, DictionaryIncrement dictionaryIncrement) {
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
RedactionLog redactionLog, Text text,
DictionaryIncrement dictionaryIncrement) {
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getRulesetVersion());
redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion());
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
long duration = System.currentTimeMillis() - startTime;
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text.getNumberOfPages(), redactionLog, changeLog);
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text
.getNumberOfPages(), redactionLog, changeLog);
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.configuration.v1.api.model.*;
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
@ -73,6 +74,7 @@ public class RedactionIntegrationTest {
private static final String SIGNATURE = "signature";
private static final String FORMULA = "formula";
private static final String OCR = "ocr";
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author";
private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address";
@ -81,6 +83,7 @@ public class RedactionIntegrationTest {
private static final String PII = "PII";
@Autowired
private RedactionController redactionController;
@ -112,6 +115,7 @@ public class RedactionIntegrationTest {
private RabbitTemplate rabbitTemplate;
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
@ -170,30 +174,45 @@ public class RedactionIntegrationTest {
loadDictionaryForTest();
loadTypeForTest();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder()
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
.types(getTypeResponse())
.build());
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(ADDRESS));
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SPONSOR));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(HINT_ONLY));
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(MUST_REDACT));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION));
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(TEST_METHOD));
when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PII));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FALSE_POSITIVE));
when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PURITY));
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(IMAGE));
when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(OCR));
when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(LOGO));
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SIGNATURE));
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FORMULA));
when(dictionaryClient.getVersion(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(TypeResponse.builder()
.types(List.of(TypeResult.builder()
.type(DOSSIER_REDACTIONS)
.ruleSetId(TEST_RULESET_ID)
.hexColor( "#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
.build()))
.build());
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
}
@ -300,6 +319,11 @@ public class RedactionIntegrationTest {
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -352,6 +376,7 @@ public class RedactionIntegrationTest {
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
hintTypeMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
@ -373,6 +398,7 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
@ -394,6 +420,7 @@ public class RedactionIntegrationTest {
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
rankTypeMap.put(FALSE_POSITIVE, 160);
rankTypeMap.put(PURITY, 155);
@ -415,6 +442,7 @@ public class RedactionIntegrationTest {
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
colors.setDefaultColor("#acfc00");
colors.setNotRedacted("#cccccc");
@ -441,11 +469,11 @@ public class RedactionIntegrationTest {
}
private DictionaryResponse getDictionaryResponse(String type) {
private DictionaryResponse getDictionaryResponse(String type, boolean isDossierDictionary) {
return DictionaryResponse.builder()
.hexColor(typeColorMap.get(type))
.entries(toDictionaryEntry(dictionary.get(type)))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
@ -504,7 +532,7 @@ public class RedactionIntegrationTest {
});
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.projectId(TEST_PROJECT_ID)
@ -560,7 +588,7 @@ public class RedactionIntegrationTest {
});
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
long rstart = System.currentTimeMillis();
reanalyzeService.reanalyze(request);
@ -651,9 +679,9 @@ public class RedactionIntegrationTest {
dictionary.get(VERTEBRATE).add("s-metolachlor");
reanlysisVersions.put("s-metolachlor", 3L);
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(3L);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
start = System.currentTimeMillis();
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.amazonaws.services.s3.AmazonS3;
import com.iqser.red.service.configuration.v1.api.model.*;
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
@ -133,20 +134,20 @@ public class EntityRedactionServiceTest {
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "OLoughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@ -160,19 +161,19 @@ public class EntityRedactionServiceTest {
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "OLoughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@ -183,21 +184,21 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
" Supplement - Identity of the active substance - Reference list.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
@ -205,7 +206,7 @@ public class EntityRedactionServiceTest {
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
"the plant protection product.pdf");
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
@ -216,21 +217,21 @@ public class EntityRedactionServiceTest {
public void testFalsePositiveInWrongCell() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 9)
@ -283,21 +284,21 @@ public class EntityRedactionServiceTest {
droolsExecutionService.updateRules(TEST_RULESET_ID);
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 6)
@ -322,21 +323,21 @@ public class EntityRedactionServiceTest {
droolsExecutionService.updateRules(TEST_RULESET_ID);
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse authorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(authorResponse);
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authorResponse);
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 11)
@ -354,19 +355,19 @@ public class EntityRedactionServiceTest {
.entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
@ -377,15 +378,15 @@ public class EntityRedactionServiceTest {
.entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
@ -402,19 +403,19 @@ public class EntityRedactionServiceTest {
.entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
}
@ -453,19 +454,19 @@ public class EntityRedactionServiceTest {
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(typeResponse);
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(typeResponse);
// Default empty return to prevent NPEs
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Colors colors = new Colors();
colors.setDefaultColor("#acfc00");

View File

@ -328,4 +328,12 @@ rule "28: Redact Logos"
Section(matchesImageType("logo"))
then
section.redactImage("logo", 28, "Logo found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
end
rule "29: Redact Dossier Redactions"
when
Section(matchesType("dossier_redactions"))
then
section.redact("dossier_redactions", 29, "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002");
end