Pull request #199: RED-1806: Use localDictionary also in Reanalysis to fix problem at removing values from false_positive that affect rules that should find values in entire document

Merge in RED/redaction-service from RED-1806 to master

* commit '0832ea15c09335cfb0d9f0139764ab6c99a22423':
  RED-1806: Use localDictionary also in Reanalysis to fix problem at removing values from false_positive that affect rules that should find values in entire document
This commit is contained in:
Dominique Eiflaender 2021-07-27 13:08:43 +02:00 committed by Timo Bejan
commit 2c4c52d891
5 changed files with 211 additions and 155 deletions

View File

@ -43,35 +43,13 @@ public class EntityRedactionService {
if (dictionary.hasLocalEntries()) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
documentEntities.stream().forEach(entity -> {
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
.add(entity);
}
});
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(documentEntities, dictionary);
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber, fileAttributes);
EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
}
for (Entity entity : documentEntities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
classifiedDoc.getEntities()
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
}
}
classifiedDoc.setEntities(convertToEnititesPerPage(documentEntities));
dictionaryService.updateExternalDictionary(dictionary, dossierTemplateId);
@ -80,6 +58,39 @@ public class EntityRedactionService {
}
/**
 * Converts document-wide entities into a page-keyed map, splitting each
 * entity into one copy per page it spans, based on the page numbers of its
 * position sequences.
 *
 * @param entities entities found for the whole document
 * @return map from page number to the entities appearing on that page
 */
public Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities){
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
// Group this entity's position sequences by the page they occur on.
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
}
// Emit one per-page copy of the entity carrying only that page's sequences;
// all other attributes are copied from the original entity unchanged.
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
entitiesPerPage
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
}
}
return entitiesPerPage;
}
/**
 * Collects the hint entities — dictionary entries whose type the dictionary
 * classifies as a hint — grouped by the section number they were found in.
 * Used to seed the local-dictionary analysis pass with per-section hints.
 *
 * @param entities   entities to scan
 * @param dictionary dictionary used to decide whether an entity type is a hint
 * @return map from section number to the hint entities of that section
 */
public Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary){
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
entities.stream().forEach(entity -> {
// Only dictionary-backed entities of a hint type qualify.
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
.add(entity);
}
});
return hintsPerSectionNumber;
}
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
@ -123,42 +134,46 @@ public class EntityRedactionService {
}
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
documentEntities.addAll(analysedRowSection.getEntities());
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
documentEntities.addAll(analysedSection.getEntities());
for (Image image : analysedRowSection.getImages()) {
for (Image image : analysedSection.getImages()) {
classifiedDoc.getImages().computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)) {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (!dictionary.containsValue(key, value)) {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
}
});
} else {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (dictionary.getLocalAccessMap().get(key) == null) {
log.warn("Dictionary {} is null", key);
}
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
log.warn("Dictionary {} localEntries is null", key);
}
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
});
addLocalValuesToDictionary(analysedSection, dictionary);
});
return documentEntities;
}
/**
 * Merges the local dictionary values discovered while analysing a section
 * into the dictionary, so that subsequent sections and the local analysis
 * pass can match them.
 *
 * For recommendation types a value is only added when the dictionary does
 * not already contain it; for all other types the value is added
 * unconditionally.
 *
 * Fix: the previous version logged a warning when the local dictionary for a
 * key (or its localEntries list) was null, but then dereferenced it anyway,
 * which threw a NullPointerException. Such keys are now skipped after the
 * warning. The recommendation branch previously had no null guard at all.
 *
 * @param analysedSection section returned by the rules engine, carrying the
 *                        values to add per dictionary type
 * @param dictionary      dictionary whose local entries are extended in place
 */
public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary){
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.getLocalAccessMap().get(key) == null) {
// Skip this type: dereferencing would throw a NullPointerException.
log.warn("Dictionary {} is null", key);
return;
}
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
// Skip this type: there is no entry list to add values to.
log.warn("Dictionary {} localEntries is null", key);
return;
}
if (dictionary.isRecommendation(key)) {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
// Recommendations are only added when not already present.
if (!dictionary.containsValue(key, value)) {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
}
});
} else {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
});
}
private List<SectionSearchableTextPair> processTablePerRow(Document classifiedDoc, Table table,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,

View File

@ -13,9 +13,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUti
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
@ -60,8 +62,7 @@ public class ReanalyzeService {
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getDossierTemplateId(), analyzeRequest.getManualRedactions(), analyzeRequest
.getDossierId(), analyzeRequest.getFileAttributes());
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest
.getDossierTemplateId());
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getDossierTemplateId());
log.info("Redaction analysis successful...");
@ -105,6 +106,59 @@ public class ReanalyzeService {
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
Set<Integer> sectionsToReanalyse = findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
if (sectionsToReanalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
List<SectionText> reanalysisSections = text.getSectionTexts()
.stream()
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
.collect(Collectors.toList());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
.getDossierId());
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
Set<Entity> entities = findEntities(reanalysisSections, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage);
if (dictionary.hasLocalEntries()) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = entityRedactionService.getHintsPerSection(entities, dictionary);
Set<Entity> foundByLocal = findEntities(reanalysisSections, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage);
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
Map<Integer, List<Entity>> entitiesPerPage = entityRedactionService.convertToEnititesPerPage(entities);
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
for (int page = 1; page <= text.getNumberOfPages(); page++) {
if (entitiesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, page, analyzeRequest
.getDossierTemplateId()));
}
if (imagesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, page, analyzeRequest
.getDossierTemplateId()));
}
}
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
analyzeResult.setWasReanalyzed(true);
return analyzeResult;
}
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
Text text, AnalyzeRequest analyzeRequest) {
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
Set<Integer> sectionsToReanalyse = new HashSet<>();
@ -128,31 +182,20 @@ public class ReanalyzeService {
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
if (sectionsToReanalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
return sectionsToReanalyse;
}
List<SectionText> reanalysisSections = new ArrayList<>();
for (SectionText sectionText : text.getSectionTexts()) {
if (sectionsToReanalyse.contains(sectionText.getSectionNumber())) {
reanalysisSections.add(sectionText);
}
}
//--
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
.getDossierId());
private Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
Map<Integer, Set<Image>> imagesPerPage) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, local);
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
.getCellStarts());
@ -160,14 +203,15 @@ public class ReanalyzeService {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
}
if (reanalysisSection.getImages() != null && !reanalysisSection.getImages()
if (!local && reanalysisSection.getImages() != null && !reanalysisSection.getImages()
.isEmpty() && analyzeRequest.getManualRedactions() != null && analyzeRequest.getManualRedactions()
.getImageRecategorizations() != null) {
for (Image image : reanalysisSection.getImages()) {
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
.getImageRecategorizations()) {
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId().equals(imageId)) {
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId()
.equals(imageId)) {
image.setType(imageRecategorization.getType());
}
}
@ -177,7 +221,10 @@ public class ReanalyzeService {
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
.entities(entities)
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream
.concat(entities.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber())
.stream())
.collect(Collectors.toSet()) : entities)
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
.searchText(reanalysisSection.getSearchableText().toString())
.headline(reanalysisSection.getHeadline())
@ -191,54 +238,19 @@ public class ReanalyzeService {
}
Set<Entity> entities = new HashSet<>();
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
entities.addAll(analysedRowSection.getEntities());
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
entities.addAll(analysedSection.getEntities());
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
for (Image image : analysedRowSection.getImages()) {
for (Image image : analysedSection.getImages()) {
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
entityRedactionService.addLocalValuesToDictionary(analysedSection, dictionary);
});
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
}
}
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
for (int page = 1; page <= text.getNumberOfPages(); page++) {
if (entitiesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, page, analyzeRequest
.getDossierTemplateId()));
}
if (imagesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, page, analyzeRequest
.getDossierTemplateId()));
}
}
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
analyzeResult.setWasReanalyzed(true);
return analyzeResult;
return entities;
}
@ -296,10 +308,9 @@ public class ReanalyzeService {
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
redactionLog.getRedactionLogEntry().forEach(entry ->
entry.getPositions().forEach(pos ->
entry.setExcluded(excludedPages != null && excludedPages.contains(pos.getPage()))
));
redactionLog.getRedactionLogEntry()
.forEach(entry -> entry.getPositions()
.forEach(pos -> entry.setExcluded(excludedPages != null && excludedPages.contains(pos.getPage()))));
}
}

View File

@ -18,7 +18,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
import org.apache.commons.io.IOUtils;
import org.junit.After;
import org.junit.Before;
@ -84,7 +86,6 @@ public class RedactionIntegrationTest {
private static final String PII = "PII";
@Autowired
private RedactionController redactionController;
@ -127,6 +128,7 @@ public class RedactionIntegrationTest {
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
private final Map<String, Long> reanlysisVersions = new HashMap<>();
private final Set<String> deleted = new HashSet<>();
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
private final static String TEST_DOSSIER_ID = "123";
@ -152,18 +154,20 @@ public class RedactionIntegrationTest {
return kieServices.newKieContainer(kieModule.getReleaseId());
}
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
@After
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
@ -179,7 +183,8 @@ public class RedactionIntegrationTest {
loadDictionaryForTest();
loadTypeForTest();
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse
.builder()
.types(getTypeResponse())
.build());
@ -188,7 +193,7 @@ public class RedactionIntegrationTest {
.types(List.of(TypeResult.builder()
.type(DOSSIER_REDACTIONS)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.hexColor( "#ffe187")
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
@ -196,26 +201,42 @@ public class RedactionIntegrationTest {
.build()))
.build());
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
when(dictionaryClient.getDictionaryForType(PURITY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
when(dictionaryClient.getDictionaryForType(PURITY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
@ -477,7 +498,8 @@ public class RedactionIntegrationTest {
return DictionaryResponse.builder()
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary
.get(type)))
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
@ -490,7 +512,8 @@ public class RedactionIntegrationTest {
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> {
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, deleted
.contains(entry) ? true : false));
});
return dictionaryEntries;
}
@ -498,6 +521,7 @@ public class RedactionIntegrationTest {
@Test
public void test270Rotated() {
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
MemoryStats.printMemoryStats();
AnalyzeResult result = reanalyzeService.analyze(request);
@ -508,12 +532,14 @@ public class RedactionIntegrationTest {
@Test
@Ignore
public void testLargeScannedFileOOM() {
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
MemoryStats.printMemoryStats();
AnalyzeResult result = reanalyzeService.analyze(request);
assertThat(result).isNotNull();
}
@Test
public void testMergedImages() throws IOException {
@ -552,14 +578,13 @@ public class RedactionIntegrationTest {
long rend = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (rend - rstart));
long end = System.currentTimeMillis();
System.out.println("duration: " + (end - start));
}
@Test
@Ignore
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
@ -635,7 +660,12 @@ public class RedactionIntegrationTest {
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder().id("fileAttributeId").label("Vertebrate Study").placeholder("{fileattributes.vertebrateStudy}").value("true").build()));
request.setFileAttributes(List.of(FileAttribute.builder()
.id("fileAttributeId")
.label("Vertebrate Study")
.placeholder("{fileattributes.vertebrateStudy}")
.value("true")
.build()));
AnalyzeResult result = reanalyzeService.analyze(request);
@ -683,12 +713,18 @@ public class RedactionIntegrationTest {
dictionary.get(AUTHOR).add("physical");
reanlysisVersions.put("physical", 2L);
// dictionary.get(VERTEBRATE).add("s-metolachlor");
// reanlysisVersions.put("s-metolachlor", 3L);
deleted.add("David Chubb");
dictionary.get(FALSE_POSITIVE).add("David Chubb");
reanlysisVersions.put("David Chubb", 3L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
.thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
start = System.currentTimeMillis();
@ -702,10 +738,8 @@ public class RedactionIntegrationTest {
request.setManualRedactions(manualRedactions);
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
end = System.currentTimeMillis();
@ -775,7 +809,6 @@ public class RedactionIntegrationTest {
.status(Status.APPROVED)
.build()));
manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment));
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
manualRedactions.getComments().put(manualAddId, List.of(comment));
@ -790,12 +823,10 @@ public class RedactionIntegrationTest {
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setManualRedactions(manualRedactions);
AnalyzeResult result = reanalyzeService.analyze(request);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
.id("5b940b2cb401ed9f5be6fc24f6e77bcf")
@ -816,7 +847,6 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
@ -833,7 +863,6 @@ public class RedactionIntegrationTest {
System.out.println("classificationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
RedactionRequest redactionRequest = RedactionRequest.builder()
@ -934,8 +963,10 @@ public class RedactionIntegrationTest {
});
}
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
return prepareStorage(pdfFileResource.getInputStream());
@ -967,7 +998,6 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = reanalyzeService.analyze(request);

View File

@ -1676,7 +1676,6 @@ da Silva Rejane
Das R
Das, R.
Daughtry, CST
David Chubb
David Chubb|Lorraine Britton
David Clarke
Davies

View File

@ -235,4 +235,5 @@ N/A
No details reported
Not available
Test facility
TBD
TBD
David Chubb