Pull request #199: RED-1806: Use localDictionary also in Reanlysis to fix problem at removing values from false_positive that affect rules that should find values in entire document
Merge in RED/redaction-service from RED-1806 to master * commit '0832ea15c09335cfb0d9f0139764ab6c99a22423': RED-1806: Use localDictionary also in Reanlysis to fix problem at removing values from false_positive that affect rules that should find values in entire document
This commit is contained in:
commit
2c4c52d891
@ -43,35 +43,13 @@ public class EntityRedactionService {
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||
documentEntities.stream().forEach(entity -> {
|
||||
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
|
||||
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
|
||||
.add(entity);
|
||||
}
|
||||
});
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(documentEntities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber, fileAttributes);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
|
||||
}
|
||||
|
||||
for (Entity entity : documentEntities) {
|
||||
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
|
||||
.add(entityPositionSequence);
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
|
||||
classifiedDoc.getEntities()
|
||||
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
|
||||
}
|
||||
}
|
||||
classifiedDoc.setEntities(convertToEnititesPerPage(documentEntities));
|
||||
|
||||
dictionaryService.updateExternalDictionary(dictionary, dossierTemplateId);
|
||||
|
||||
@ -80,6 +58,39 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
public Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities){
|
||||
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
|
||||
for (Entity entity : entities) {
|
||||
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
|
||||
.add(entityPositionSequence);
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
|
||||
entitiesPerPage
|
||||
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
|
||||
}
|
||||
}
|
||||
return entitiesPerPage;
|
||||
}
|
||||
|
||||
|
||||
public Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary){
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||
entities.stream().forEach(entity -> {
|
||||
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
|
||||
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
|
||||
.add(entity);
|
||||
}
|
||||
});
|
||||
return hintsPerSectionNumber;
|
||||
}
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
|
||||
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
@ -123,42 +134,46 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(analysedSection.getEntities());
|
||||
|
||||
for (Image image : analysedRowSection.getImages()) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
classifiedDoc.getImages().computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
|
||||
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
if (dictionary.isRecommendation(key)) {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
if (!dictionary.containsValue(key, value)) {
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
|
||||
if (dictionary.getLocalAccessMap().get(key) == null) {
|
||||
log.warn("Dictionary {} is null", key);
|
||||
}
|
||||
|
||||
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
|
||||
log.warn("Dictionary {} localEntries is null", key);
|
||||
}
|
||||
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
});
|
||||
|
||||
return documentEntities;
|
||||
}
|
||||
|
||||
|
||||
public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary){
|
||||
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
if (dictionary.isRecommendation(key)) {
|
||||
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
if (!dictionary.containsValue(key, value)) {
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
|
||||
if (dictionary.getLocalAccessMap().get(key) == null) {
|
||||
log.warn("Dictionary {} is null", key);
|
||||
}
|
||||
|
||||
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
|
||||
log.warn("Dictionary {} localEntries is null", key);
|
||||
}
|
||||
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private List<SectionSearchableTextPair> processTablePerRow(Document classifiedDoc, Table table,
|
||||
AtomicInteger sectionNumber, Dictionary dictionary,
|
||||
boolean local,
|
||||
|
||||
@ -13,9 +13,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUti
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
@ -60,8 +62,7 @@ public class ReanalyzeService {
|
||||
|
||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getDossierTemplateId(), analyzeRequest.getManualRedactions(), analyzeRequest
|
||||
.getDossierId(), analyzeRequest.getFileAttributes());
|
||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest
|
||||
.getDossierTemplateId());
|
||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getDossierTemplateId());
|
||||
|
||||
log.info("Redaction analysis successful...");
|
||||
|
||||
@ -105,6 +106,59 @@ public class ReanalyzeService {
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog
|
||||
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
}
|
||||
|
||||
List<SectionText> reanalysisSections = text.getSectionTexts()
|
||||
.stream()
|
||||
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
Set<Entity> entities = findEntities(reanalysisSections, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage);
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = entityRedactionService.getHintsPerSection(entities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(reanalysisSections, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
|
||||
Map<Integer, List<Entity>> entitiesPerPage = entityRedactionService.convertToEnititesPerPage(entities);
|
||||
|
||||
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
|
||||
for (int page = 1; page <= text.getNumberOfPages(); page++) {
|
||||
if (entitiesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, page, analyzeRequest
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
if (imagesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, page, analyzeRequest
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
|
||||
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||
AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
analyzeResult.setWasReanalyzed(true);
|
||||
return analyzeResult;
|
||||
}
|
||||
|
||||
|
||||
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
|
||||
Text text, AnalyzeRequest analyzeRequest) {
|
||||
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||
@ -128,31 +182,20 @@ public class ReanalyzeService {
|
||||
|
||||
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
}
|
||||
return sectionsToReanalyse;
|
||||
}
|
||||
|
||||
List<SectionText> reanalysisSections = new ArrayList<>();
|
||||
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
|
||||
if (sectionsToReanalyse.contains(sectionText.getSectionNumber())) {
|
||||
reanalysisSections.add(sectionText);
|
||||
}
|
||||
}
|
||||
|
||||
//--
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
private Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
|
||||
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
|
||||
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
|
||||
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
|
||||
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, local);
|
||||
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
|
||||
.getCellStarts());
|
||||
@ -160,14 +203,15 @@ public class ReanalyzeService {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
if (reanalysisSection.getImages() != null && !reanalysisSection.getImages()
|
||||
if (!local && reanalysisSection.getImages() != null && !reanalysisSection.getImages()
|
||||
.isEmpty() && analyzeRequest.getManualRedactions() != null && analyzeRequest.getManualRedactions()
|
||||
.getImageRecategorizations() != null) {
|
||||
for (Image image : reanalysisSection.getImages()) {
|
||||
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
|
||||
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
|
||||
.getImageRecategorizations()) {
|
||||
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId().equals(imageId)) {
|
||||
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId()
|
||||
.equals(imageId)) {
|
||||
image.setType(imageRecategorization.getType());
|
||||
}
|
||||
}
|
||||
@ -177,7 +221,10 @@ public class ReanalyzeService {
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(entities)
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream
|
||||
.concat(entities.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber())
|
||||
.stream())
|
||||
.collect(Collectors.toSet()) : entities)
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
@ -191,54 +238,19 @@ public class ReanalyzeService {
|
||||
}
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
entities.addAll(analysedRowSection.getEntities());
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
|
||||
for (Image image : analysedRowSection.getImages()) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
|
||||
entityRedactionService.addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
});
|
||||
|
||||
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
|
||||
for (Entity entity : entities) {
|
||||
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
|
||||
.add(entityPositionSequence);
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
|
||||
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
|
||||
}
|
||||
}
|
||||
|
||||
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
|
||||
for (int page = 1; page <= text.getNumberOfPages(); page++) {
|
||||
if (entitiesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, page, analyzeRequest
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
if (imagesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, page, analyzeRequest
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
|
||||
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||
AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
analyzeResult.setWasReanalyzed(true);
|
||||
return analyzeResult;
|
||||
return entities;
|
||||
}
|
||||
|
||||
|
||||
@ -296,10 +308,9 @@ public class ReanalyzeService {
|
||||
|
||||
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
|
||||
|
||||
redactionLog.getRedactionLogEntry().forEach(entry ->
|
||||
entry.getPositions().forEach(pos ->
|
||||
entry.setExcluded(excludedPages != null && excludedPages.contains(pos.getPage()))
|
||||
));
|
||||
redactionLog.getRedactionLogEntry()
|
||||
.forEach(entry -> entry.getPositions()
|
||||
.forEach(pos -> entry.setExcluded(excludedPages != null && excludedPages.contains(pos.getPage()))));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -18,7 +18,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
@ -84,7 +86,6 @@ public class RedactionIntegrationTest {
|
||||
|
||||
private static final String PII = "PII";
|
||||
|
||||
|
||||
@Autowired
|
||||
private RedactionController redactionController;
|
||||
|
||||
@ -127,6 +128,7 @@ public class RedactionIntegrationTest {
|
||||
private final Map<String, Integer> rankTypeMap = new HashMap<>();
|
||||
private final Colors colors = new Colors();
|
||||
private final Map<String, Long> reanlysisVersions = new HashMap<>();
|
||||
private final Set<String> deleted = new HashSet<>();
|
||||
|
||||
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
|
||||
private final static String TEST_DOSSIER_ID = "123";
|
||||
@ -152,18 +154,20 @@ public class RedactionIntegrationTest {
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@After
|
||||
public void cleanupStorage() {
|
||||
|
||||
if (this.storageService instanceof FileSystemBackedStorageService) {
|
||||
((FileSystemBackedStorageService) this.storageService).clearStorage();
|
||||
}
|
||||
@ -179,7 +183,8 @@ public class RedactionIntegrationTest {
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
|
||||
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse
|
||||
.builder()
|
||||
.types(getTypeResponse())
|
||||
.build());
|
||||
|
||||
@ -188,7 +193,7 @@ public class RedactionIntegrationTest {
|
||||
.types(List.of(TypeResult.builder()
|
||||
.type(DOSSIER_REDACTIONS)
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor( "#ffe187")
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
|
||||
@ -196,26 +201,42 @@ public class RedactionIntegrationTest {
|
||||
.build()))
|
||||
.build());
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(HINT_ONLY, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(MUST_REDACT, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(TEST_METHOD, false));
|
||||
when(dictionaryClient.getDictionaryForType(PII, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(PURITY, false));
|
||||
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
|
||||
when(dictionaryClient.getDictionaryForType(OCR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
|
||||
when(dictionaryClient.getDictionaryForType(LOGO, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
|
||||
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
|
||||
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(SIGNATURE, false));
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(FORMULA, false));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
@ -477,7 +498,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
return DictionaryResponse.builder()
|
||||
.hexColor(typeColorMap.get(type))
|
||||
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
|
||||
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary
|
||||
.get(type)))
|
||||
.isHint(hintTypeMap.get(type))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(type))
|
||||
.isRecommendation(recommendationTypeMap.get(type))
|
||||
@ -490,7 +512,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
||||
entries.forEach(entry -> {
|
||||
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
|
||||
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, deleted
|
||||
.contains(entry) ? true : false));
|
||||
});
|
||||
return dictionaryEntries;
|
||||
}
|
||||
@ -498,6 +521,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
@Test
|
||||
public void test270Rotated() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
|
||||
MemoryStats.printMemoryStats();
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
@ -508,12 +532,14 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void testLargeScannedFileOOM() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
|
||||
MemoryStats.printMemoryStats();
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
assertThat(result).isNotNull();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMergedImages() throws IOException {
|
||||
|
||||
@ -552,14 +578,13 @@ public class RedactionIntegrationTest {
|
||||
long rend = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
|
||||
@ -635,7 +660,12 @@ public class RedactionIntegrationTest {
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
request.setFileAttributes(List.of(FileAttribute.builder().id("fileAttributeId").label("Vertebrate Study").placeholder("{fileattributes.vertebrateStudy}").value("true").build()));
|
||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||
.id("fileAttributeId")
|
||||
.label("Vertebrate Study")
|
||||
.placeholder("{fileattributes.vertebrateStudy}")
|
||||
.value("true")
|
||||
.build()));
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
@ -683,12 +713,18 @@ public class RedactionIntegrationTest {
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
// dictionary.get(VERTEBRATE).add("s-metolachlor");
|
||||
// reanlysisVersions.put("s-metolachlor", 3L);
|
||||
deleted.add("David Chubb");
|
||||
|
||||
dictionary.get(FALSE_POSITIVE).add("David Chubb");
|
||||
reanlysisVersions.put("David Chubb", 3L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
@ -702,10 +738,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
request.setManualRedactions(manualRedactions);
|
||||
|
||||
|
||||
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
|
||||
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
end = System.currentTimeMillis();
|
||||
@ -775,7 +809,6 @@ public class RedactionIntegrationTest {
|
||||
.status(Status.APPROVED)
|
||||
.build()));
|
||||
|
||||
|
||||
manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment));
|
||||
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
|
||||
manualRedactions.getComments().put(manualAddId, List.of(comment));
|
||||
@ -790,12 +823,10 @@ public class RedactionIntegrationTest {
|
||||
|
||||
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
request.setManualRedactions(manualRedactions);
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
|
||||
.id("5b940b2cb401ed9f5be6fc24f6e77bcf")
|
||||
@ -816,7 +847,6 @@ public class RedactionIntegrationTest {
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
@ -833,7 +863,6 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("classificationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
|
||||
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
@ -934,8 +963,10 @@ public class RedactionIntegrationTest {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(String file) {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream());
|
||||
@ -967,7 +998,6 @@ public class RedactionIntegrationTest {
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
@ -1676,7 +1676,6 @@ da Silva Rejane
|
||||
Das R
|
||||
Das, R.
|
||||
Daughtry, CST
|
||||
David Chubb
|
||||
David Chubb|Lorraine Britton
|
||||
David Clarke
|
||||
Davies
|
||||
|
||||
@ -235,4 +235,5 @@ N/A
|
||||
No details reported
|
||||
Not available
|
||||
Test facility
|
||||
TBD
|
||||
TBD
|
||||
David Chubb
|
||||
Loading…
x
Reference in New Issue
Block a user