RED-4510: Refactoring after review

This commit is contained in:
Philipp Schramm 2022-07-21 12:44:04 +02:00
parent 40380a12a3
commit 8c5ede3fde

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server;
import static java.util.Map.entry;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
@ -109,24 +110,21 @@ public class RulesTest {
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String IMPORTED_REDACTION = "imported_redaction";
private static final String PII = "PII";
private static final String RESOURCES_PATH = "src/test/resources/";
private static final String REDACTION_LOG_PATH = RESOURCES_PATH + "RedactionLog/";
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
private final static String TEST_DOSSIER_ID = "123";
private String TEST_FILE_ID = "123";
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, List<String>> falsePositive = new HashMap<>();
private final Map<String, List<String>> falseRecommendation = new HashMap<>();
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private static final Map<String, String> typeColorMap = Map.ofEntries(entry(VERTEBRATE, "#ff85f7"), entry(ADDRESS, "#ffe187"), entry(AUTHOR, "#ffe187"), entry(SPONSOR, "#85ebff"), entry(NO_REDACTION_INDICATOR, "#be85ff"), entry(REDACTION_INDICATOR, "#caff85"), entry(HINT_ONLY, "#abc0c4"), entry(MUST_REDACT, "#fab4c0"), entry(PUBLISHED_INFORMATION, "#85ebff"), entry(TEST_METHOD, "#91fae8"), entry(PII, "#66ccff"), entry(PURITY, "#ffe187"), entry(IMAGE, "#fcc5fb"), entry(OCR, "#fcc5fb"), entry(LOGO, "#ffe187"), entry(FORMULA, "#ffe187"), entry(SIGNATURE, "#ffe187"), entry(IMPORTED_REDACTION, "#fcfbe6"));
private static final Map<String, Boolean> hintTypeMap = Map.ofEntries(entry(VERTEBRATE, true), entry(ADDRESS, false), entry(AUTHOR, false), entry(SPONSOR, false), entry(NO_REDACTION_INDICATOR, true), entry(REDACTION_INDICATOR, true), entry(HINT_ONLY, true), entry(MUST_REDACT, true), entry(PUBLISHED_INFORMATION, true), entry(TEST_METHOD, true), entry(PII, false), entry(PURITY, false), entry(IMAGE, true), entry(OCR, true), entry(FORMULA, false), entry(LOGO, false), entry(SIGNATURE, false), entry(DOSSIER_REDACTIONS, false), entry(IMPORTED_REDACTION, false));
private static final Map<String, Boolean> caseInSensitiveMap = Map.ofEntries(entry(VERTEBRATE, true), entry(ADDRESS, false), entry(AUTHOR, false), entry(SPONSOR, false), entry(NO_REDACTION_INDICATOR, true), entry(REDACTION_INDICATOR, true), entry(HINT_ONLY, true), entry(MUST_REDACT, true), entry(PUBLISHED_INFORMATION, true), entry(TEST_METHOD, false), entry(PII, false), entry(PURITY, false), entry(IMAGE, true), entry(OCR, true), entry(SIGNATURE, true), entry(LOGO, true), entry(FORMULA, true), entry(DOSSIER_REDACTIONS, false), entry(IMPORTED_REDACTION, false));
private static final Map<String, Boolean> recommendationTypeMap = Map.ofEntries(entry(VERTEBRATE, false), entry(ADDRESS, false), entry(AUTHOR, false), entry(SPONSOR, false), entry(NO_REDACTION_INDICATOR, false), entry(REDACTION_INDICATOR, false), entry(HINT_ONLY, false), entry(MUST_REDACT, false), entry(PUBLISHED_INFORMATION, false), entry(TEST_METHOD, false), entry(PII, false), entry(PURITY, false), entry(IMAGE, false), entry(OCR, false), entry(FORMULA, false), entry(SIGNATURE, false), entry(LOGO, false), entry(DOSSIER_REDACTIONS, false), entry(IMPORTED_REDACTION, false));
private static final Map<String, Integer> rankTypeMap = Map.ofEntries(entry(PURITY, 155), entry(PII, 150), entry(ADDRESS, 140), entry(AUTHOR, 130), entry(SPONSOR, 120), entry(VERTEBRATE, 110), entry(MUST_REDACT, 100), entry(REDACTION_INDICATOR, 90), entry(NO_REDACTION_INDICATOR, 80), entry(PUBLISHED_INFORMATION, 70), entry(TEST_METHOD, 60), entry(HINT_ONLY, 50), entry(IMAGE, 30), entry(OCR, 29), entry(LOGO, 28), entry(SIGNATURE, 27), entry(FORMULA, 26), entry(DOSSIER_REDACTIONS, 200), entry(IMPORTED_REDACTION, 200));
private final Colors colors = new Colors();
private final Map<String, Long> reanalysisVersions = new HashMap<>();
private final Set<String> deleted = new HashSet<>();
@Autowired
private RedactionController redactionController;
@Autowired
@ -151,7 +149,15 @@ public class RulesTest {
private RabbitTemplate rabbitTemplate;
@MockBean
private LegalBasisClient legalBasisClient;
private String TEST_FILE_ID = "123";
@After
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
}
private static String loadFromClassPath(String path) {
@ -173,18 +179,12 @@ public class RulesTest {
}
@After
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
}
@Before
public void stubClients() {
objectMapper.registerModule(new JavaTimeModule());
objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
@ -212,252 +212,66 @@ public class RulesTest {
}
private void loadDictionaryForTest() {
/**
* Generates RedactionLog for given file and saves it here: REDACTION_LOG_PATH.
* Test is ignored, because it's for manual tests.
*/
@Ignore
@Test
public void generateRedactionLogForOneFile() {
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
private void loadTypeForTest() {
typeColorMap.put(VERTEBRATE, "#ff85f7");
typeColorMap.put(ADDRESS, "#ffe187");
typeColorMap.put(AUTHOR, "#ffe187");
typeColorMap.put(SPONSOR, "#85ebff");
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
typeColorMap.put(HINT_ONLY, "#abc0c4");
typeColorMap.put(MUST_REDACT, "#fab4c0");
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
typeColorMap.put(TEST_METHOD, "#91fae8");
typeColorMap.put(PII, "#66ccff");
typeColorMap.put(PURITY, "#ffe187");
typeColorMap.put(IMAGE, "#fcc5fb");
typeColorMap.put(OCR, "#fcc5fb");
typeColorMap.put(LOGO, "#ffe187");
typeColorMap.put(FORMULA, "#ffe187");
typeColorMap.put(SIGNATURE, "#ffe187");
typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6");
hintTypeMap.put(VERTEBRATE, true);
hintTypeMap.put(ADDRESS, false);
hintTypeMap.put(AUTHOR, false);
hintTypeMap.put(SPONSOR, false);
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY, true);
hintTypeMap.put(MUST_REDACT, true);
hintTypeMap.put(PUBLISHED_INFORMATION, true);
hintTypeMap.put(TEST_METHOD, true);
hintTypeMap.put(PII, false);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
hintTypeMap.put(DOSSIER_REDACTIONS, false);
hintTypeMap.put(IMPORTED_REDACTION, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
caseInSensitiveMap.put(AUTHOR, false);
caseInSensitiveMap.put(SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);
caseInSensitiveMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(IMPORTED_REDACTION, false);
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
recommendationTypeMap.put(AUTHOR, false);
recommendationTypeMap.put(SPONSOR, false);
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
recommendationTypeMap.put(REDACTION_INDICATOR, false);
recommendationTypeMap.put(HINT_ONLY, false);
recommendationTypeMap.put(MUST_REDACT, false);
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
recommendationTypeMap.put(TEST_METHOD, false);
recommendationTypeMap.put(PII, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(IMPORTED_REDACTION, false);
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
rankTypeMap.put(AUTHOR, 130);
rankTypeMap.put(SPONSOR, 120);
rankTypeMap.put(VERTEBRATE, 110);
rankTypeMap.put(MUST_REDACT, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
rankTypeMap.put(IMPORTED_REDACTION, 200);
colors.setSkippedColor("#cccccc");
colors.setRequestAddColor("#04b093");
colors.setRequestRemoveColor("#04b093");
}
private List<Type> getTypeResponse() {
return typeColorMap.entrySet()
.stream()
.map(typeColor -> Type.builder()
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(typeColor.getKey())
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColor.getValue())
.isHint(hintTypeMap.get(typeColor.getKey()))
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
.rank(rankTypeMap.get(typeColor.getKey()))
.build())
.collect(Collectors.toList());
}
private void mockDictionaryCalls(Long version) {
when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMPORTED_REDACTION, true));
}
private String cleanDictionaryEntry(String entry) {
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
}
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
return Type.builder()
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
.falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>())
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
.rank(rankTypeMap.get(type))
.build();
}
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> dictionaryEntries.add(DictionaryEntry.builder()
.value(entry)
.version(reanalysisVersions.getOrDefault(entry, 0L))
.deleted(deleted.contains(entry))
.build()));
return dictionaryEntries;
String fileName = "files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf";
generateAndSaveRedactionLog(fileName);
}
/**
* Generates RedactionLog for given file and saves it here: REDACTION_LOG_PATH
* Generates RedactionLog for all files and saves it here: REDACTION_LOG_PATH.
* Test is ignored, because it's for manual tests.
*/
@Ignore
@Test
@SneakyThrows
public void generateRedactionLogForOneFile() {
public void generateRedactionLogForAllFiles() {
Set<String> files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
System.out.println("Will generate RedactionLog for " + files.size() + " files.");
TEST_FILE_ID = "1000";
files.forEach(this::generateAndSaveRedactionLog);
}
/**
* Analyses file and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH.
* If RedactionLog Json does not exist, test will fail.
* Test is ignored, because it's for manual tests.
*/
@Ignore
@Test
public void analyseFileAndCompareRedactionLog() {
String fileName = "files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf";
generateRedactionLog(fileName);
analyseFileAndCompareRedactionLog(fileName);
}
/**
* Analyses all files and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH.
* If RedactionLogs Json does not exist, test will fail.
* Test is ignored, because it's for manual tests.
*/
@Ignore
@Test
public void analyseAllFilesAndCompareRedactionLogs() {
Set<String> files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
System.out.println("Will analyse " + files.size() + " files and compare its RedactionLogs.");
TEST_FILE_ID = "5000";
files.forEach(this::analyseFileAndCompareRedactionLog);
}
@SneakyThrows
public void generateRedactionLog(String fileName) {
public void generateAndSaveRedactionLog(String fileName) {
increaseTestFileId();
@ -475,101 +289,6 @@ public class RulesTest {
}
private void increaseTestFileId() {
TEST_FILE_ID = Integer.toString(Integer.parseInt(TEST_FILE_ID) + 1);
}
@SneakyThrows
private void loadNerForTest() {
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
var bytes = IOUtils.toByteArray(responseJson.getInputStream());
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes);
}
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
return prepareStorage(pdfFileResource.getInputStream());
}
@SneakyThrows
private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) {
File pdfFile = new File(pdfFileName);
String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath();
File dr = new File(directory);
boolean created = dr.mkdirs();
if (created) {
System.out.println("Directory was created");
}
String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json");
File file = new File(directory, fileName);
ObjectMapper mapper = new ObjectMapper();
mapper.registerModule(new JavaTimeModule());
mapper.writeValue(file, redactionLog);
System.out.println("Saved RedactionLog for " + fileName + " here " + directory);
}
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {
AnalyzeRequest request = AnalyzeRequest.builder()
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.lastProcessed(OffsetDateTime.now())
.build();
var bytes = IOUtils.toByteArray(stream);
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
return request;
}
/**
* Generates RedactionLog for all files and saves it here: REDACTION_LOG_PATH
*/
@Ignore
@Test
@SneakyThrows
public void generateRedactionLogForAllFiles() {
Set<String> files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
System.out.println("Will generate RedactionLog for " + files.size() + " files.");
TEST_FILE_ID = "1000";
files.forEach(this::generateRedactionLog);
}
/**
* Analyses file and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH.
* If RedactionLog Json does not exist, test will fail.
*/
@Ignore
@Test
@SneakyThrows
public void analyseFileAndCompareRedactionLog() {
String fileName = "files/Compounds/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf";
analyseFileAndCompareRedactionLog(fileName);
}
@SneakyThrows
public void analyseFileAndCompareRedactionLog(String fileName) {
@ -586,6 +305,7 @@ public class RulesTest {
RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
// All timestamps are ignored, because they are for sure different
assertThat(redactionLog.getAnalysisVersion()).isEqualTo(savedRedactionLog.getAnalysisVersion());
assertThat(redactionLog.getAnalysisNumber()).isEqualTo(savedRedactionLog.getAnalysisNumber());
assertThat(redactionLog.getDictionaryVersion()).isEqualTo(savedRedactionLog.getDictionaryVersion());
@ -690,6 +410,12 @@ public class RulesTest {
}
private void increaseTestFileId() {
TEST_FILE_ID = Integer.toString(Integer.parseInt(TEST_FILE_ID) + 1);
}
@SneakyThrows
private RedactionLog loadSavedRedactionLog(String pdfFileName) {
@ -697,29 +423,171 @@ public class RulesTest {
String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath();
String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json");
File file = new File(directory, fileName);
ObjectMapper om = new ObjectMapper();
om.registerModule(new JavaTimeModule());
om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
assertThat(file).exists();
return om.readValue(file, RedactionLog.class);
return objectMapper.readValue(file, RedactionLog.class);
}
/**
* Analyses all files and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH
* If RedactionLogs Json does not exist, test will fail.
*/
@Ignore
@Test
@SneakyThrows
public void analyseAllFilesAndCompareRedactionLogs() {
private void loadNerForTest() {
Set<String> files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
System.out.println("Will analyse " + files.size() + " files and compare its RedactionLogs.");
TEST_FILE_ID = "5000";
files.forEach(this::analyseFileAndCompareRedactionLog);
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
var bytes = IOUtils.toByteArray(responseJson.getInputStream());
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes);
}
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
return prepareStorage(pdfFileResource.getInputStream());
}
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {
AnalyzeRequest request = AnalyzeRequest.builder()
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.lastProcessed(OffsetDateTime.now())
.build();
var bytes = IOUtils.toByteArray(stream);
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
return request;
}
@SneakyThrows
private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) {
File pdfFile = new File(pdfFileName);
String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath();
File dr = new File(directory);
boolean created = dr.mkdirs();
if (created) {
System.out.println("Directory was created");
}
String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json");
File file = new File(directory, fileName);
objectMapper.writeValue(file, redactionLog);
System.out.println("Saved RedactionLog for " + fileName + " here " + directory);
}
private void loadDictionaryForTest() {
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
private void loadTypeForTest() {
colors.setSkippedColor("#cccccc");
colors.setRequestAddColor("#04b093");
colors.setRequestRemoveColor("#04b093");
}
private List<Type> getTypeResponse() {
return typeColorMap.entrySet()
.stream()
.map(typeColor -> Type.builder()
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(typeColor.getKey())
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColor.getValue())
.isHint(hintTypeMap.get(typeColor.getKey()))
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
.rank(rankTypeMap.get(typeColor.getKey()))
.build())
.collect(Collectors.toList());
}
private void mockDictionaryCalls(Long version) {
when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMPORTED_REDACTION, true));
}
private String cleanDictionaryEntry(String entry) {
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
}
@ -741,6 +609,30 @@ public class RulesTest {
}
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
return Type.builder()
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
.falseRecommendationEntries(new ArrayList<>())
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
.rank(rankTypeMap.get(type))
.build();
}
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(0L).deleted(false).build()));
return dictionaryEntries;
}
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
public static class RedactionIntegrationTestConfiguration {