From 8c5ede3fde0fd1b2df9e8fca0908e9233d6d403c Mon Sep 17 00:00:00 2001 From: Philipp Schramm Date: Thu, 21 Jul 2022 12:44:04 +0200 Subject: [PATCH] RED-4510: Refactoring after review --- .../redaction/v1/server/RulesTest.java | 620 ++++++++---------- 1 file changed, 256 insertions(+), 364 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java index bd752544..b4966821 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server; +import static java.util.Map.entry; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.when; @@ -109,24 +110,21 @@ public class RulesTest { private static final String DOSSIER_REDACTIONS = "dossier_redactions"; private static final String IMPORTED_REDACTION = "imported_redaction"; private static final String PII = "PII"; - private static final String RESOURCES_PATH = "src/test/resources/"; - private static final String REDACTION_LOG_PATH = RESOURCES_PATH + "RedactionLog/"; private final static String TEST_DOSSIER_TEMPLATE_ID = "123"; private final static String TEST_DOSSIER_ID = "123"; + private String TEST_FILE_ID = "123"; private final Map> dictionary = new HashMap<>(); private final Map> dossierDictionary = new HashMap<>(); private final Map> falsePositive = new HashMap<>(); - private final Map> falseRecommendation = new HashMap<>(); - private final Map typeColorMap = new HashMap<>(); - private final Map hintTypeMap = new HashMap<>(); - private final Map caseInSensitiveMap = new HashMap<>(); - private final Map 
recommendationTypeMap = new HashMap<>(); - private final Map rankTypeMap = new HashMap<>(); + private static final Map typeColorMap = Map.ofEntries(entry(VERTEBRATE, "#ff85f7"), entry(ADDRESS, "#ffe187"), entry(AUTHOR, "#ffe187"), entry(SPONSOR, "#85ebff"), entry(NO_REDACTION_INDICATOR, "#be85ff"), entry(REDACTION_INDICATOR, "#caff85"), entry(HINT_ONLY, "#abc0c4"), entry(MUST_REDACT, "#fab4c0"), entry(PUBLISHED_INFORMATION, "#85ebff"), entry(TEST_METHOD, "#91fae8"), entry(PII, "#66ccff"), entry(PURITY, "#ffe187"), entry(IMAGE, "#fcc5fb"), entry(OCR, "#fcc5fb"), entry(LOGO, "#ffe187"), entry(FORMULA, "#ffe187"), entry(SIGNATURE, "#ffe187"), entry(IMPORTED_REDACTION, "#fcfbe6")); + private static final Map hintTypeMap = Map.ofEntries(entry(VERTEBRATE, true), entry(ADDRESS, false), entry(AUTHOR, false), entry(SPONSOR, false), entry(NO_REDACTION_INDICATOR, true), entry(REDACTION_INDICATOR, true), entry(HINT_ONLY, true), entry(MUST_REDACT, true), entry(PUBLISHED_INFORMATION, true), entry(TEST_METHOD, true), entry(PII, false), entry(PURITY, false), entry(IMAGE, true), entry(OCR, true), entry(FORMULA, false), entry(LOGO, false), entry(SIGNATURE, false), entry(DOSSIER_REDACTIONS, false), entry(IMPORTED_REDACTION, false)); + private static final Map caseInSensitiveMap = Map.ofEntries(entry(VERTEBRATE, true), entry(ADDRESS, false), entry(AUTHOR, false), entry(SPONSOR, false), entry(NO_REDACTION_INDICATOR, true), entry(REDACTION_INDICATOR, true), entry(HINT_ONLY, true), entry(MUST_REDACT, true), entry(PUBLISHED_INFORMATION, true), entry(TEST_METHOD, false), entry(PII, false), entry(PURITY, false), entry(IMAGE, true), entry(OCR, true), entry(SIGNATURE, true), entry(LOGO, true), entry(FORMULA, true), entry(DOSSIER_REDACTIONS, false), entry(IMPORTED_REDACTION, false)); + private static final Map recommendationTypeMap = Map.ofEntries(entry(VERTEBRATE, false), entry(ADDRESS, false), entry(AUTHOR, false), entry(SPONSOR, false), entry(NO_REDACTION_INDICATOR, false), 
entry(REDACTION_INDICATOR, false), entry(HINT_ONLY, false), entry(MUST_REDACT, false), entry(PUBLISHED_INFORMATION, false), entry(TEST_METHOD, false), entry(PII, false), entry(PURITY, false), entry(IMAGE, false), entry(OCR, false), entry(FORMULA, false), entry(SIGNATURE, false), entry(LOGO, false), entry(DOSSIER_REDACTIONS, false), entry(IMPORTED_REDACTION, false)); + private static final Map rankTypeMap = Map.ofEntries(entry(PURITY, 155), entry(PII, 150), entry(ADDRESS, 140), entry(AUTHOR, 130), entry(SPONSOR, 120), entry(VERTEBRATE, 110), entry(MUST_REDACT, 100), entry(REDACTION_INDICATOR, 90), entry(NO_REDACTION_INDICATOR, 80), entry(PUBLISHED_INFORMATION, 70), entry(TEST_METHOD, 60), entry(HINT_ONLY, 50), entry(IMAGE, 30), entry(OCR, 29), entry(LOGO, 28), entry(SIGNATURE, 27), entry(FORMULA, 26), entry(DOSSIER_REDACTIONS, 200), entry(IMPORTED_REDACTION, 200)); private final Colors colors = new Colors(); - private final Map reanalysisVersions = new HashMap<>(); - private final Set deleted = new HashSet<>(); + @Autowired private RedactionController redactionController; @Autowired @@ -151,7 +149,15 @@ public class RulesTest { private RabbitTemplate rabbitTemplate; @MockBean private LegalBasisClient legalBasisClient; - private String TEST_FILE_ID = "123"; + + + @After + public void cleanupStorage() { + + if (this.storageService instanceof FileSystemBackedStorageService) { + ((FileSystemBackedStorageService) this.storageService).clearStorage(); + } + } private static String loadFromClassPath(String path) { @@ -173,18 +179,12 @@ public class RulesTest { } - @After - public void cleanupStorage() { - - if (this.storageService instanceof FileSystemBackedStorageService) { - ((FileSystemBackedStorageService) this.storageService).clearStorage(); - } - } - - @Before public void stubClients() { + objectMapper.registerModule(new JavaTimeModule()); + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + 
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES)); @@ -212,252 +212,66 @@ public class RulesTest { } - private void loadDictionaryForTest() { + /** + * Generates RedactionLog for given file and saves it here: REDACTION_LOG_PATH. + * Test is ignored, because it's for manual tests. + */ + @Ignore + @Test + public void generateRedactionLogForOneFile() { - dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) - 
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) - 
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); - - falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - - } - - - private void loadTypeForTest() { - - typeColorMap.put(VERTEBRATE, "#ff85f7"); - typeColorMap.put(ADDRESS, "#ffe187"); - typeColorMap.put(AUTHOR, "#ffe187"); - typeColorMap.put(SPONSOR, "#85ebff"); - typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); - typeColorMap.put(REDACTION_INDICATOR, "#caff85"); - typeColorMap.put(HINT_ONLY, "#abc0c4"); - typeColorMap.put(MUST_REDACT, "#fab4c0"); - typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); - typeColorMap.put(TEST_METHOD, "#91fae8"); - typeColorMap.put(PII, "#66ccff"); - typeColorMap.put(PURITY, "#ffe187"); - typeColorMap.put(IMAGE, "#fcc5fb"); - typeColorMap.put(OCR, "#fcc5fb"); - typeColorMap.put(LOGO, "#ffe187"); - typeColorMap.put(FORMULA, "#ffe187"); - typeColorMap.put(SIGNATURE, "#ffe187"); - typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6"); - - hintTypeMap.put(VERTEBRATE, true); - hintTypeMap.put(ADDRESS, false); - hintTypeMap.put(AUTHOR, false); - hintTypeMap.put(SPONSOR, false); - hintTypeMap.put(NO_REDACTION_INDICATOR, true); - hintTypeMap.put(REDACTION_INDICATOR, true); - hintTypeMap.put(HINT_ONLY, true); - hintTypeMap.put(MUST_REDACT, true); - hintTypeMap.put(PUBLISHED_INFORMATION, true); - hintTypeMap.put(TEST_METHOD, true); - hintTypeMap.put(PII, false); - hintTypeMap.put(PURITY, false); - hintTypeMap.put(IMAGE, true); - hintTypeMap.put(OCR, true); - hintTypeMap.put(FORMULA, false); - hintTypeMap.put(LOGO, false); - hintTypeMap.put(SIGNATURE, false); - hintTypeMap.put(DOSSIER_REDACTIONS, false); - hintTypeMap.put(IMPORTED_REDACTION, false); - - caseInSensitiveMap.put(VERTEBRATE, true); - 
caseInSensitiveMap.put(ADDRESS, false); - caseInSensitiveMap.put(AUTHOR, false); - caseInSensitiveMap.put(SPONSOR, false); - caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); - caseInSensitiveMap.put(REDACTION_INDICATOR, true); - caseInSensitiveMap.put(HINT_ONLY, true); - caseInSensitiveMap.put(MUST_REDACT, true); - caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); - caseInSensitiveMap.put(TEST_METHOD, false); - caseInSensitiveMap.put(PII, false); - caseInSensitiveMap.put(PURITY, false); - caseInSensitiveMap.put(IMAGE, true); - caseInSensitiveMap.put(OCR, true); - caseInSensitiveMap.put(SIGNATURE, true); - caseInSensitiveMap.put(LOGO, true); - caseInSensitiveMap.put(FORMULA, true); - caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); - caseInSensitiveMap.put(IMPORTED_REDACTION, false); - - recommendationTypeMap.put(VERTEBRATE, false); - recommendationTypeMap.put(ADDRESS, false); - recommendationTypeMap.put(AUTHOR, false); - recommendationTypeMap.put(SPONSOR, false); - recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); - recommendationTypeMap.put(REDACTION_INDICATOR, false); - recommendationTypeMap.put(HINT_ONLY, false); - recommendationTypeMap.put(MUST_REDACT, false); - recommendationTypeMap.put(PUBLISHED_INFORMATION, false); - recommendationTypeMap.put(TEST_METHOD, false); - recommendationTypeMap.put(PII, false); - recommendationTypeMap.put(PURITY, false); - recommendationTypeMap.put(IMAGE, false); - recommendationTypeMap.put(OCR, false); - recommendationTypeMap.put(FORMULA, false); - recommendationTypeMap.put(SIGNATURE, false); - recommendationTypeMap.put(LOGO, false); - recommendationTypeMap.put(DOSSIER_REDACTIONS, false); - recommendationTypeMap.put(IMPORTED_REDACTION, false); - - rankTypeMap.put(PURITY, 155); - rankTypeMap.put(PII, 150); - rankTypeMap.put(ADDRESS, 140); - rankTypeMap.put(AUTHOR, 130); - rankTypeMap.put(SPONSOR, 120); - rankTypeMap.put(VERTEBRATE, 110); - rankTypeMap.put(MUST_REDACT, 100); - rankTypeMap.put(REDACTION_INDICATOR, 
90); - rankTypeMap.put(NO_REDACTION_INDICATOR, 80); - rankTypeMap.put(PUBLISHED_INFORMATION, 70); - rankTypeMap.put(TEST_METHOD, 60); - rankTypeMap.put(HINT_ONLY, 50); - rankTypeMap.put(IMAGE, 30); - rankTypeMap.put(OCR, 29); - rankTypeMap.put(LOGO, 28); - rankTypeMap.put(SIGNATURE, 27); - rankTypeMap.put(FORMULA, 26); - rankTypeMap.put(DOSSIER_REDACTIONS, 200); - rankTypeMap.put(IMPORTED_REDACTION, 200); - - colors.setSkippedColor("#cccccc"); - colors.setRequestAddColor("#04b093"); - colors.setRequestRemoveColor("#04b093"); - } - - - private List getTypeResponse() { - - return typeColorMap.entrySet() - .stream() - .map(typeColor -> Type.builder() - .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(typeColor.getKey()) - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColor.getValue()) - .isHint(hintTypeMap.get(typeColor.getKey())) - .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) - .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) - .rank(rankTypeMap.get(typeColor.getKey())) - .build()) - - .collect(Collectors.toList()); - } - - - private void mockDictionaryCalls(Long version) { - - when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); - when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(ADDRESS, false)); - when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false)); - when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false)); - when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false)); - when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + 
":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false)); - when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false)); - when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false)); - when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false)); - when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false)); - when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false)); - when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false)); - when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false)); - when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false)); - when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(LOGO, false)); - when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SIGNATURE, false)); - when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FORMULA, false)); - when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true)); - when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + 
TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMPORTED_REDACTION, true)); - - } - - - private String cleanDictionaryEntry(String entry) { - - return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); - } - - - private Type getDictionaryResponse(String type, boolean isDossierDictionary) { - - return Type.builder() - .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColorMap.get(type)) - .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) - .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) - .falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) - .isHint(hintTypeMap.get(type)) - .isCaseInsensitive(caseInSensitiveMap.get(type)) - .isRecommendation(recommendationTypeMap.get(type)) - .rank(rankTypeMap.get(type)) - .build(); - } - - - private List toDictionaryEntry(List entries) { - - List dictionaryEntries = new ArrayList<>(); - entries.forEach(entry -> dictionaryEntries.add(DictionaryEntry.builder() - .value(entry) - .version(reanalysisVersions.getOrDefault(entry, 0L)) - .deleted(deleted.contains(entry)) - .build())); - return dictionaryEntries; + String fileName = "files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf"; + generateAndSaveRedactionLog(fileName); } /** - * Generates RedactionLog for given file and saves it here: REDACTION_LOG_PATH + * Generates RedactionLog for all files and saves it here: REDACTION_LOG_PATH. + * Test is ignored, because it's for manual tests. 
*/ @Ignore @Test - @SneakyThrows - public void generateRedactionLogForOneFile() { + public void generateRedactionLogForAllFiles() { + + Set files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH)); + System.out.println("Will generate RedactionLog for " + files.size() + " files."); + TEST_FILE_ID = "1000"; + files.forEach(this::generateAndSaveRedactionLog); + } + + + /** + * Analyses file and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH. + * If RedactionLog Json does not exist, test will fail. + * Test is ignored, because it's for manual tests. + */ + @Ignore + @Test + public void analyseFileAndCompareRedactionLog() { String fileName = "files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf"; - generateRedactionLog(fileName); + analyseFileAndCompareRedactionLog(fileName); + } + + + /** + * Analyses all files and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH. + * If RedactionLogs Json does not exist, test will fail. + * Test is ignored, because it's for manual tests. 
+ */ + @Ignore + @Test + public void analyseAllFilesAndCompareRedactionLogs() { + + Set files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH)); + System.out.println("Will analyse " + files.size() + " files and compare its RedactionLogs."); + TEST_FILE_ID = "5000"; + files.forEach(this::analyseFileAndCompareRedactionLog); } @SneakyThrows - public void generateRedactionLog(String fileName) { + public void generateAndSaveRedactionLog(String fileName) { increaseTestFileId(); @@ -475,101 +289,6 @@ public class RulesTest { } - private void increaseTestFileId() { - - TEST_FILE_ID = Integer.toString(Integer.parseInt(TEST_FILE_ID) + 1); - } - - - @SneakyThrows - private void loadNerForTest() { - - ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); - var bytes = IOUtils.toByteArray(responseJson.getInputStream()); - storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes); - } - - - @SneakyThrows - private AnalyzeRequest prepareStorage(String file) { - - ClassPathResource pdfFileResource = new ClassPathResource(file); - - return prepareStorage(pdfFileResource.getInputStream()); - } - - - @SneakyThrows - private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) { - - File pdfFile = new File(pdfFileName); - - String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath(); - File dr = new File(directory); - boolean created = dr.mkdirs(); - if (created) { - System.out.println("Directory was created"); - } - - String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json"); - File file = new File(directory, fileName); - - ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new JavaTimeModule()); - mapper.writeValue(file, redactionLog); - - System.out.println("Saved RedactionLog for " + fileName + " here " + directory); - } - - - @SneakyThrows - private AnalyzeRequest 
prepareStorage(InputStream stream) { - - AnalyzeRequest request = AnalyzeRequest.builder() - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .lastProcessed(OffsetDateTime.now()) - .build(); - - var bytes = IOUtils.toByteArray(stream); - - storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes); - - return request; - - } - - - /** - * Generates RedactionLog for all files and saves it here: REDACTION_LOG_PATH - */ - @Ignore - @Test - @SneakyThrows - public void generateRedactionLogForAllFiles() { - - Set files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH)); - System.out.println("Will generate RedactionLog for " + files.size() + " files."); - TEST_FILE_ID = "1000"; - files.forEach(this::generateRedactionLog); - } - - - /** - * Analyses file and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH. - * If RedactionLog Json does not exist, test will fail. 
- */ - @Ignore - @Test - @SneakyThrows - public void analyseFileAndCompareRedactionLog() { - - String fileName = "files/Compounds/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf"; - analyseFileAndCompareRedactionLog(fileName); - } - - @SneakyThrows public void analyseFileAndCompareRedactionLog(String fileName) { @@ -586,6 +305,7 @@ public class RulesTest { RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + // All timestamps are ignored, because they are for sure different assertThat(redactionLog.getAnalysisVersion()).isEqualTo(savedRedactionLog.getAnalysisVersion()); assertThat(redactionLog.getAnalysisNumber()).isEqualTo(savedRedactionLog.getAnalysisNumber()); assertThat(redactionLog.getDictionaryVersion()).isEqualTo(savedRedactionLog.getDictionaryVersion()); @@ -690,6 +410,12 @@ public class RulesTest { } + private void increaseTestFileId() { + + TEST_FILE_ID = Integer.toString(Integer.parseInt(TEST_FILE_ID) + 1); + } + + @SneakyThrows private RedactionLog loadSavedRedactionLog(String pdfFileName) { @@ -697,29 +423,171 @@ public class RulesTest { String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath(); String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json"); File file = new File(directory, fileName); - - ObjectMapper om = new ObjectMapper(); - om.registerModule(new JavaTimeModule()); - om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - assertThat(file).exists(); - return om.readValue(file, RedactionLog.class); + + return objectMapper.readValue(file, RedactionLog.class); } - /** - * Analyses all files and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH - * If RedactionLogs Json does not exist, test will fail. 
- */ - @Ignore - @Test @SneakyThrows - public void analyseAllFilesAndCompareRedactionLogs() { + private void loadNerForTest() { - Set files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH)); - System.out.println("Will analyse " + files.size() + " files and compare its RedactionLogs."); - TEST_FILE_ID = "5000"; - files.forEach(this::analyseFileAndCompareRedactionLog); + ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); + var bytes = IOUtils.toByteArray(responseJson.getInputStream()); + storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes); + } + + + @SneakyThrows + private AnalyzeRequest prepareStorage(String file) { + + ClassPathResource pdfFileResource = new ClassPathResource(file); + + return prepareStorage(pdfFileResource.getInputStream()); + } + + + @SneakyThrows + private AnalyzeRequest prepareStorage(InputStream stream) { + + AnalyzeRequest request = AnalyzeRequest.builder() + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .lastProcessed(OffsetDateTime.now()) + .build(); + + var bytes = IOUtils.toByteArray(stream); + + storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes); + + return request; + + } + + + @SneakyThrows + private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) { + + File pdfFile = new File(pdfFileName); + + String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath(); + File dr = new File(directory); + boolean created = dr.mkdirs(); + if (created) { + System.out.println("Directory was created"); + } + + String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json"); + File file = new File(directory, fileName); + + objectMapper.writeValue(file, redactionLog); + + System.out.println("Saved RedactionLog for " + 
fileName + " here " + directory); + } + + + private void loadDictionaryForTest() { + + dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); + + falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + + } + + + private void loadTypeForTest() { + + 
colors.setSkippedColor("#cccccc"); + colors.setRequestAddColor("#04b093"); + colors.setRequestRemoveColor("#04b093"); + } + + /* Builds one Type per typeColorMap entry. The id is the type name, a colon and TEST_DOSSIER_TEMPLATE_ID; the hint, case-insensitivity, recommendation and rank values are looked up under the same key in hintTypeMap, caseInSensitiveMap, recommendationTypeMap and rankTypeMap. */ + private List getTypeResponse() { + + return typeColorMap.entrySet() + .stream() + .map(typeColor -> Type.builder() + .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(typeColor.getKey()) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColor.getValue()) + .isHint(hintTypeMap.get(typeColor.getKey())) + .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) + .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) + .rank(rankTypeMap.get(typeColor.getKey())) + .build()) + + .collect(Collectors.toList()); + } + + /* Stubs dictionaryClient.getDictionaryForType for each listed type id at the given version, answering with getDictionaryResponse for that type. Only DOSSIER_REDACTIONS and IMPORTED_REDACTION pass isDossierDictionary = true; every other type passes false. */ + private void mockDictionaryCalls(Long version) { + + when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(ADDRESS, false)); + when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false)); + when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false)); + when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false)); + when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false)); + 
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false)); + when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false)); + when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false)); + when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false)); + when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false)); + when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false)); + when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(LOGO, false)); + when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SIGNATURE, false)); + when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FORMULA, false)); + when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true)); + when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMPORTED_REDACTION, true)); + + } + + /* Passes the entry through TextNormalizationUtilities.removeHyphenLineBreaks and then replaces every remaining newline character with a single space. */ + private String cleanDictionaryEntry(String entry) { + + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); } @@ -741,6 +609,30 @@ public class RulesTest { } + private Type getDictionaryResponse(String type, boolean isDossierDictionary) { + /* Builds the Type that the stubbed dictionary client answers with for one type name. Entries come from dossierDictionary when isDossierDictionary is true, otherwise from dictionary. False positive entries fall back to an empty list when falsePositive has no key for the type; false recommendation entries are always an empty list. NOTE(review): the entries lookup is handed to toDictionaryEntry without a containsKey guard, so this appears to assume the chosen map already holds the type; confirm against the test setup. */ + return Type.builder() + 
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColorMap.get(type)) + .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) + .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) + .falseRecommendationEntries(new ArrayList<>()) + .isHint(hintTypeMap.get(type)) + .isCaseInsensitive(caseInSensitiveMap.get(type)) + .isRecommendation(recommendationTypeMap.get(type)) + .rank(rankTypeMap.get(type)) + .build(); + } + + /* Wraps each given value in a DictionaryEntry with version 0 and deleted = false, collecting the results into a new ArrayList. */ + private List toDictionaryEntry(List entries) { + + List dictionaryEntries = new ArrayList<>(); + entries.forEach(entry -> dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(0L).deleted(false).build())); + return dictionaryEntries; + } + + @Configuration @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class}) public static class RedactionIntegrationTestConfiguration {