diff --git a/.gitignore b/.gitignore index 6bb4bada..0365fa46 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ **/.DS_Store **/classpath-data.json **/dependencies-and-licenses-overview.txt +/redaction-service-v1/redaction-service-server-v1/src/test/resources/RedactionLog/ diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index 47a85da3..84ea2060 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -38,6 +38,13 @@ ${jackson.version} + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + ${jackson.version} + test + + org.ahocorasick ahocorasick diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java new file mode 100644 index 00000000..bd752544 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java @@ -0,0 +1,773 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.DirectoryStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import 
org.apache.commons.lang3.StringUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.kie.api.KieServices; +import org.kie.api.builder.KieBuilder; +import org.kie.api.builder.KieFileSystem; +import org.kie.api.builder.KieModule; +import org.kie.api.runtime.KieContainer; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.core.io.ClassPathResource; +import org.springframework.test.context.junit4.SpringRunner; + +import com.amazonaws.services.s3.AmazonS3; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type; +import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; +import com.iqser.red.service.redaction.v1.model.Change; +import com.iqser.red.service.redaction.v1.model.Engine; +import 
com.iqser.red.service.redaction.v1.model.ManualChange; +import com.iqser.red.service.redaction.v1.model.Rectangle; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.RedactionLogComment; +import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.RedactionLogLegalBasis; +import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.controller.RedactionController; +import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService; +import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; +import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +@RunWith(SpringRunner.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@Import(RulesTest.RedactionIntegrationTestConfiguration.class) +public class RulesTest { + + private static final String RULES_PATH = "drools/testRules.drl"; + private static final String RULES = loadFromClassPath(RULES_PATH); + private static final String VERTEBRATE = "vertebrate"; + private static final String ADDRESS = "CBI_address"; + private static final String AUTHOR = "CBI_author"; + private static 
final String SPONSOR = "CBI_sponsor"; + private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; + private static final String REDACTION_INDICATOR = "redaction_indicator"; + private static final String HINT_ONLY = "hint_only"; + private static final String MUST_REDACT = "must_redact"; + private static final String PUBLISHED_INFORMATION = "published_information"; + private static final String TEST_METHOD = "test_method"; + private static final String PURITY = "purity"; + private static final String IMAGE = "image"; + private static final String LOGO = "logo"; + private static final String SIGNATURE = "signature"; + private static final String FORMULA = "formula"; + private static final String OCR = "ocr"; + private static final String DOSSIER_REDACTIONS = "dossier_redactions"; + private static final String IMPORTED_REDACTION = "imported_redaction"; + private static final String PII = "PII"; + + private static final String RESOURCES_PATH = "src/test/resources/"; + + private static final String REDACTION_LOG_PATH = RESOURCES_PATH + "RedactionLog/"; + private final static String TEST_DOSSIER_TEMPLATE_ID = "123"; + private final static String TEST_DOSSIER_ID = "123"; + private final Map> dictionary = new HashMap<>(); + private final Map> dossierDictionary = new HashMap<>(); + private final Map> falsePositive = new HashMap<>(); + private final Map> falseRecommendation = new HashMap<>(); + private final Map typeColorMap = new HashMap<>(); + private final Map hintTypeMap = new HashMap<>(); + private final Map caseInSensitiveMap = new HashMap<>(); + private final Map recommendationTypeMap = new HashMap<>(); + private final Map rankTypeMap = new HashMap<>(); + private final Colors colors = new Colors(); + private final Map reanalysisVersions = new HashMap<>(); + private final Set deleted = new HashSet<>(); + @Autowired + private RedactionController redactionController; + @Autowired + private AnnotationService annotationService; + @Autowired + private 
AnalyzeService analyzeService; + @Autowired + private ObjectMapper objectMapper; + @MockBean + private RulesClient rulesClient; + @MockBean + private DictionaryClient dictionaryClient; + @Autowired + private RedactionStorageService redactionStorageService; + @Autowired + private StorageService storageService; + @Autowired + private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; + @MockBean + private AmazonS3 amazonS3; + @MockBean + private RabbitTemplate rabbitTemplate; + @MockBean + private LegalBasisClient legalBasisClient; + private String TEST_FILE_ID = "123"; + + + /** + * Reads a classpath resource as UTF-8 text and terminates every line with a line feed. + * + * @param path classpath-relative location of the resource + * @return the resource content, with one {@code \n} appended after each line + * @throws IllegalArgumentException if the resource does not exist or cannot be read; the message names the requested path + */ + private static String loadFromClassPath(String path) { + + URL resource = ResourceLoader.class.getClassLoader().getResource(path); + if (resource == null) { + throw new IllegalArgumentException("could not load classpath resource: " + path); + } + try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { + StringBuilder sb = new StringBuilder(); + String str; + while ((str = br.readLine()) != null) { + sb.append(str).append("\n"); + } + return sb.toString(); + } catch (IOException e) { + throw new IllegalArgumentException("could not load classpath resource: " + path, e); + } + } + + + /** + * Clears the storage after each test when the file-system backed storage implementation is in use. + */ + @After + public void cleanupStorage() { + + if (this.storageService instanceof FileSystemBackedStorageService) { + ((FileSystemBackedStorageService) this.storageService).clearStorage(); + } + } + + + /** + * Loads the test dictionaries and type attributes and stubs the rules and dictionary clients before each test. + */ + @Before + public void stubClients() { + + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES)); + + loadDictionaryForTest(); + loadTypeForTest(); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse()); + + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + 
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() + .id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS)) + .build())); + + mockDictionaryCalls(null); + mockDictionaryCalls(0L); + + when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); + } + + + /** + * Fills the dictionary, dossier dictionary and false positive maps from the text files under {@code dictionaries/}. + * Every loaded entry is passed through {@code cleanDictionaryEntry}; the image-like types are loaded from {@code dictionaries/empty.txt}. + */ + private void loadDictionaryForTest() { + + dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); + + falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + + } + + + /** + * Populates the per-type attribute maps (color, hint flag, case-insensitivity flag, recommendation flag, rank) + * and sets the skipped, request-add and request-remove colors on {@code colors}. + */ + private void loadTypeForTest() { + + typeColorMap.put(VERTEBRATE, "#ff85f7"); + typeColorMap.put(ADDRESS, "#ffe187"); + typeColorMap.put(AUTHOR, "#ffe187"); + typeColorMap.put(SPONSOR, "#85ebff"); + typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); + typeColorMap.put(REDACTION_INDICATOR, "#caff85"); + typeColorMap.put(HINT_ONLY, "#abc0c4"); + typeColorMap.put(MUST_REDACT, "#fab4c0"); + typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); + typeColorMap.put(TEST_METHOD, "#91fae8"); + typeColorMap.put(PII, "#66ccff"); + typeColorMap.put(PURITY, "#ffe187"); + typeColorMap.put(IMAGE, "#fcc5fb"); + typeColorMap.put(OCR, "#fcc5fb"); + typeColorMap.put(LOGO, "#ffe187"); + typeColorMap.put(FORMULA, "#ffe187"); + typeColorMap.put(SIGNATURE, "#ffe187"); + typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6"); + + hintTypeMap.put(VERTEBRATE, true); + hintTypeMap.put(ADDRESS, false); + hintTypeMap.put(AUTHOR, false); + hintTypeMap.put(SPONSOR, false); + hintTypeMap.put(NO_REDACTION_INDICATOR, true); + hintTypeMap.put(REDACTION_INDICATOR, true); + hintTypeMap.put(HINT_ONLY, true); + hintTypeMap.put(MUST_REDACT, true); + hintTypeMap.put(PUBLISHED_INFORMATION, true); + hintTypeMap.put(TEST_METHOD, true); + hintTypeMap.put(PII, false); + hintTypeMap.put(PURITY, false); + hintTypeMap.put(IMAGE, true); + hintTypeMap.put(OCR, true); + hintTypeMap.put(FORMULA, false); + 
hintTypeMap.put(LOGO, false); + hintTypeMap.put(SIGNATURE, false); + hintTypeMap.put(DOSSIER_REDACTIONS, false); + hintTypeMap.put(IMPORTED_REDACTION, false); + + caseInSensitiveMap.put(VERTEBRATE, true); + caseInSensitiveMap.put(ADDRESS, false); + caseInSensitiveMap.put(AUTHOR, false); + caseInSensitiveMap.put(SPONSOR, false); + caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); + caseInSensitiveMap.put(REDACTION_INDICATOR, true); + caseInSensitiveMap.put(HINT_ONLY, true); + caseInSensitiveMap.put(MUST_REDACT, true); + caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); + caseInSensitiveMap.put(TEST_METHOD, false); + caseInSensitiveMap.put(PII, false); + caseInSensitiveMap.put(PURITY, false); + caseInSensitiveMap.put(IMAGE, true); + caseInSensitiveMap.put(OCR, true); + caseInSensitiveMap.put(SIGNATURE, true); + caseInSensitiveMap.put(LOGO, true); + caseInSensitiveMap.put(FORMULA, true); + caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); + caseInSensitiveMap.put(IMPORTED_REDACTION, false); + + recommendationTypeMap.put(VERTEBRATE, false); + recommendationTypeMap.put(ADDRESS, false); + recommendationTypeMap.put(AUTHOR, false); + recommendationTypeMap.put(SPONSOR, false); + recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); + recommendationTypeMap.put(REDACTION_INDICATOR, false); + recommendationTypeMap.put(HINT_ONLY, false); + recommendationTypeMap.put(MUST_REDACT, false); + recommendationTypeMap.put(PUBLISHED_INFORMATION, false); + recommendationTypeMap.put(TEST_METHOD, false); + recommendationTypeMap.put(PII, false); + recommendationTypeMap.put(PURITY, false); + recommendationTypeMap.put(IMAGE, false); + recommendationTypeMap.put(OCR, false); + recommendationTypeMap.put(FORMULA, false); + recommendationTypeMap.put(SIGNATURE, false); + recommendationTypeMap.put(LOGO, false); + recommendationTypeMap.put(DOSSIER_REDACTIONS, false); + recommendationTypeMap.put(IMPORTED_REDACTION, false); + + rankTypeMap.put(PURITY, 155); + rankTypeMap.put(PII, 150); + 
rankTypeMap.put(ADDRESS, 140); + rankTypeMap.put(AUTHOR, 130); + rankTypeMap.put(SPONSOR, 120); + rankTypeMap.put(VERTEBRATE, 110); + rankTypeMap.put(MUST_REDACT, 100); + rankTypeMap.put(REDACTION_INDICATOR, 90); + rankTypeMap.put(NO_REDACTION_INDICATOR, 80); + rankTypeMap.put(PUBLISHED_INFORMATION, 70); + rankTypeMap.put(TEST_METHOD, 60); + rankTypeMap.put(HINT_ONLY, 50); + rankTypeMap.put(IMAGE, 30); + rankTypeMap.put(OCR, 29); + rankTypeMap.put(LOGO, 28); + rankTypeMap.put(SIGNATURE, 27); + rankTypeMap.put(FORMULA, 26); + rankTypeMap.put(DOSSIER_REDACTIONS, 200); + rankTypeMap.put(IMPORTED_REDACTION, 200); + + colors.setSkippedColor("#cccccc"); + colors.setRequestAddColor("#04b093"); + colors.setRequestRemoveColor("#04b093"); + } + + + /** + * Builds one {@code Type} per entry of {@code typeColorMap}, reading hint, case-insensitivity, recommendation and rank from the matching attribute maps. + * + * @return the list of types for the test dossier template + */ + private List getTypeResponse() { + + return typeColorMap.entrySet() + .stream() + .map(typeColor -> Type.builder() + .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(typeColor.getKey()) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColor.getValue()) + .isHint(hintTypeMap.get(typeColor.getKey())) + .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) + .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) + .rank(rankTypeMap.get(typeColor.getKey())) + .build()) + + .collect(Collectors.toList()); + } + + + /** + * Stubs {@code dictionaryClient.getDictionaryForType} for every test type id in combination with the given version. + * + * @param version version argument the stubbed calls are matched against + */ + private void mockDictionaryCalls(Long version) { + + when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(ADDRESS, false)); + when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(AUTHOR, false)); + when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SPONSOR, false)); + 
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(HINT_ONLY, false)); + when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(MUST_REDACT, false)); + when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false)); + when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false)); + when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false)); + when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false)); + when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false)); + when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false)); + when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(LOGO, false)); + when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(SIGNATURE, false)); + when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FORMULA, false)); 
+ when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true)); + when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMPORTED_REDACTION, true)); + + } + + + /** + * Normalizes a raw dictionary entry: applies {@code TextNormalizationUtilities.removeHyphenLineBreaks} and replaces each newline with a space. + * + * @param entry raw entry as loaded from a dictionary file + * @return the normalized entry + */ + private String cleanDictionaryEntry(String entry) { + + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); + } + + + /** + * Builds the {@code Type} that the stubbed dictionary client returns for one type. + * + * @param type type name, used as key into the test maps + * @param isDossierDictionary if true the entries are read from {@code dossierDictionary}, otherwise from {@code dictionary} + * @return the type with entries, false positive entries, false recommendation entries and attributes + */ + private Type getDictionaryResponse(String type, boolean isDossierDictionary) { + + return Type.builder() + .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColorMap.get(type)) + .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) + .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) + .falseRecommendationEntries(falseRecommendation.containsKey(type) ? 
toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) + .isHint(hintTypeMap.get(type)) + .isCaseInsensitive(caseInSensitiveMap.get(type)) + .isRecommendation(recommendationTypeMap.get(type)) + .rank(rankTypeMap.get(type)) + .build(); + } + + + /** + * Wraps plain values into {@code DictionaryEntry} objects. + * The version is looked up in {@code reanalysisVersions} (default 0) and the deleted flag in {@code deleted}. + * + * @param entries plain dictionary values + * @return one dictionary entry per value, in iteration order + */ + private List toDictionaryEntry(List entries) { + + List dictionaryEntries = new ArrayList<>(); + entries.forEach(entry -> dictionaryEntries.add(DictionaryEntry.builder() + .value(entry) + .version(reanalysisVersions.getOrDefault(entry, 0L)) + .deleted(deleted.contains(entry)) + .build())); + return dictionaryEntries; + } + + + /** + * Generates RedactionLog for given file and saves it here: REDACTION_LOG_PATH + */ + @Ignore + @Test + @SneakyThrows + public void generateRedactionLogForOneFile() { + + String fileName = "files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf"; + generateRedactionLog(fileName); + } + + + /** + * Analyses the given PDF under a fresh file id and saves the resulting RedactionLog as JSON. + * + * @param fileName classpath-relative path of the PDF + */ + @SneakyThrows + public void generateRedactionLog(String fileName) { + + increaseTestFileId(); + + System.out.println("Generate RedactionLog as Json for " + fileName + " with fileId " + TEST_FILE_ID); + + loadNerForTest(); + + AnalyzeRequest request = prepareStorage(fileName); + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + analyzeService.analyze(request); + + RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + saveRedactionLogAsJson(redactionLog, fileName); + } + + + /** + * Replaces {@code TEST_FILE_ID} with its numeric value incremented by one. + */ + private void increaseTestFileId() { + + TEST_FILE_ID = Integer.toString(Integer.parseInt(TEST_FILE_ID) + 1); + } + + + /** + * Stores the content of {@code files/ner_response.json} as the NER_ENTITIES object of the current test file. + */ + @SneakyThrows + private void loadNerForTest() { + + ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); + var bytes = IOUtils.toByteArray(responseJson.getInputStream()); + storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes); + } + + + 
/** + * Stores the given classpath PDF as the ORIGIN object of the current test file. + * + * @param file classpath-relative path of the PDF + * @return the analyze request built for the stored file + */ + @SneakyThrows + private AnalyzeRequest prepareStorage(String file) { + + ClassPathResource pdfFileResource = new ClassPathResource(file); + + return prepareStorage(pdfFileResource.getInputStream()); + } + + + /** + * Writes the RedactionLog as JSON below REDACTION_LOG_PATH. + * The target directory mirrors the parent directory of the PDF name and is created if missing; {@code .pdf} in the file name is replaced with {@code .json}. + * + * @param redactionLog log to serialize + * @param pdfFileName name of the analysed PDF; its parent directory is dereferenced, so the name is expected to contain one + */ + @SneakyThrows + private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) { + + File pdfFile = new File(pdfFileName); + + String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath(); + File dr = new File(directory); + boolean created = dr.mkdirs(); + if (created) { + System.out.println("Directory was created"); + } + + String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json"); + File file = new File(directory, fileName); + + ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + mapper.writeValue(file, redactionLog); + + System.out.println("Saved RedactionLog for " + fileName + " here " + directory); + } + + + /** + * Builds an analyze request for the test dossier template, test dossier and current file id, and stores the stream content as the ORIGIN object of that file. + * + * @param stream content of the PDF + * @return the analyze request + */ + @SneakyThrows + private AnalyzeRequest prepareStorage(InputStream stream) { + + AnalyzeRequest request = AnalyzeRequest.builder() + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .lastProcessed(OffsetDateTime.now()) + .build(); + + var bytes = IOUtils.toByteArray(stream); + + storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes); + + return request; + + } + + + /** + * Generates RedactionLog for all files and saves it here: REDACTION_LOG_PATH + */ + @Ignore + @Test + @SneakyThrows + public void generateRedactionLogForAllFiles() { + + Set files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH)); + System.out.println("Will generate RedactionLog for " + files.size() + " files."); + TEST_FILE_ID = "1000"; + files.forEach(this::generateRedactionLog); + } + + + /** + * Analyses file and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH. 
+ * If RedactionLog Json does not exist, test will fail. + */ + @Ignore + @Test + @SneakyThrows + public void analyseFileAndCompareRedactionLog() { + + String fileName = "files/Compounds/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf"; + analyseFileAndCompareRedactionLog(fileName); + } + + + /** + * Analyses the given PDF under a fresh file id and asserts that the resulting RedactionLog matches the JSON saved for it below REDACTION_LOG_PATH. + * Versions and list sizes must be equal; every legal basis, entry, position, comment, change and manual change of the new log must have a counterpart in the saved log. + * Counterparts are matched by value (not by object identity), because the saved log is a separately deserialized object graph. + * + * @param fileName classpath-relative path of the PDF to analyse + */ + @SneakyThrows + public void analyseFileAndCompareRedactionLog(String fileName) { + + increaseTestFileId(); + System.out.println("Analyse " + fileName + " with fileId " + TEST_FILE_ID + " and compare it with its saved RedactionLog"); + + RedactionLog savedRedactionLog = loadSavedRedactionLog(fileName); + + loadNerForTest(); + + AnalyzeRequest request = prepareStorage(fileName); + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + analyzeService.analyze(request); + + RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + assertThat(redactionLog.getAnalysisVersion()).isEqualTo(savedRedactionLog.getAnalysisVersion()); + assertThat(redactionLog.getAnalysisNumber()).isEqualTo(savedRedactionLog.getAnalysisNumber()); + assertThat(redactionLog.getDictionaryVersion()).isEqualTo(savedRedactionLog.getDictionaryVersion()); + assertThat(redactionLog.getDossierDictionaryVersion()).isEqualTo(savedRedactionLog.getDossierDictionaryVersion()); + assertThat(redactionLog.getRulesVersion()).isEqualTo(savedRedactionLog.getRulesVersion()); + assertThat(redactionLog.getLegalBasisVersion()).isEqualTo(savedRedactionLog.getLegalBasisVersion()); + + assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(savedRedactionLog.getRedactionLogEntry().size()); + assertThat(redactionLog.getLegalBasis().size()).isEqualTo(savedRedactionLog.getLegalBasis().size()); + + for (RedactionLogLegalBasis redactionLegalBasis : redactionLog.getLegalBasis()) { + var savedRedactionLegalBasis = savedRedactionLog.getLegalBasis() + .stream() + .filter(lb -> 
lb.getName().equalsIgnoreCase(redactionLegalBasis.getName())) + .filter(lb -> lb.getDescription().equalsIgnoreCase(redactionLegalBasis.getDescription())) + .filter(lb -> lb.getReason().equalsIgnoreCase(redactionLegalBasis.getReason())) + .findFirst(); + assertThat(savedRedactionLegalBasis).isPresent(); + } + + for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) { + var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry().stream().filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId())).findFirst(); + assertThat(savedRedactionLogEntry).isPresent(); + assertThat(savedRedactionLogEntry.get().getId()).isEqualTo(redactionLogEntry.getId()); + assertThat(savedRedactionLogEntry.get().getType()).isEqualTo(redactionLogEntry.getType()); + assertThat(savedRedactionLogEntry.get().getValue()).isEqualTo(redactionLogEntry.getValue()); + assertThat(savedRedactionLogEntry.get().getReason()).isEqualTo(redactionLogEntry.getReason()); + assertThat(savedRedactionLogEntry.get().getMatchedRule()).isEqualTo(redactionLogEntry.getMatchedRule()); + assertThat(savedRedactionLogEntry.get().isRectangle()).isEqualTo(redactionLogEntry.isRectangle()); + assertThat(savedRedactionLogEntry.get().getLegalBasis()).isEqualTo(redactionLogEntry.getLegalBasis()); + assertThat(savedRedactionLogEntry.get().isImported()).isEqualTo(redactionLogEntry.isImported()); + assertThat(savedRedactionLogEntry.get().isRedacted()).isEqualTo(redactionLogEntry.isRedacted()); + assertThat(savedRedactionLogEntry.get().isHint()).isEqualTo(redactionLogEntry.isHint()); + assertThat(savedRedactionLogEntry.get().isRecommendation()).isEqualTo(redactionLogEntry.isRecommendation()); + assertThat(savedRedactionLogEntry.get().isFalsePositive()).isEqualTo(redactionLogEntry.isFalsePositive()); + assertThat(savedRedactionLogEntry.get().getSection()).isEqualTo(redactionLogEntry.getSection()); + assertThat(savedRedactionLogEntry.get().getColor()).isEqualTo(redactionLogEntry.getColor()); + 
assertThat(savedRedactionLogEntry.get().getSectionNumber()).isEqualTo(redactionLogEntry.getSectionNumber()); + assertThat(savedRedactionLogEntry.get().getTextBefore()).isEqualTo(redactionLogEntry.getTextBefore()); + assertThat(savedRedactionLogEntry.get().getTextAfter()).isEqualTo(redactionLogEntry.getTextAfter()); + assertThat(savedRedactionLogEntry.get().getStartOffset()).isEqualTo(redactionLogEntry.getStartOffset()); + assertThat(savedRedactionLogEntry.get().getEndOffset()).isEqualTo(redactionLogEntry.getEndOffset()); + assertThat(savedRedactionLogEntry.get().isImage()).isEqualTo(redactionLogEntry.isImage()); + assertThat(savedRedactionLogEntry.get().isImageHasTransparency()).isEqualTo(redactionLogEntry.isImageHasTransparency()); + assertThat(savedRedactionLogEntry.get().isDictionaryEntry()).isEqualTo(redactionLogEntry.isDictionaryEntry()); + assertThat(savedRedactionLogEntry.get().isDossierDictionaryEntry()).isEqualTo(redactionLogEntry.isDossierDictionaryEntry()); + assertThat(savedRedactionLogEntry.get().isExcluded()).isEqualTo(redactionLogEntry.isExcluded()); + assertThat(savedRedactionLogEntry.get().getSourceId()).isEqualTo(redactionLogEntry.getSourceId()); + + for (Rectangle rectangle : redactionLogEntry.getPositions()) { + var savedRectangle = savedRedactionLogEntry.get() + .getPositions() + .stream() + .filter(r -> r.getPage() == rectangle.getPage()) + .filter(r -> r.getTopLeft().getX() == rectangle.getTopLeft().getX()) + .filter(r -> r.getTopLeft().getY() == rectangle.getTopLeft().getY()) + .filter(r -> r.getHeight() == rectangle.getHeight()) + .filter(r -> r.getWidth() == rectangle.getWidth()) + .findFirst(); + assertThat(savedRectangle).isPresent(); + } + + for (RedactionLogComment comment : redactionLogEntry.getComments()) { + var savedComment = savedRedactionLogEntry.get().getComments().stream().filter(c -> java.util.Objects.equals(c.getId(), comment.getId())).findFirst(); + assertThat(savedComment).isPresent(); + 
assertThat(savedComment.get().getId()).isEqualTo(comment.getId()); + assertThat(savedComment.get().getUser()).isEqualTo(comment.getUser()); + assertThat(savedComment.get().getText()).isEqualTo(comment.getText()); + assertThat(savedComment.get().getAnnotationId()).isEqualTo(comment.getAnnotationId()); + assertThat(savedComment.get().getFileId()).isEqualTo(comment.getFileId()); + + } + + for (Change change : redactionLogEntry.getChanges()) { + var savedChange = savedRedactionLogEntry.get() + .getChanges() + .stream() + .filter(c -> c.getAnalysisNumber() == change.getAnalysisNumber()) + .filter(c -> c.getType() == change.getType()) + .findFirst(); + assertThat(savedChange).isPresent(); + } + + for (ManualChange manualChange : redactionLogEntry.getManualChanges()) { + var savedManualChange = savedRedactionLogEntry.get() + .getManualChanges() + .stream() + .filter(m -> m.getAnnotationStatus() == manualChange.getAnnotationStatus()) + .filter(m -> m.getManualRedactionType() == manualChange.getManualRedactionType()) + .filter(m -> StringUtils.equalsIgnoreCase(m.getUserId(), manualChange.getUserId())) + .filter(m -> java.util.Objects.equals(m.getPropertyChanges(), manualChange.getPropertyChanges())) + .findFirst(); + assertThat(savedManualChange).isPresent(); + } + + assertThat(savedRedactionLogEntry.get().getEngines()).containsExactly(redactionLogEntry.getEngines().toArray(new Engine[0])); + + assertThat(savedRedactionLogEntry.get().getReference()).containsAll(redactionLogEntry.getReference()); + assertThat(savedRedactionLogEntry.get().getImportedRedactionIntersections()).containsAll(redactionLogEntry.getImportedRedactionIntersections()); + } + + } + + + @SneakyThrows + private RedactionLog loadSavedRedactionLog(String pdfFileName) { + + File pdfFile = new File(pdfFileName); + String directory = REDACTION_LOG_PATH + pdfFile.getParentFile().getPath(); + String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json"); + File file = new File(directory, fileName); + + ObjectMapper om = new 
ObjectMapper(); + om.registerModule(new JavaTimeModule()); + om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + assertThat(file).exists(); + return om.readValue(file, RedactionLog.class); + } + + + /** + * Analyses all files and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH + * If RedactionLogs Json does not exist, test will fail. + */ + @Ignore + @Test + @SneakyThrows + public void analyseAllFilesAndCompareRedactionLogs() { + + Set files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH)); + System.out.println("Will analyse " + files.size() + " files and compare its RedactionLogs."); + TEST_FILE_ID = "5000"; + files.forEach(this::analyseFileAndCompareRedactionLog); + } + + + @SneakyThrows + private Set getFileNames(Set fileNames, Path dir) { + + try (DirectoryStream stream = Files.newDirectoryStream(dir)) { + for (Path path : stream) { + if (path.toFile().isDirectory()) { + getFileNames(fileNames, path); + } else if (StringUtils.endsWith(path.toAbsolutePath().toString(), ".pdf")) { + String absolutePath = path.toAbsolutePath().toString(); + int pos = StringUtils.indexOf(absolutePath, StringUtils.replace(RESOURCES_PATH, "/", "\\")) + 18; + fileNames.add(StringUtils.substring(absolutePath, pos)); + } + } + } + return fileNames; + } + + + @Configuration + @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class}) + public static class RedactionIntegrationTestConfiguration { + + @Bean + public KieContainer kieContainer() { + + KieServices kieServices = KieServices.Factory.get(); + + KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); + InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8)); + kieFileSystem.write(RESOURCES_PATH + RULES_PATH, kieServices.getResources().newInputStreamResource(input)); + KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); + kieBuilder.buildAll(); + KieModule 
kieModule = kieBuilder.getKieModule(); + + return kieServices.newKieContainer(kieModule.getReleaseId()); + } + + + @Bean + @Primary + public StorageService inmemoryStorage() { + + return new FileSystemBackedStorageService(); + } + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/testRules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/testRules.drl new file mode 100644 index 00000000..ad7726cd --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/testRules.drl @@ -0,0 +1,431 @@ +package drools + +import com.iqser.red.service.redaction.v1.server.redaction.model.Section + +global Section section + + +// --------------------------------------- AI rules ------------------------------------------------------------------- + +rule "0: Add CBI_author from ai" + when + Section(aiMatchesType("CBI_author")) + then + section.addAiEntities("CBI_author", "CBI_author"); + end + +rule "0: Combine address parts from ai to CBI_address (org is mandatory)" + when + Section(aiMatchesType("ORG")) + then + section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); + end + +rule "0: Combine address parts from ai to CBI_address (street is mandatory)" + when + Section(aiMatchesType("STREET")) + then + section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); + end + +rule "0: Combine address parts from ai to CBI_address (city is mandatory)" + when + Section(aiMatchesType("CITY")) + then + section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false); + end + + +// --------------------------------------- CBI rules ------------------------------------------------------------------- + +rule "1: Redact CBI Authors (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && 
matchesType("CBI_author")) + then + section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "2: Redact CBI Authors (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author")) + then + section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "3: Redact not CBI Address (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address")) + then + section.redactNot("CBI_address", 3, "Address found for non vertebrate study"); + section.ignoreRecommendations("CBI_address"); + end + +rule "4: Redact CBI Address (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address")) + then + section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "5: Do not redact genitive CBI_author" + when + Section(matchesType("CBI_author")) + then + section.expandToFalsePositiveByRegEx("CBI_author", "['’’'ʼˈ´`‘′ʻ’']s", false, 0); + end + + +rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "8: Redact Author 
cells in Tables with Author header (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "9: Redact Author cells in Tables with Author header (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No"))) + then + section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No"))) + then + section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +/* Syngenta specific laboratory rule */ +rule "12: Recommend CTL/BL laboratory that start with BL or CTL" + when + Section(searchText.contains("CT") || searchText.contains("BL")) + then + section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. 
]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address"); + end + +rule "14: Redact and add recommendation for et al. author (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al")) + then + section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "15: Redact and add recommendation for et al. author (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al")) + then + section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "16: Add recommendation for Addresses in Test Organism sections" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:")) + then + section.recommendLineAfter("Source:", "CBI_address"); + end + +rule "17: Add recommendation for Addresses in Test Animals sections" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source")) + then + section.recommendLineAfter("Source", "CBI_address"); + end + + +rule "18: Do not redact Names and Addresses if Published Information found" + when + Section(matchesType("published_information")) + then + section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found"); + section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found"); + end + + +// --------------------------------------- PII rules 
------------------------------------------------------------------- + + +rule "19: Redacted PII Personal Identification Information (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII")) + then + section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "20: Redacted PII Personal Identification Information (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII")) + then + section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "21: Redact Emails by RegEx (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@")) + then + section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "22: Redact Emails by RegEx (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@")) + then + section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "23: Redact contact information (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (text.contains("Contact point:") + || text.contains("Phone:") + || text.contains("Fax:") + || text.contains("Tel.:") + || text.contains("Tel:") + || text.contains("E-mail:") + || text.contains("Email:") + || text.contains("e-mail:") + || text.contains("E-mail address:") + || text.contains("Contact:") + || text.contains("Alternative contact:") + || text.contains("Telephone number:") + || 
text.contains("Telephone No:") + || text.contains("Fax number:") + || text.contains("Telephone:") + || text.contains("Phone No.") + || (text.contains("No:") && text.contains("Fax")) + || (text.contains("Contact:") && text.contains("Tel.:")) + || text.contains("European contact:") + )) + then + section.redactLineAfter("Contact point:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Phone:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Fax:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Tel.:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Tel:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("E-mail:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Email:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("e-mail:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("E-mail address:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Alternative contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Telephone number:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Telephone No:", 
"PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Fax number:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Telephone:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Phone No.", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactBetween("No:", "Fax", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactBetween("Contact:", "Tel.:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactLineAfter("European contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "24: Redact contact information (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (text.contains("Contact point:") + || text.contains("Phone:") + || text.contains("Fax:") + || text.contains("Tel.:") + || text.contains("Tel:") + || text.contains("E-mail:") + || text.contains("Email:") + || text.contains("e-mail:") + || text.contains("E-mail address:") + || text.contains("Contact:") + || text.contains("Alternative contact:") + || text.contains("Telephone number:") + || text.contains("Telephone No:") + || text.contains("Fax number:") + || text.contains("Telephone:") + || text.contains("Phone No.") + || (text.contains("No:") && text.contains("Fax")) + || (text.contains("Contact:") && text.contains("Tel.:")) + || text.contains("European contact:") + )) + then + section.redactLineAfter("Contact point:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Phone:", "PII", 24, true, "Personal information 
found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Fax:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Tel.:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Tel:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("E-mail:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Email:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("e-mail:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("E-mail address:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Alternative contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Telephone number:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Telephone No:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Fax number:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Telephone:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + section.redactLineAfter("Phone No.", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 
178/2002");
+        section.redactBetween("No:", "Fax", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+        section.redactBetween("Contact:", "Tel.:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+        section.redactLineAfter("European contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+// Rules 25/26 run the same phone/fax regex on sections whose text contains one of the listed keywords.
+// They differ only in the "Vertebrate Study" file attribute check, the rule number and the legal basis.
+// Non-vertebrate variant: cites Article 39(e)(3), like every other non-vertebrate rule in this file
+// (it previously cited (e)(2), copied from the vertebrate variant below).
+rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
+        text.contains("Telephone")
+        || text.contains("Phone")
+        || text.contains("Ph.")
+        || text.contains("Fax")
+        || text.contains("Tel")
+        || text.contains("Ter")
+        || text.contains("Cell")
+        || text.contains("Mobile")
+        || text.contains("Fel")
+        || text.contains("Fer")
+        ))
+    then
+        section.redactByRegEx("\\b(telephone|phone|fax|tel|ter|cell|mobile|fel|fer)[:.\\s]{0,3}((\\(?\\+?[0-9])(\\(?[0-9\\/.\\-\\s]+\\)?)*([0-9]+\\)?))\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+// Vertebrate variant: cites Article 39(e)(2).
+rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
+        text.contains("Telephone")
+        || text.contains("Phone")
+        || text.contains("Ph.")
+        || text.contains("Fax")
+        || text.contains("Tel")
+        || text.contains("Ter")
+        || text.contains("Cell")
+        || text.contains("Mobile")
+        || text.contains("Fel")
+        || text.contains("Fer")
+        ))
+    then
+        section.redactByRegEx("\\b(telephone|phone|fax|tel|ter|cell|mobile|fel|fer)[:.\\s]{0,3}((\\(?\\+?[0-9])(\\(?[0-9\\/.\\-\\s]+\\)?)*([0-9]+\\)?))\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+rule "27: Redact AUTHOR(S) (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+        &&
searchText.contains("AUTHOR(S):") + && searchText.contains("COMPLETION DATE:") + && !searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "28: Redact AUTHOR(S) (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("COMPLETION DATE:") + && !searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "29: Redact AUTHOR(S) (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "30: Redact AUTHOR(S) (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("PERFORMING LABORATORY:") + ) + then + section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactNot("CBI_address", 31, "Performing laboratory 
found for non vertebrate study"); + end + +rule "32: Redact PERFORMING LABORATORY (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("PERFORMING LABORATORY:")) + then + section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// --------------------------------------- other rules ------------------------------------------------------------------- + +rule "33: Purity Hint" + when + Section(searchText.toLowerCase().contains("purity")) + then + section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only"); + end + + +rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction")); + then + section.ignore("dossier_redaction"); + end + + +rule "35: Redact signatures (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature")) + then + section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "36: Redact signatures (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature")) + then + section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "43: Redact Logos (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo")) + then + section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RulesTest/SYNGENTA_EFSA_sanitisation_GFL_v1_withHighlights.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RulesTest/SYNGENTA_EFSA_sanitisation_GFL_v1_withHighlights.pdf new file mode 100644 index 00000000..d822757f Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RulesTest/SYNGENTA_EFSA_sanitisation_GFL_v1_withHighlights.pdf differ