diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java new file mode 100644 index 00000000..70202114 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java @@ -0,0 +1,481 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.mockito.Mockito.when; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.AfterEach; +import org.mockito.stubbing.Answer; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.core.io.ClassPathResource; + +import com.amazonaws.services.s3.AmazonS3; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.controller.RedactionController; +import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; +import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; +import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService; +import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +public abstract class AbstractRedactionIntegrationTest { + + protected static final String VERTEBRATE = "vertebrate"; + protected static final String ADDRESS = "CBI_address"; + protected static final String AUTHOR = "CBI_author"; + protected static final String SPONSOR = "CBI_sponsor"; + protected static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; + protected static final String REDACTION_INDICATOR = "redaction_indicator"; + protected static final String HINT_ONLY = "hint_only"; + protected static final String MUST_REDACT = "must_redact"; + protected static final String PUBLISHED_INFORMATION = "published_information"; + protected static final String TEST_METHOD = "test_method"; + protected static final String PURITY = "purity"; + protected static final String IMAGE = "image"; + protected static final String LOGO = "logo"; + protected static final String SIGNATURE = 
"signature"; + protected static final String FORMULA = "formula"; + protected static final String OCR = "ocr"; + protected static final String DOSSIER_REDACTIONS = "dossier_redactions"; + protected static final String IMPORTED_REDACTION = "imported_redaction"; + protected static final String PII = "PII"; + protected static final String ROTATE_SIMPLE = "RotateSimple"; + + @Autowired + protected RedactionController redactionController; + + @Autowired + protected AnnotationService annotationService; + + @Autowired + protected AnalyzeService analyzeService; + + @Autowired + protected ObjectMapper objectMapper; + + @Autowired + protected RedactionStorageService redactionStorageService; + + @Autowired + protected StorageService storageService; + + @Autowired + protected ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; + + @MockBean + protected AmazonS3 amazonS3; + + @MockBean + protected RabbitTemplate rabbitTemplate; + + @MockBean + protected LegalBasisClient legalBasisClient; + + protected final Map> dictionary = new HashMap<>(); + protected final Map> dossierDictionary = new HashMap<>(); + protected final Map> falsePositive = new HashMap<>(); + protected final Map> falseRecommendation = new HashMap<>(); + protected final Map typeColorMap = new HashMap<>(); + protected final Map hintTypeMap = new HashMap<>(); + protected final Map caseInSensitiveMap = new HashMap<>(); + protected final Map recommendationTypeMap = new HashMap<>(); + protected final Map rankTypeMap = new HashMap<>(); + protected final Colors colors = new Colors(); + protected final Map reanlysisVersions = new HashMap<>(); + protected final Set deleted = new HashSet<>(); + + protected final static String TEST_DOSSIER_TEMPLATE_ID = "123"; + protected final static String TEST_DOSSIER_ID = "123"; + protected final static String TEST_FILE_ID = "123"; + + @MockBean + protected RulesClient rulesClient; + + @MockBean + protected DictionaryClient dictionaryClient; + + + @AfterEach + 
public void cleanupStorage() { + + if (this.storageService instanceof FileSystemBackedStorageService) { + ((FileSystemBackedStorageService) this.storageService).clearStorage(); + } + } + + + protected void mockDictionaryCalls(Long version) { + + when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(VERTEBRATE, + false)); + when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ADDRESS, false)); + when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(AUTHOR, false)); + when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SPONSOR, false)); + when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( + NO_REDACTION_INDICATOR, + false)); + when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( + REDACTION_INDICATOR, + false)); + when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(HINT_ONLY, false)); + when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(MUST_REDACT, + false)); + when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( + PUBLISHED_INFORMATION, + false)); + when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> 
getDictionaryResponse(TEST_METHOD, + false)); + when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PII, false)); + when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PURITY, false)); + when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(IMAGE, false)); + when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(OCR, false)); + when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(LOGO, false)); + when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SIGNATURE, false)); + when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(FORMULA, false)); + when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ROTATE_SIMPLE, + false)); + when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( + DOSSIER_REDACTIONS, + true)); + when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( + IMPORTED_REDACTION, + true)); + + } + + + protected void loadDictionaryForTest() { + + dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + 
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); + + falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + + } + + + protected void loadOnlyDictionaryForSimpleFile() { + + dictionary.clear(); + dictionary.computeIfAbsent(ROTATE_SIMPLE, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + } + + + /** + * Reads the classpath resource at {@code path} as UTF-8 text, looked up through the class loader of {@code ResourceLoader}. + * + * @param path location of the resource as passed to {@code ClassLoader.getResource} + * @return the resource content, with every line read terminated by "\n" + * @throws IllegalArgumentException if no resource exists at {@code path} or reading it fails + */ + protected static String loadFromClassPath(String path) { + + URL resource = ResourceLoader.class.getClassLoader().getResource(path); + if (resource == null) { + throw new IllegalArgumentException("could not load classpath resource: " + path); + } + try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { + StringBuilder sb = new StringBuilder(); + String str; + while ((str = br.readLine()) != null) { + sb.append(str).append("\n"); + } + return sb.toString(); + } catch (IOException e) { + throw new IllegalArgumentException("could not load classpath resource: " + path, e); + } + } + + + protected List getPathsRecursively(File path) { + + List result = new ArrayList<>(); + if (path == null || path.listFiles() == null) { + return result; + } + for (File f : path.listFiles()) { + if (f.isFile()) { + result.add(f); + } else { + result.addAll(getPathsRecursively(f)); + } + } + return result; + + } + + + protected void loadTypeForTest() { + + typeColorMap.put(VERTEBRATE, "#ff85f7"); + typeColorMap.put(ADDRESS, "#ffe187"); + typeColorMap.put(AUTHOR, "#ffe187"); + typeColorMap.put(SPONSOR, "#85ebff"); + typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); + typeColorMap.put(REDACTION_INDICATOR, "#caff85"); + typeColorMap.put(HINT_ONLY, "#abc0c4"); + typeColorMap.put(MUST_REDACT, "#fab4c0"); + typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); + typeColorMap.put(TEST_METHOD, "#91fae8"); + typeColorMap.put(PII, "#66ccff"); + typeColorMap.put(PURITY, "#ffe187"); + typeColorMap.put(IMAGE, "#fcc5fb"); + typeColorMap.put(OCR, "#fcc5fb"); + typeColorMap.put(LOGO, "#ffe187"); + typeColorMap.put(FORMULA, "#ffe187"); + typeColorMap.put(SIGNATURE, "#ffe187"); + typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6"); + typeColorMap.put(ROTATE_SIMPLE, "#66ccff"); + +
hintTypeMap.put(VERTEBRATE, true); + hintTypeMap.put(ADDRESS, false); + hintTypeMap.put(AUTHOR, false); + hintTypeMap.put(SPONSOR, false); + hintTypeMap.put(NO_REDACTION_INDICATOR, true); + hintTypeMap.put(REDACTION_INDICATOR, true); + hintTypeMap.put(HINT_ONLY, true); + hintTypeMap.put(MUST_REDACT, true); + hintTypeMap.put(PUBLISHED_INFORMATION, true); + hintTypeMap.put(TEST_METHOD, true); + hintTypeMap.put(PII, false); + hintTypeMap.put(PURITY, false); + hintTypeMap.put(IMAGE, true); + hintTypeMap.put(OCR, true); + hintTypeMap.put(FORMULA, false); + hintTypeMap.put(LOGO, false); + hintTypeMap.put(SIGNATURE, false); + hintTypeMap.put(DOSSIER_REDACTIONS, false); + hintTypeMap.put(IMPORTED_REDACTION, false); + hintTypeMap.put(ROTATE_SIMPLE, false); + + caseInSensitiveMap.put(VERTEBRATE, true); + caseInSensitiveMap.put(ADDRESS, false); + caseInSensitiveMap.put(AUTHOR, false); + caseInSensitiveMap.put(SPONSOR, false); + caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); + caseInSensitiveMap.put(REDACTION_INDICATOR, true); + caseInSensitiveMap.put(HINT_ONLY, true); + caseInSensitiveMap.put(MUST_REDACT, true); + caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); + caseInSensitiveMap.put(TEST_METHOD, false); + caseInSensitiveMap.put(PII, false); + caseInSensitiveMap.put(PURITY, false); + caseInSensitiveMap.put(IMAGE, true); + caseInSensitiveMap.put(OCR, true); + caseInSensitiveMap.put(SIGNATURE, true); + caseInSensitiveMap.put(LOGO, true); + caseInSensitiveMap.put(FORMULA, true); + caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); + caseInSensitiveMap.put(IMPORTED_REDACTION, false); + caseInSensitiveMap.put(ROTATE_SIMPLE, true); + + recommendationTypeMap.put(VERTEBRATE, false); + recommendationTypeMap.put(ADDRESS, false); + recommendationTypeMap.put(AUTHOR, false); + recommendationTypeMap.put(SPONSOR, false); + recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); + recommendationTypeMap.put(REDACTION_INDICATOR, false); + recommendationTypeMap.put(HINT_ONLY, 
false); + recommendationTypeMap.put(MUST_REDACT, false); + recommendationTypeMap.put(PUBLISHED_INFORMATION, false); + recommendationTypeMap.put(TEST_METHOD, false); + recommendationTypeMap.put(PII, false); + recommendationTypeMap.put(PURITY, false); + recommendationTypeMap.put(IMAGE, false); + recommendationTypeMap.put(OCR, false); + recommendationTypeMap.put(FORMULA, false); + recommendationTypeMap.put(SIGNATURE, false); + recommendationTypeMap.put(LOGO, false); + recommendationTypeMap.put(DOSSIER_REDACTIONS, false); + recommendationTypeMap.put(IMPORTED_REDACTION, false); + recommendationTypeMap.put(ROTATE_SIMPLE, false); + + rankTypeMap.put(PURITY, 155); + rankTypeMap.put(PII, 150); + rankTypeMap.put(ADDRESS, 140); + rankTypeMap.put(AUTHOR, 130); + rankTypeMap.put(SPONSOR, 120); + rankTypeMap.put(VERTEBRATE, 110); + rankTypeMap.put(MUST_REDACT, 100); + rankTypeMap.put(REDACTION_INDICATOR, 90); + rankTypeMap.put(NO_REDACTION_INDICATOR, 80); + rankTypeMap.put(PUBLISHED_INFORMATION, 70); + rankTypeMap.put(TEST_METHOD, 60); + rankTypeMap.put(HINT_ONLY, 50); + rankTypeMap.put(IMAGE, 30); + rankTypeMap.put(OCR, 29); + rankTypeMap.put(LOGO, 28); + rankTypeMap.put(SIGNATURE, 27); + rankTypeMap.put(FORMULA, 26); + rankTypeMap.put(DOSSIER_REDACTIONS, 200); + rankTypeMap.put(IMPORTED_REDACTION, 200); + rankTypeMap.put(ROTATE_SIMPLE, 150); + + colors.setSkippedColor("#cccccc"); + colors.setRequestAddColor("#04b093"); + colors.setRequestRemoveColor("#04b093"); + } + + + @SneakyThrows + protected void loadNerForTest() { + + ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), + responseJson.getInputStream()); + } + + + protected List getTypeResponse() { + + return typeColorMap.entrySet() + .stream() + .map(typeColor -> Type.builder() + .id(typeColor.getKey() + ":" + 
TEST_DOSSIER_TEMPLATE_ID) + .type(typeColor.getKey()) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColor.getValue()) + .isHint(hintTypeMap.get(typeColor.getKey())) + .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) + .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) + .rank(rankTypeMap.get(typeColor.getKey())) + .build()) + + .collect(Collectors.toList()); + } + + + protected Type getDictionaryResponse(String type, boolean isDossierDictionary) { + + return Type.builder() + .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColorMap.get(type)) + .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) + .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) + .falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) + .isHint(hintTypeMap.get(type)) + .isCaseInsensitive(caseInSensitiveMap.get(type)) + .isRecommendation(recommendationTypeMap.get(type)) + .rank(rankTypeMap.get(type)) + .build(); + } + + + private String cleanDictionaryEntry(String entry) { + + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); + } + + + private List toDictionaryEntry(List entries) { + + if (entries == null) { + entries = Collections.emptyList(); + } + + List dictionaryEntries = new ArrayList<>(); + entries.forEach(entry -> { + dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build()); + }); + return dictionaryEntries; + } + + + @SneakyThrows + protected AnalyzeRequest prepareStorage(String file) { + + return prepareStorage(file, "files/cv_service_empty_response.json"); + } + + + @SneakyThrows + protected AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) { + + 
ClassPathResource pdfFileResource = new ClassPathResource(file); + ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile); + + return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream()); + } + + + @SneakyThrows + protected AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) { + + AnalyzeRequest request = AnalyzeRequest.builder() + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .lastProcessed(OffsetDateTime.now()) + .build(); + + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), + cvServiceResponseFileStream); + storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream); + + return request; + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index e5d61ddb..599179c9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -3,28 +3,23 @@ package com.iqser.red.service.redaction.v1.server; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.when; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import 
java.net.URL; import java.nio.charset.StandardCharsets; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; -import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -34,13 +29,9 @@ import org.kie.api.builder.KieBuilder; import org.kie.api.builder.KieFileSystem; import org.kie.api.builder.KieModule; import org.kie.api.runtime.KieContainer; -import org.mockito.stubbing.Answer; -import org.springframework.amqp.rabbit.core.RabbitTemplate; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -48,9 +39,7 @@ import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit.jupiter.SpringExtension; -import com.amazonaws.services.s3.AmazonS3; import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; @@ -67,24 +56,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import 
com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; -import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; -import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; -import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; -import com.iqser.red.service.redaction.v1.server.client.RulesClient; -import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; -import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; -import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; -import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; -import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.storage.commons.StorageAutoConfiguration; import 
com.iqser.red.storage.commons.service.StorageService; @@ -94,82 +73,9 @@ import lombok.SneakyThrows; @ExtendWith(SpringExtension.class) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) @Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class) -public class RedactionIntegrationTest { +public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { private static final String RULES = loadFromClassPath("drools/rules.drl"); - private static final String VERTEBRATE = "vertebrate"; - private static final String ADDRESS = "CBI_address"; - private static final String AUTHOR = "CBI_author"; - private static final String SPONSOR = "CBI_sponsor"; - private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; - private static final String REDACTION_INDICATOR = "redaction_indicator"; - private static final String HINT_ONLY = "hint_only"; - private static final String MUST_REDACT = "must_redact"; - private static final String PUBLISHED_INFORMATION = "published_information"; - private static final String TEST_METHOD = "test_method"; - private static final String PURITY = "purity"; - private static final String IMAGE = "image"; - private static final String LOGO = "logo"; - private static final String SIGNATURE = "signature"; - private static final String FORMULA = "formula"; - private static final String OCR = "ocr"; - private static final String DOSSIER_REDACTIONS = "dossier_redactions"; - private static final String IMPORTED_REDACTION = "imported_redaction"; - private static final String PII = "PII"; - private static final String ROTATE_SIMPLE = "RotateSimple"; - - @Autowired - private RedactionController redactionController; - - @Autowired - private AnnotationService annotationService; - - @Autowired - private AnalyzeService analyzeService; - - @Autowired - private ObjectMapper objectMapper; - - @MockBean - private RulesClient rulesClient; - - @MockBean - private 
DictionaryClient dictionaryClient; - - @Autowired - private RedactionStorageService redactionStorageService; - - @Autowired - private StorageService storageService; - - @Autowired - private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; - - @MockBean - private AmazonS3 amazonS3; - - @MockBean - private RabbitTemplate rabbitTemplate; - - @MockBean - private LegalBasisClient legalBasisClient; - - private final Map> dictionary = new HashMap<>(); - private final Map> dossierDictionary = new HashMap<>(); - private final Map> falsePositive = new HashMap<>(); - private final Map> falseRecommendation = new HashMap<>(); - private final Map typeColorMap = new HashMap<>(); - private final Map hintTypeMap = new HashMap<>(); - private final Map caseInSensitiveMap = new HashMap<>(); - private final Map recommendationTypeMap = new HashMap<>(); - private final Map rankTypeMap = new HashMap<>(); - private final Colors colors = new Colors(); - private final Map reanlysisVersions = new HashMap<>(); - private final Set deleted = new HashSet<>(); - - private final static String TEST_DOSSIER_TEMPLATE_ID = "123"; - private final static String TEST_DOSSIER_ID = "123"; - private final static String TEST_FILE_ID = "123"; @Configuration @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class}) @@ -201,15 +107,6 @@ public class RedactionIntegrationTest { } - @AfterEach - public void cleanupStorage() { - - if (this.storageService instanceof FileSystemBackedStorageService) { - ((FileSystemBackedStorageService) this.storageService).clearStorage(); - } - } - - @BeforeEach public void stubClients() { @@ -243,46 +140,6 @@ public class RedactionIntegrationTest { } - private void mockDictionaryCalls(Long version) { - - when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(VERTEBRATE, - false)); - 
when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ADDRESS, false)); - when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(AUTHOR, false)); - when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SPONSOR, false)); - when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - NO_REDACTION_INDICATOR, - false)); - when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - REDACTION_INDICATOR, - false)); - when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(HINT_ONLY, false)); - when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(MUST_REDACT, - false)); - when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - PUBLISHED_INFORMATION, - false)); - when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(TEST_METHOD, - false)); - when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PII, false)); - when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PURITY, false)); - when(dictionaryClient.getDictionaryForType(IMAGE + ":" + 
TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(IMAGE, false)); - when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(OCR, false)); - when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(LOGO, false)); - when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SIGNATURE, false)); - when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(FORMULA, false)); - when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ROTATE_SIMPLE, - false)); - when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - DOSSIER_REDACTIONS, - true)); - when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - IMPORTED_REDACTION, - true)); - - } - - @Test public void test270Rotated() { @@ -644,7 +501,9 @@ public class RedactionIntegrationTest { String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; ClassPathResource responseJson = new ClassPathResource("files/crafted_document.NER_ENTITIES.json"); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream()); + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), + responseJson.getInputStream()); long 
start = System.currentTimeMillis(); AnalyzeRequest request = prepareStorage(fileName); @@ -1102,10 +961,6 @@ public class RedactionIntegrationTest { } - - - - @Test public void phantomCellsDocumentTest() throws IOException { @@ -1326,272 +1181,6 @@ public class RedactionIntegrationTest { } - private void loadDictionaryForTest() { - - dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v 
-> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); - - falsePositive.computeIfAbsent(PII, v 
-> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - - } - - - private void loadOnlyDictionaryForSimpleFile() { - - dictionary.clear(); - dictionary.computeIfAbsent(ROTATE_SIMPLE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - } - - - private static String loadFromClassPath(String path) { - - URL resource = ResourceLoader.class.getClassLoader().getResource(path); - if (resource == null) { - throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl"); - } - try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { - StringBuilder sb = new StringBuilder(); - String str; - while ((str = br.readLine()) != null) { - sb.append(str).append("\n"); - } - return sb.toString(); - } catch (IOException e) { - throw new IllegalArgumentException("could not load classpath resource: " + path, e); - } - } - - - private List getPathsRecursively(File path) { - - List result = new ArrayList<>(); - if (path == null || path.listFiles() == null) { - return result; - } - for (File f : path.listFiles()) { - if (f.isFile()) { - result.add(f); - } else { - result.addAll(getPathsRecursively(f)); - } - } - return result; - - } - - - private void loadTypeForTest() { - - typeColorMap.put(VERTEBRATE, "#ff85f7"); - typeColorMap.put(ADDRESS, "#ffe187"); - typeColorMap.put(AUTHOR, "#ffe187"); - typeColorMap.put(SPONSOR, "#85ebff"); - typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); - typeColorMap.put(REDACTION_INDICATOR, "#caff85"); - typeColorMap.put(HINT_ONLY, "#abc0c4"); - typeColorMap.put(MUST_REDACT, "#fab4c0"); - typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); - typeColorMap.put(TEST_METHOD, "#91fae8"); - typeColorMap.put(PII, "#66ccff"); - typeColorMap.put(PURITY, 
"#ffe187"); - typeColorMap.put(IMAGE, "#fcc5fb"); - typeColorMap.put(OCR, "#fcc5fb"); - typeColorMap.put(LOGO, "#ffe187"); - typeColorMap.put(FORMULA, "#ffe187"); - typeColorMap.put(SIGNATURE, "#ffe187"); - typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6"); - typeColorMap.put(ROTATE_SIMPLE, "#66ccff"); - - hintTypeMap.put(VERTEBRATE, true); - hintTypeMap.put(ADDRESS, false); - hintTypeMap.put(AUTHOR, false); - hintTypeMap.put(SPONSOR, false); - hintTypeMap.put(NO_REDACTION_INDICATOR, true); - hintTypeMap.put(REDACTION_INDICATOR, true); - hintTypeMap.put(HINT_ONLY, true); - hintTypeMap.put(MUST_REDACT, true); - hintTypeMap.put(PUBLISHED_INFORMATION, true); - hintTypeMap.put(TEST_METHOD, true); - hintTypeMap.put(PII, false); - hintTypeMap.put(PURITY, false); - hintTypeMap.put(IMAGE, true); - hintTypeMap.put(OCR, true); - hintTypeMap.put(FORMULA, false); - hintTypeMap.put(LOGO, false); - hintTypeMap.put(SIGNATURE, false); - hintTypeMap.put(DOSSIER_REDACTIONS, false); - hintTypeMap.put(IMPORTED_REDACTION, false); - hintTypeMap.put(ROTATE_SIMPLE, false); - - caseInSensitiveMap.put(VERTEBRATE, true); - caseInSensitiveMap.put(ADDRESS, false); - caseInSensitiveMap.put(AUTHOR, false); - caseInSensitiveMap.put(SPONSOR, false); - caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); - caseInSensitiveMap.put(REDACTION_INDICATOR, true); - caseInSensitiveMap.put(HINT_ONLY, true); - caseInSensitiveMap.put(MUST_REDACT, true); - caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); - caseInSensitiveMap.put(TEST_METHOD, false); - caseInSensitiveMap.put(PII, false); - caseInSensitiveMap.put(PURITY, false); - caseInSensitiveMap.put(IMAGE, true); - caseInSensitiveMap.put(OCR, true); - caseInSensitiveMap.put(SIGNATURE, true); - caseInSensitiveMap.put(LOGO, true); - caseInSensitiveMap.put(FORMULA, true); - caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); - caseInSensitiveMap.put(IMPORTED_REDACTION, false); - caseInSensitiveMap.put(ROTATE_SIMPLE, true); - - 
recommendationTypeMap.put(VERTEBRATE, false); - recommendationTypeMap.put(ADDRESS, false); - recommendationTypeMap.put(AUTHOR, false); - recommendationTypeMap.put(SPONSOR, false); - recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); - recommendationTypeMap.put(REDACTION_INDICATOR, false); - recommendationTypeMap.put(HINT_ONLY, false); - recommendationTypeMap.put(MUST_REDACT, false); - recommendationTypeMap.put(PUBLISHED_INFORMATION, false); - recommendationTypeMap.put(TEST_METHOD, false); - recommendationTypeMap.put(PII, false); - recommendationTypeMap.put(PURITY, false); - recommendationTypeMap.put(IMAGE, false); - recommendationTypeMap.put(OCR, false); - recommendationTypeMap.put(FORMULA, false); - recommendationTypeMap.put(SIGNATURE, false); - recommendationTypeMap.put(LOGO, false); - recommendationTypeMap.put(DOSSIER_REDACTIONS, false); - recommendationTypeMap.put(IMPORTED_REDACTION, false); - recommendationTypeMap.put(ROTATE_SIMPLE, false); - - rankTypeMap.put(PURITY, 155); - rankTypeMap.put(PII, 150); - rankTypeMap.put(ADDRESS, 140); - rankTypeMap.put(AUTHOR, 130); - rankTypeMap.put(SPONSOR, 120); - rankTypeMap.put(VERTEBRATE, 110); - rankTypeMap.put(MUST_REDACT, 100); - rankTypeMap.put(REDACTION_INDICATOR, 90); - rankTypeMap.put(NO_REDACTION_INDICATOR, 80); - rankTypeMap.put(PUBLISHED_INFORMATION, 70); - rankTypeMap.put(TEST_METHOD, 60); - rankTypeMap.put(HINT_ONLY, 50); - rankTypeMap.put(IMAGE, 30); - rankTypeMap.put(OCR, 29); - rankTypeMap.put(LOGO, 28); - rankTypeMap.put(SIGNATURE, 27); - rankTypeMap.put(FORMULA, 26); - rankTypeMap.put(DOSSIER_REDACTIONS, 200); - rankTypeMap.put(IMPORTED_REDACTION, 200); - rankTypeMap.put(ROTATE_SIMPLE, 150); - - colors.setSkippedColor("#cccccc"); - colors.setRequestAddColor("#04b093"); - colors.setRequestRemoveColor("#04b093"); - } - - - @SneakyThrows - private void loadNerForTest() { - - ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); - 
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream()); - } - - - private List getTypeResponse() { - - return typeColorMap.entrySet() - .stream() - .map(typeColor -> Type.builder() - .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(typeColor.getKey()) - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColor.getValue()) - .isHint(hintTypeMap.get(typeColor.getKey())) - .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) - .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) - .rank(rankTypeMap.get(typeColor.getKey())) - .build()) - - .collect(Collectors.toList()); - } - - - private Type getDictionaryResponse(String type, boolean isDossierDictionary) { - - return Type.builder() - .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColorMap.get(type)) - .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) - .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) - .falseRecommendationEntries(falseRecommendation.containsKey(type) ? 
toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) - .isHint(hintTypeMap.get(type)) - .isCaseInsensitive(caseInSensitiveMap.get(type)) - .isRecommendation(recommendationTypeMap.get(type)) - .rank(rankTypeMap.get(type)) - .build(); - } - - - private String cleanDictionaryEntry(String entry) { - - return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); - } - - - private List toDictionaryEntry(List entries) { - - if (entries == null) { - entries = Collections.emptyList(); - } - - List dictionaryEntries = new ArrayList<>(); - entries.forEach(entry -> { - dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build()); - }); - return dictionaryEntries; - } - - @Test public void testImportedRedactions() throws IOException { @@ -1599,7 +1188,8 @@ public class RedactionIntegrationTest { ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json"); AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf"); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactions.getInputStream()); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); @@ -1659,39 +1249,4 @@ public class RedactionIntegrationTest { assertThat(values).contains("Mr. 
Tambourine Man"); } - - @SneakyThrows - private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) { - - AnalyzeRequest request = AnalyzeRequest.builder() - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .lastProcessed(OffsetDateTime.now()) - .build(); - - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream); - - return request; - - } - - - @SneakyThrows - private AnalyzeRequest prepareStorage(String file) { - - return prepareStorage(file, "files/cv_service_empty_response.json"); - } - - - @SneakyThrows - private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) { - - ClassPathResource pdfFileResource = new ClassPathResource(file); - ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile); - - return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream()); - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTestV2.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTestV2.java new file mode 100644 index 00000000..5ed34a17 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTestV2.java @@ -0,0 +1,152 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayInputStream; +import 
java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.kie.api.KieServices; +import org.kie.api.builder.KieBuilder; +import org.kie.api.builder.KieFileSystem; +import org.kie.api.builder.KieModule; +import org.kie.api.runtime.KieContainer; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; +import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class) +public class RedactionIntegrationTestV2 
extends AbstractRedactionIntegrationTest { + + private static final String RULES = loadFromClassPath("drools/rules_v2.drl"); + + @Configuration + @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class}) + public static class RedactionIntegrationTestConfiguration { + + @Bean + public KieContainer kieContainer() { + + KieServices kieServices = KieServices.Factory.get(); + + KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); + InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8)); + kieFileSystem.write("src/test/resources/drools/rules_v2", kieServices.getResources().newInputStreamResource(input)); + KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); + kieBuilder.buildAll(); + KieModule kieModule = kieBuilder.getKieModule(); + + return kieServices.newKieContainer(kieModule.getReleaseId()); + } + + + @Bean + @Primary + public StorageService inmemoryStorage() { + + return new FileSystemBackedStorageService(); + } + + } + + + @BeforeEach + public void stubClients() { + + TenantContext.setTenantId("redaction"); + + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES)); + + loadDictionaryForTest(); + loadTypeForTest(); + loadNerForTest(); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse()); + + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() + .id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS)) + 
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS)) + .build())); + + mockDictionaryCalls(null); + mockDictionaryCalls(0L); + + when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); + } + + + /** + * The case in this test: One term, 'Dr. Alan Miller', is found by PII-Rule and is in the PII-dictionary + * as well as in the PII-false-positive-list - and in the CBI-author dictionary. + * It gets redacted, as the PII-finding is false positive and so the CBI-author entry is effective + * independent of the entity-rank + */ + @Test + @SneakyThrows + public void testTermIsInTwoDictionariesAndInOneFalsePositive() { + + AnalyzeRequest request = prepareStorage("files/new/simplified2.pdf"); + + dictionary.clear(); + dictionary.computeIfAbsent(PII, v -> new ArrayList<>()).add("Dr. Alan Miller"); + dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()).add("Dr. Alan Miller"); + + falsePositive.clear(); + falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()).add("Dr. Alan Miller COMPLETION DATE:"); + + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + analyzeService.analyze(request); + + var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(1); + + RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0); + + assertThat(redactionLogEntry.getType()).isEqualTo("CBI_author"); + assertThat(redactionLogEntry.getValue()).isEqualTo("Dr. 
Alan Miller"); + assertThat(redactionLogEntry.isRedacted()).isEqualTo(true); + assertThat(redactionLogEntry.isRecommendation()).isEqualTo(false); + assertThat(redactionLogEntry.isFalsePositive()).isEqualTo(false); + assertThat(redactionLogEntry.isExcluded()).isEqualTo(false); + assertThat(redactionLogEntry.isDictionaryEntry()).isEqualTo(true); + + assertThat(redactionLogEntry.getEngines().size()).isEqualTo(1); + assertThat(redactionLogEntry.getEngines().contains(Engine.DICTIONARY)).isEqualTo(true); + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl new file mode 100644 index 00000000..44e24b98 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -0,0 +1,341 @@ +package drools + +import com.iqser.red.service.redaction.v1.server.redaction.model.Section + +global Section section + + +// --------------------------------------- AI rules ------------------------------------------------------------------- + +rule "0: Add CBI_author from ai" + when + Section(aiMatchesType("CBI_author")) + then + section.addAiEntities("CBI_author", "CBI_author"); + end + +rule "0: Combine address parts from ai to CBI_address (org is mandatory)" + when + Section(aiMatchesType("ORG")) + then + section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); + end + +rule "0: Combine address parts from ai to CBI_address (street is mandatory)" + when + Section(aiMatchesType("STREET")) + then + section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); + end + +rule "0: Combine address parts from ai to CBI_address (city is mandatory)" + when + Section(aiMatchesType("CITY")) + then + section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false); + end + +/* Syngenta 
specific laboratory recommendation. NOTE(review): the guard below tests contains("CT") / contains("BL"), i.e. anywhere in the text, not "starts with" as the rule name says. */
+rule "0: Recommend CTL/BL laboratory that start with BL or CTL"
+    when
+        Section(searchText.contains("CT") || searchText.contains("BL"))
+    then
+        /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
+        section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address");
+    end
+
+
+// --------------------------------------- CBI rules -------------------------------------------------------------------
+// Most rules below come in pairs keyed on the "Vertebrate Study" file attribute: "Yes" cites Article 39(e)(2), anything else Article 39(e)(3).
+rule "1: Redact CBI Authors (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
+    then
+        section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "2: Redact CBI Authors (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
+    then
+        section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// Rules 3/4: CBI_address is redacted only for vertebrate studies; otherwise rule 3 calls redactNot and ignoreRecommendations.
+rule "3: Redact not CBI Address (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
+    then
+        section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
+        section.ignoreRecommendations("CBI_address");
+    end
+
+rule "4: Redact CBI Address (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
+    then
+        section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// NOTE(review): the character class below lists several apostrophe variants more than once; duplicates inside [...] are redundant.
+rule "5: Do not redact genitive CBI_author"
+    when
+        Section(matchesType("CBI_author"))
+    then
+        section.expandToFalsePositiveByRegEx("CBI_author", "['’’'ʼˈ´`‘′ʻ’']s", false, 0);
+    end
+
+// Rules 6-9 require that the table has no "Vertebrate study Y/N" header and pass false as 4th redactCell argument (rules 10/11 pass true; the flag's meaning is not visible here).
+rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
+    then
+        section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
+    then
+        section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
+    then
+        section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
+    then
+        section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// NOTE(review): "recommand" in the names of rules 10 and 11 is presumably a typo for "recommend"; the name is the rule's identifier, so rename deliberately.
+rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
+    then
+        section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
+    then
+        section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
+    then
+        section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
+    then
+        section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// Rules 16/17 differ only in the colon ("Species:"/"Source:" vs "Species"/"Source"). NOTE(review): any text satisfying rule 16 also satisfies rule 17 - confirm both should fire.
+rule "16: Add recommendation for Addresses in Test Organism sections"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
+    then
+        section.recommendLineAfter("Source:", "CBI_address");
+    end
+
+rule "17: Add recommendation for Addresses in Test Animals sections"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
+    then
+        section.recommendLineAfter("Source", "CBI_address");
+    end
+
+// Rule 18 has no vertebrate-study condition; it applies redactNotAndReference to both CBI_author and CBI_address.
+rule "18: Do not redact Names and Addresses if Published Information found"
+    when
+        Section(matchesType("published_information"))
+    then
+        section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
+        section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
+    end
+
+
+// --------------------------------------- PII rules -------------------------------------------------------------------
+
+
+rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
+    then
+        section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
+    then
+        section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+rule "21: Redact Emails by RegEx (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
+    then
+        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "22: Redact Emails by RegEx (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
+    then
+        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// NOTE(review): the guards use case-sensitive text.contains(...) with capitalised keywords, while the regex keywords are lower-case with a 'true' flag (presumably case-insensitive) - confirm lower-case labels are not missed. "Ter"/"Fel"/"Fer" look like OCR variants of "Tel" - TODO confirm.
+rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
+            text.contains("Contact")
+            || text.contains("Telephone")
+            || text.contains("Phone")
+            || text.contains("Fax")
+            || text.contains("Tel")
+            || text.contains("Ter")
+            || text.contains("Mobile")
+            || text.contains("Fel")
+            || text.contains("Fer")
+        ))
+    then
+        section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
+            text.contains("Contact")
+            || text.contains("Telephone")
+            || text.contains("Phone")
+            || text.contains("Fax")
+            || text.contains("Tel")
+            || text.contains("Ter")
+            || text.contains("Mobile")
+            || text.contains("Fel")
+            || text.contains("Fer")
+        ))
+    then
+        section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// Rules 27/28 handle "COMPLETION DATE:" when "STUDY COMPLETION DATE:" is absent; rules 29/30 handle "STUDY COMPLETION DATE:". NOTE(review): rule 27 reports "Author found", rules 28-30 "AUTHOR(S) was found".
+rule "27: Redact AUTHOR(S) (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+            && searchText.contains("AUTHOR(S):")
+            && searchText.contains("COMPLETION DATE:")
+            && !searchText.contains("STUDY COMPLETION DATE:")
+        )
+    then
+        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "28: Redact AUTHOR(S) (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+            && searchText.contains("AUTHOR(S):")
+            && searchText.contains("COMPLETION DATE:")
+            && !searchText.contains("STUDY COMPLETION DATE:")
+        )
+    then
+        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+rule "29: Redact AUTHOR(S) (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+            && searchText.contains("AUTHOR(S):")
+            && searchText.contains("STUDY COMPLETION DATE:")
+        )
+    then
+        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "30: Redact AUTHOR(S) (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+            && searchText.contains("AUTHOR(S):")
+            && searchText.contains("STUDY COMPLETION DATE:")
+        )
+    then
+        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+// NOTE(review): rule 31 calls redactBetween and then redactNot on the same CBI_address type; the net effect depends on Section, which is not shown here.
+rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+            && searchText.contains("PERFORMING LABORATORY:")
+        )
+    then
+        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+        section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study");
+    end
+
+rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+            && searchText.contains("PERFORMING LABORATORY:"))
+    then
+        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+// --------------------------------------- other rules -------------------------------------------------------------------
+
+rule "33: Purity Hint"
+    when
+        Section(searchText.toLowerCase().contains("purity"))
+    then
+        section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
+    end
+
+// NOTE(review): the condition below ends with ';' and uses the type name "dossier_redaction" - verify that name against the configured dictionary types.
+rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction"));
+    then
+        section.ignore("dossier_redaction");
+    end
+
+
+rule "35: Redact signatures (Non vertebrate study)"
+    when
+        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
+    then
+        section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+    end
+
+rule "36: Redact signatures (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
+    then
+        section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+    end
+
+
+rule "43: Redact Logos (Vertebrate study)"
+    when
+        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo"))
+    then
+        section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No
178/2002"); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/simplified2.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/simplified2.pdf new file mode 100644 index 00000000..32eac69f Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/simplified2.pdf differ