RED-6411 - add test for this case (with the current rules-file) and move/abstract logic from the RedactionIntegrationTest into AbstractRedactionIntegrationTest

This commit is contained in:
Thomas Beyer 2023-03-28 10:16:17 +02:00
parent 7644afc3aa
commit 157b20a1de
5 changed files with 980 additions and 451 deletions

View File

@ -0,0 +1,481 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.mockito.stubbing.Answer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.core.io.ClassPathResource;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
public abstract class AbstractRedactionIntegrationTest {
protected static final String VERTEBRATE = "vertebrate";
protected static final String ADDRESS = "CBI_address";
protected static final String AUTHOR = "CBI_author";
protected static final String SPONSOR = "CBI_sponsor";
protected static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
protected static final String REDACTION_INDICATOR = "redaction_indicator";
protected static final String HINT_ONLY = "hint_only";
protected static final String MUST_REDACT = "must_redact";
protected static final String PUBLISHED_INFORMATION = "published_information";
protected static final String TEST_METHOD = "test_method";
protected static final String PURITY = "purity";
protected static final String IMAGE = "image";
protected static final String LOGO = "logo";
protected static final String SIGNATURE = "signature";
protected static final String FORMULA = "formula";
protected static final String OCR = "ocr";
protected static final String DOSSIER_REDACTIONS = "dossier_redactions";
protected static final String IMPORTED_REDACTION = "imported_redaction";
protected static final String PII = "PII";
protected static final String ROTATE_SIMPLE = "RotateSimple";
@Autowired
protected RedactionController redactionController;
@Autowired
protected AnnotationService annotationService;
@Autowired
protected AnalyzeService analyzeService;
@Autowired
protected ObjectMapper objectMapper;
@Autowired
protected RedactionStorageService redactionStorageService;
@Autowired
protected StorageService storageService;
@Autowired
protected ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@MockBean
protected AmazonS3 amazonS3;
@MockBean
protected RabbitTemplate rabbitTemplate;
@MockBean
protected LegalBasisClient legalBasisClient;
protected final Map<String, List<String>> dictionary = new HashMap<>();
protected final Map<String, List<String>> dossierDictionary = new HashMap<>();
protected final Map<String, List<String>> falsePositive = new HashMap<>();
protected final Map<String, List<String>> falseRecommendation = new HashMap<>();
protected final Map<String, String> typeColorMap = new HashMap<>();
protected final Map<String, Boolean> hintTypeMap = new HashMap<>();
protected final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
protected final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
protected final Map<String, Integer> rankTypeMap = new HashMap<>();
protected final Colors colors = new Colors();
protected final Map<String, Long> reanlysisVersions = new HashMap<>();
protected final Set<String> deleted = new HashSet<>();
protected final static String TEST_DOSSIER_TEMPLATE_ID = "123";
protected final static String TEST_DOSSIER_ID = "123";
protected final static String TEST_FILE_ID = "123";
@MockBean
protected RulesClient rulesClient;
@MockBean
protected DictionaryClient dictionaryClient;
@AfterEach
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
}
protected void mockDictionaryCalls(Long version) {
when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(VERTEBRATE,
false));
when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
NO_REDACTION_INDICATOR,
false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
REDACTION_INDICATOR,
false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(MUST_REDACT,
false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
PUBLISHED_INFORMATION,
false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(TEST_METHOD,
false));
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ROTATE_SIMPLE,
false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
DOSSIER_REDACTIONS,
true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
IMPORTED_REDACTION,
true));
}
protected void loadDictionaryForTest() {
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
protected void loadOnlyDictionaryForSimpleFile() {
dictionary.clear();
dictionary.computeIfAbsent(ROTATE_SIMPLE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
protected static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
if (resource == null) {
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
}
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
StringBuilder sb = new StringBuilder();
String str;
while ((str = br.readLine()) != null) {
sb.append(str).append("\n");
}
return sb.toString();
} catch (IOException e) {
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
}
}
protected List<File> getPathsRecursively(File path) {
List<File> result = new ArrayList<>();
if (path == null || path.listFiles() == null) {
return result;
}
for (File f : path.listFiles()) {
if (f.isFile()) {
result.add(f);
} else {
result.addAll(getPathsRecursively(f));
}
}
return result;
}
protected void loadTypeForTest() {
typeColorMap.put(VERTEBRATE, "#ff85f7");
typeColorMap.put(ADDRESS, "#ffe187");
typeColorMap.put(AUTHOR, "#ffe187");
typeColorMap.put(SPONSOR, "#85ebff");
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
typeColorMap.put(HINT_ONLY, "#abc0c4");
typeColorMap.put(MUST_REDACT, "#fab4c0");
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
typeColorMap.put(TEST_METHOD, "#91fae8");
typeColorMap.put(PII, "#66ccff");
typeColorMap.put(PURITY, "#ffe187");
typeColorMap.put(IMAGE, "#fcc5fb");
typeColorMap.put(OCR, "#fcc5fb");
typeColorMap.put(LOGO, "#ffe187");
typeColorMap.put(FORMULA, "#ffe187");
typeColorMap.put(SIGNATURE, "#ffe187");
typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6");
typeColorMap.put(ROTATE_SIMPLE, "#66ccff");
hintTypeMap.put(VERTEBRATE, true);
hintTypeMap.put(ADDRESS, false);
hintTypeMap.put(AUTHOR, false);
hintTypeMap.put(SPONSOR, false);
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY, true);
hintTypeMap.put(MUST_REDACT, true);
hintTypeMap.put(PUBLISHED_INFORMATION, true);
hintTypeMap.put(TEST_METHOD, true);
hintTypeMap.put(PII, false);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
hintTypeMap.put(DOSSIER_REDACTIONS, false);
hintTypeMap.put(IMPORTED_REDACTION, false);
hintTypeMap.put(ROTATE_SIMPLE, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
caseInSensitiveMap.put(AUTHOR, false);
caseInSensitiveMap.put(SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);
caseInSensitiveMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(IMPORTED_REDACTION, false);
caseInSensitiveMap.put(ROTATE_SIMPLE, true);
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
recommendationTypeMap.put(AUTHOR, false);
recommendationTypeMap.put(SPONSOR, false);
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
recommendationTypeMap.put(REDACTION_INDICATOR, false);
recommendationTypeMap.put(HINT_ONLY, false);
recommendationTypeMap.put(MUST_REDACT, false);
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
recommendationTypeMap.put(TEST_METHOD, false);
recommendationTypeMap.put(PII, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(IMPORTED_REDACTION, false);
recommendationTypeMap.put(ROTATE_SIMPLE, false);
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
rankTypeMap.put(AUTHOR, 130);
rankTypeMap.put(SPONSOR, 120);
rankTypeMap.put(VERTEBRATE, 110);
rankTypeMap.put(MUST_REDACT, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
rankTypeMap.put(IMPORTED_REDACTION, 200);
rankTypeMap.put(ROTATE_SIMPLE, 150);
colors.setSkippedColor("#cccccc");
colors.setRequestAddColor("#04b093");
colors.setRequestRemoveColor("#04b093");
}
@SneakyThrows
protected void loadNerForTest() {
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
}
protected List<Type> getTypeResponse() {
return typeColorMap.entrySet()
.stream()
.map(typeColor -> Type.builder()
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(typeColor.getKey())
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColor.getValue())
.isHint(hintTypeMap.get(typeColor.getKey()))
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
.rank(rankTypeMap.get(typeColor.getKey()))
.build())
.collect(Collectors.toList());
}
protected Type getDictionaryResponse(String type, boolean isDossierDictionary) {
return Type.builder()
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
.falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>())
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
.rank(rankTypeMap.get(type))
.build();
}
private String cleanDictionaryEntry(String entry) {
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
}
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
if (entries == null) {
entries = Collections.emptyList();
}
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> {
dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build());
});
return dictionaryEntries;
}
@SneakyThrows
protected AnalyzeRequest prepareStorage(String file) {
return prepareStorage(file, "files/cv_service_empty_response.json");
}
@SneakyThrows
protected AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream());
}
@SneakyThrows
protected AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
AnalyzeRequest request = AnalyzeRequest.builder()
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.lastProcessed(OffsetDateTime.now())
.build();
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
cvServiceResponseFileStream);
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
return request;
}
}

View File

@ -3,28 +3,23 @@ package com.iqser.red.service.redaction.v1.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@ -34,13 +29,9 @@ import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.mockito.stubbing.Answer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
@ -48,9 +39,7 @@ import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
@ -67,24 +56,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
@ -94,82 +73,9 @@ import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class RedactionIntegrationTest {
public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
private static final String VERTEBRATE = "vertebrate";
private static final String ADDRESS = "CBI_address";
private static final String AUTHOR = "CBI_author";
private static final String SPONSOR = "CBI_sponsor";
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String REDACTION_INDICATOR = "redaction_indicator";
private static final String HINT_ONLY = "hint_only";
private static final String MUST_REDACT = "must_redact";
private static final String PUBLISHED_INFORMATION = "published_information";
private static final String TEST_METHOD = "test_method";
private static final String PURITY = "purity";
private static final String IMAGE = "image";
private static final String LOGO = "logo";
private static final String SIGNATURE = "signature";
private static final String FORMULA = "formula";
private static final String OCR = "ocr";
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String IMPORTED_REDACTION = "imported_redaction";
private static final String PII = "PII";
private static final String ROTATE_SIMPLE = "RotateSimple";
@Autowired
private RedactionController redactionController;
@Autowired
private AnnotationService annotationService;
@Autowired
private AnalyzeService analyzeService;
@Autowired
private ObjectMapper objectMapper;
@MockBean
private RulesClient rulesClient;
@MockBean
private DictionaryClient dictionaryClient;
@Autowired
private RedactionStorageService redactionStorageService;
@Autowired
private StorageService storageService;
@Autowired
private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@MockBean
private AmazonS3 amazonS3;
@MockBean
private RabbitTemplate rabbitTemplate;
@MockBean
private LegalBasisClient legalBasisClient;
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, List<String>> falsePositive = new HashMap<>();
private final Map<String, List<String>> falseRecommendation = new HashMap<>();
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
private final Map<String, Long> reanlysisVersions = new HashMap<>();
private final Set<String> deleted = new HashSet<>();
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
private final static String TEST_DOSSIER_ID = "123";
private final static String TEST_FILE_ID = "123";
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
@ -201,15 +107,6 @@ public class RedactionIntegrationTest {
}
@AfterEach
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
}
@BeforeEach
public void stubClients() {
@ -243,46 +140,6 @@ public class RedactionIntegrationTest {
}
private void mockDictionaryCalls(Long version) {
when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(VERTEBRATE,
false));
when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
NO_REDACTION_INDICATOR,
false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
REDACTION_INDICATOR,
false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(MUST_REDACT,
false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
PUBLISHED_INFORMATION,
false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(TEST_METHOD,
false));
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ROTATE_SIMPLE,
false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
DOSSIER_REDACTIONS,
true));
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
IMPORTED_REDACTION,
true));
}
@Test
public void test270Rotated() {
@ -644,7 +501,9 @@ public class RedactionIntegrationTest {
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
ClassPathResource responseJson = new ClassPathResource("files/crafted_document.NER_ENTITIES.json");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage(fileName);
@ -1102,10 +961,6 @@ public class RedactionIntegrationTest {
}
@Test
public void phantomCellsDocumentTest() throws IOException {
@ -1326,272 +1181,6 @@ public class RedactionIntegrationTest {
}
private void loadDictionaryForTest() {
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
private void loadOnlyDictionaryForSimpleFile() {
dictionary.clear();
dictionary.computeIfAbsent(ROTATE_SIMPLE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
private static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
if (resource == null) {
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
}
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
StringBuilder sb = new StringBuilder();
String str;
while ((str = br.readLine()) != null) {
sb.append(str).append("\n");
}
return sb.toString();
} catch (IOException e) {
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
}
}
private List<File> getPathsRecursively(File path) {
List<File> result = new ArrayList<>();
if (path == null || path.listFiles() == null) {
return result;
}
for (File f : path.listFiles()) {
if (f.isFile()) {
result.add(f);
} else {
result.addAll(getPathsRecursively(f));
}
}
return result;
}
private void loadTypeForTest() {
typeColorMap.put(VERTEBRATE, "#ff85f7");
typeColorMap.put(ADDRESS, "#ffe187");
typeColorMap.put(AUTHOR, "#ffe187");
typeColorMap.put(SPONSOR, "#85ebff");
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
typeColorMap.put(HINT_ONLY, "#abc0c4");
typeColorMap.put(MUST_REDACT, "#fab4c0");
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
typeColorMap.put(TEST_METHOD, "#91fae8");
typeColorMap.put(PII, "#66ccff");
typeColorMap.put(PURITY, "#ffe187");
typeColorMap.put(IMAGE, "#fcc5fb");
typeColorMap.put(OCR, "#fcc5fb");
typeColorMap.put(LOGO, "#ffe187");
typeColorMap.put(FORMULA, "#ffe187");
typeColorMap.put(SIGNATURE, "#ffe187");
typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6");
typeColorMap.put(ROTATE_SIMPLE, "#66ccff");
hintTypeMap.put(VERTEBRATE, true);
hintTypeMap.put(ADDRESS, false);
hintTypeMap.put(AUTHOR, false);
hintTypeMap.put(SPONSOR, false);
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY, true);
hintTypeMap.put(MUST_REDACT, true);
hintTypeMap.put(PUBLISHED_INFORMATION, true);
hintTypeMap.put(TEST_METHOD, true);
hintTypeMap.put(PII, false);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
hintTypeMap.put(DOSSIER_REDACTIONS, false);
hintTypeMap.put(IMPORTED_REDACTION, false);
hintTypeMap.put(ROTATE_SIMPLE, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
caseInSensitiveMap.put(AUTHOR, false);
caseInSensitiveMap.put(SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);
caseInSensitiveMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(IMPORTED_REDACTION, false);
caseInSensitiveMap.put(ROTATE_SIMPLE, true);
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
recommendationTypeMap.put(AUTHOR, false);
recommendationTypeMap.put(SPONSOR, false);
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
recommendationTypeMap.put(REDACTION_INDICATOR, false);
recommendationTypeMap.put(HINT_ONLY, false);
recommendationTypeMap.put(MUST_REDACT, false);
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
recommendationTypeMap.put(TEST_METHOD, false);
recommendationTypeMap.put(PII, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(IMPORTED_REDACTION, false);
recommendationTypeMap.put(ROTATE_SIMPLE, false);
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
rankTypeMap.put(AUTHOR, 130);
rankTypeMap.put(SPONSOR, 120);
rankTypeMap.put(VERTEBRATE, 110);
rankTypeMap.put(MUST_REDACT, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
rankTypeMap.put(IMPORTED_REDACTION, 200);
rankTypeMap.put(ROTATE_SIMPLE, 150);
colors.setSkippedColor("#cccccc");
colors.setRequestAddColor("#04b093");
colors.setRequestRemoveColor("#04b093");
}
@SneakyThrows
private void loadNerForTest() {
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
}
private List<Type> getTypeResponse() {
return typeColorMap.entrySet()
.stream()
.map(typeColor -> Type.builder()
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(typeColor.getKey())
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColor.getValue())
.isHint(hintTypeMap.get(typeColor.getKey()))
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
.rank(rankTypeMap.get(typeColor.getKey()))
.build())
.collect(Collectors.toList());
}
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
return Type.builder()
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
.falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>())
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
.rank(rankTypeMap.get(type))
.build();
}
private String cleanDictionaryEntry(String entry) {
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
}
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
if (entries == null) {
entries = Collections.emptyList();
}
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> {
dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build());
});
return dictionaryEntries;
}
@Test
public void testImportedRedactions() throws IOException {
@ -1599,7 +1188,8 @@ public class RedactionIntegrationTest {
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
importedRedactions.getInputStream());
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@ -1659,39 +1249,4 @@ public class RedactionIntegrationTest {
assertThat(values).contains("Mr. Tambourine Man");
}
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
AnalyzeRequest request = AnalyzeRequest.builder()
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.lastProcessed(OffsetDateTime.now())
.build();
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream);
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
return request;
}
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
return prepareStorage(file, "files/cv_service_empty_response.json");
}
@SneakyThrows
private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream());
}
}

View File

@ -0,0 +1,152 @@
package com.iqser.red.service.redaction.v1.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class RedactionIntegrationTestV2 extends AbstractRedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules_v2.drl");
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
public static class RedactionIntegrationTestConfiguration {
@Bean
public KieContainer kieContainer() {
KieServices kieServices = KieServices.Factory.get();
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/test/resources/drools/rules_v2", kieServices.getResources().newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
KieModule kieModule = kieBuilder.getKieModule();
return kieServices.newKieContainer(kieModule.getReleaseId());
}
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
@BeforeEach
public void stubClients() {
TenantContext.setTenantId("redaction");
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
loadDictionaryForTest();
loadTypeForTest();
loadNerForTest();
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
.build()));
mockDictionaryCalls(null);
mockDictionaryCalls(0L);
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
/**
* The case in this test: One term, 'Dr. Alan Miller', is found by PII-Rule and is in the PII-dictionary
* as well as in the PII-false-positive-list - and in the CBI-author dictionary.
* It gets redacted, as the PII-finding is false positive and so the CBI-author entry is effective
* independent of the entity-rank
*/
@Test
@SneakyThrows
public void testTermIsInTwoDictionariesAndInOneFalsePositive() {
AnalyzeRequest request = prepareStorage("files/new/simplified2.pdf");
dictionary.clear();
dictionary.computeIfAbsent(PII, v -> new ArrayList<>()).add("Dr. Alan Miller");
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()).add("Dr. Alan Miller");
falsePositive.clear();
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()).add("Dr. Alan Miller COMPLETION DATE:");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(1);
RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0);
assertThat(redactionLogEntry.getType()).isEqualTo("CBI_author");
assertThat(redactionLogEntry.getValue()).isEqualTo("Dr. Alan Miller");
assertThat(redactionLogEntry.isRedacted()).isEqualTo(true);
assertThat(redactionLogEntry.isRecommendation()).isEqualTo(false);
assertThat(redactionLogEntry.isFalsePositive()).isEqualTo(false);
assertThat(redactionLogEntry.isExcluded()).isEqualTo(false);
assertThat(redactionLogEntry.isDictionaryEntry()).isEqualTo(true);
assertThat(redactionLogEntry.getEngines().size()).isEqualTo(1);
assertThat(redactionLogEntry.getEngines().contains(Engine.DICTIONARY)).isEqualTo(true);
}
}

View File

@ -0,0 +1,341 @@
package drools
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
global Section section
// --------------------------------------- AI rules -------------------------------------------------------------------
rule "0: Add CBI_author from ai"
when
Section(aiMatchesType("CBI_author"))
then
section.addAiEntities("CBI_author", "CBI_author");
end
rule "0: Combine address parts from ai to CBI_address (org is mandatory)"
when
Section(aiMatchesType("ORG"))
then
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
rule "0: Combine address parts from ai to CBI_address (street is mandatory)"
when
Section(aiMatchesType("STREET"))
then
section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
rule "0: Combine address parts from ai to CBI_address (city is mandatory)"
when
Section(aiMatchesType("CITY"))
then
section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false);
end
/* Syngenta specific laboratory recommendation */
rule "0: Recommend CTL/BL laboratory that start with BL or CTL"
when
Section(searchText.contains("CT") || searchText.contains("BL"))
then
/* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address");
end
// --------------------------------------- CBI rules -------------------------------------------------------------------
rule "1: Redact CBI Authors (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
then
section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "2: Redact CBI Authors (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
then
section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "3: Redact not CBI Address (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
then
section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
section.ignoreRecommendations("CBI_address");
end
rule "4: Redact CBI Address (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
then
section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "5: Do not redact genitive CBI_author"
when
Section(matchesType("CBI_author"))
then
section.expandToFalsePositiveByRegEx("CBI_author", "[''ʼˈ´`ʻ']s", false, 0);
end
rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
then
section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
then
section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
then
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
then
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "16: Add recommendation for Addresses in Test Organism sections"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
then
section.recommendLineAfter("Source:", "CBI_address");
end
rule "17: Add recommendation for Addresses in Test Animals sections"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
then
section.recommendLineAfter("Source", "CBI_address");
end
rule "18: Do not redact Names and Addresses if Published Information found"
when
Section(matchesType("published_information"))
then
section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
end
// --------------------------------------- PII rules -------------------------------------------------------------------
rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
then
section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
then
section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "21: Redact Emails by RegEx (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
then
section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "22: Redact Emails by RegEx (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
then
section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
text.contains("Contact")
|| text.contains("Telephone")
|| text.contains("Phone")
|| text.contains("Fax")
|| text.contains("Tel")
|| text.contains("Ter")
|| text.contains("Mobile")
|| text.contains("Fel")
|| text.contains("Fer")
))
then
section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
text.contains("Contact")
|| text.contains("Telephone")
|| text.contains("Phone")
|| text.contains("Fax")
|| text.contains("Tel")
|| text.contains("Ter")
|| text.contains("Mobile")
|| text.contains("Fel")
|| text.contains("Fer")
))
then
section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "27: Redact AUTHOR(S) (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("COMPLETION DATE:")
&& !searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "28: Redact AUTHOR(S) (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("COMPLETION DATE:")
&& !searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "29: Redact AUTHOR(S) (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "30: Redact AUTHOR(S) (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("PERFORMING LABORATORY:")
)
then
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study");
end
rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("PERFORMING LABORATORY:"))
then
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- other rules -------------------------------------------------------------------
rule "33: Purity Hint"
when
Section(searchText.toLowerCase().contains("purity"))
then
section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
end
rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction"));
then
section.ignore("dossier_redaction");
end
rule "35: Redact signatures (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
then
section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "36: Redact signatures (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
then
section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "43: Redact Logos (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo"))
then
section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end