RED-4510: Implemented Test class which generates and compares RedactionLogs

This commit is contained in:
Philipp Schramm 2022-07-21 12:38:07 +02:00
parent 32e73dc9d9
commit f48bdb6267
5 changed files with 1212 additions and 0 deletions

1
.gitignore vendored
View File

@ -26,3 +26,4 @@
**/.DS_Store
**/classpath-data.json
**/dependencies-and-licenses-overview.txt
/redaction-service-v1/redaction-service-server-v1/src/test/resources/RedactionLog/

View File

@ -38,6 +38,13 @@
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jsr310</artifactId>
<version>${jackson.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>

View File

@ -0,0 +1,773 @@
package com.iqser.red.service.redaction.v1.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Change;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.ManualChange;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogComment;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionLogLegalBasis;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RulesTest.RedactionIntegrationTestConfiguration.class)
public class RulesTest {
// Classpath location of the Drools rule file and its content, read once via loadFromClassPath.
private static final String RULES_PATH = "drools/testRules.drl";
private static final String RULES = loadFromClassPath(RULES_PATH);
// Type names. They serve as keys of the maps below and as the prefix of the stubbed type ids ("<type>:<dossierTemplateId>").
private static final String VERTEBRATE = "vertebrate";
private static final String ADDRESS = "CBI_address";
private static final String AUTHOR = "CBI_author";
private static final String SPONSOR = "CBI_sponsor";
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String REDACTION_INDICATOR = "redaction_indicator";
private static final String HINT_ONLY = "hint_only";
private static final String MUST_REDACT = "must_redact";
private static final String PUBLISHED_INFORMATION = "published_information";
private static final String TEST_METHOD = "test_method";
private static final String PURITY = "purity";
private static final String IMAGE = "image";
private static final String LOGO = "logo";
private static final String SIGNATURE = "signature";
private static final String FORMULA = "formula";
private static final String OCR = "ocr";
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String IMPORTED_REDACTION = "imported_redaction";
private static final String PII = "PII";
// Root directory scanned by the "all files" tests, and the directory the RedactionLog JSON snapshots are written to / read from.
private static final String RESOURCES_PATH = "src/test/resources/";
private static final String REDACTION_LOG_PATH = RESOURCES_PATH + "RedactionLog/";
// Dossier template id and dossier id used for all stubs and requests; note that both have the same value.
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
private final static String TEST_DOSSIER_ID = "123";
// Type name -> dictionary entries; dictionary, dossierDictionary and falsePositive are filled by loadDictionaryForTest().
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, List<String>> falsePositive = new HashMap<>();
// Read by getDictionaryResponse(); nothing in the visible part of this class adds entries to it.
private final Map<String, List<String>> falseRecommendation = new HashMap<>();
// Per-type attributes and the colour configuration, all filled by loadTypeForTest().
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
// Consulted by toDictionaryEntry() for the version / deleted flag of an entry; not populated in the visible part of this class.
private final Map<String, Long> reanalysisVersions = new HashMap<>();
private final Set<String> deleted = new HashSet<>();
// Beans taken from the Spring test context (@Autowired) or replaced by Mockito mocks (@MockBean).
@Autowired
private RedactionController redactionController;
@Autowired
private AnnotationService annotationService;
@Autowired
private AnalyzeService analyzeService;
@Autowired
private ObjectMapper objectMapper;
@MockBean
private RulesClient rulesClient;
@MockBean
private DictionaryClient dictionaryClient;
@Autowired
private RedactionStorageService redactionStorageService;
@Autowired
private StorageService storageService;
@Autowired
private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@MockBean
private AmazonS3 amazonS3;
@MockBean
private RabbitTemplate rabbitTemplate;
@MockBean
private LegalBasisClient legalBasisClient;
// Mutable despite the constant-style name: incremented by increaseTestFileId() and reset by the "all files" tests.
private String TEST_FILE_ID = "123";
/**
 * Reads a classpath resource as UTF-8 text.
 * <p>
 * The resource is read line by line; every line is terminated with a single {@code "\n"} in the result,
 * regardless of the line separator used in the resource.
 *
 * @param path classpath-relative location of the resource
 * @return the resource content, each line followed by {@code "\n"}
 * @throws IllegalArgumentException if the resource does not exist or cannot be read
 */
private static String loadFromClassPath(String path) {

    URL resource = ResourceLoader.class.getClassLoader().getResource(path);
    if (resource == null) {
        // Report the path that was actually requested instead of a fixed file name.
        throw new IllegalArgumentException("could not load classpath resource: " + path);
    }
    try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String str;
        while ((str = br.readLine()) != null) {
            sb.append(str).append("\n");
        }
        return sb.toString();
    } catch (IOException e) {
        throw new IllegalArgumentException("could not load classpath resource: " + path, e);
    }
}
/**
 * Runs after every test and calls {@code clearStorage()} on the storage service, provided the injected
 * service is a {@code FileSystemBackedStorageService}; any other implementation is left untouched.
 */
@After
public void cleanupStorage() {

    if (!(this.storageService instanceof FileSystemBackedStorageService)) {
        return;
    }
    FileSystemBackedStorageService fileSystemStorage = (FileSystemBackedStorageService) this.storageService;
    fileSystemStorage.clearStorage();
}
/**
 * Runs before every test: fills the in-memory dictionary and type maps and stubs the mocked rules and
 * dictionary clients so that they answer from these maps for the test dossier template and test dossier.
 */
@Before
public void stubClients() {

    when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
    when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));

    // The maps have to be filled first: the stubbed responses below are built from them right away.
    loadDictionaryForTest();
    loadTypeForTest();

    when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
    when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());

    // NOTE(review): the dossier id is passed as dossierTemplateId and the template id is used in the type id.
    // Both constants currently hold the same value, so this has no effect - confirm which one is intended.
    Type dossierRedactionsType = Type.builder()
            .id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .type(DOSSIER_REDACTIONS)
            .dossierTemplateId(TEST_DOSSIER_ID)
            .hexColor("#ffe187")
            .isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
            .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
            .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
            .rank(rankTypeMap.get(DOSSIER_REDACTIONS))
            .build();
    when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(dossierRedactionsType));

    // Dictionaries are requested both without a version and with version 0.
    mockDictionaryCalls(null);
    mockDictionaryCalls(0L);

    when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
/**
 * Fills the dictionary, dossierDictionary and falsePositive maps from the text resources below "dictionaries/".
 * Every loaded entry is passed through cleanDictionaryEntry and duplicates within one resource are dropped
 * (entries are collected into a set before they are added to the list of the type).
 */
private void loadDictionaryForTest() {

    // One row per type: { type name, resource holding its entries }.
    String[][] templateDictionaries = {
            {AUTHOR, "dictionaries/CBI_author.txt"},
            {SPONSOR, "dictionaries/CBI_sponsor.txt"},
            {VERTEBRATE, "dictionaries/vertebrate.txt"},
            {ADDRESS, "dictionaries/CBI_address.txt"},
            {NO_REDACTION_INDICATOR, "dictionaries/no_redaction_indicator.txt"},
            {REDACTION_INDICATOR, "dictionaries/redaction_indicator.txt"},
            {HINT_ONLY, "dictionaries/hint_only.txt"},
            {MUST_REDACT, "dictionaries/must_redact.txt"},
            {PUBLISHED_INFORMATION, "dictionaries/published_information.txt"},
            {TEST_METHOD, "dictionaries/test_method.txt"},
            {PII, "dictionaries/PII.txt"},
            {PURITY, "dictionaries/purity.txt"},
            {IMAGE, "dictionaries/empty.txt"},
            {OCR, "dictionaries/empty.txt"},
            {LOGO, "dictionaries/empty.txt"},
            {SIGNATURE, "dictionaries/empty.txt"},
            {FORMULA, "dictionaries/empty.txt"}
    };
    for (String[] typeAndResource : templateDictionaries) {
        dictionary.computeIfAbsent(typeAndResource[0], key -> new ArrayList<>())
                .addAll(ResourceLoader.load(typeAndResource[1]).stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    }

    // Dossier level dictionaries: one loaded from a resource, one explicitly empty.
    dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, key -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());

    // False positives exist for PII only.
    falsePositive.computeIfAbsent(PII, key -> new ArrayList<>())
            .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
/**
 * Fills the per-type attribute maps (colour, hint flag, case-insensitivity flag, recommendation flag, rank)
 * and sets the skipped / request-add / request-remove colours on the shared Colors instance.
 * <p>
 * DOSSIER_REDACTIONS receives flags and a rank but no entry in typeColorMap.
 */
private void loadTypeForTest() {

    // { type name, hex colour }
    String[][] typeColors = {
            {VERTEBRATE, "#ff85f7"},
            {ADDRESS, "#ffe187"},
            {AUTHOR, "#ffe187"},
            {SPONSOR, "#85ebff"},
            {NO_REDACTION_INDICATOR, "#be85ff"},
            {REDACTION_INDICATOR, "#caff85"},
            {HINT_ONLY, "#abc0c4"},
            {MUST_REDACT, "#fab4c0"},
            {PUBLISHED_INFORMATION, "#85ebff"},
            {TEST_METHOD, "#91fae8"},
            {PII, "#66ccff"},
            {PURITY, "#ffe187"},
            {IMAGE, "#fcc5fb"},
            {OCR, "#fcc5fb"},
            {LOGO, "#ffe187"},
            {FORMULA, "#ffe187"},
            {SIGNATURE, "#ffe187"},
            {IMPORTED_REDACTION, "#fcfbe6"}
    };
    for (String[] typeColor : typeColors) {
        typeColorMap.put(typeColor[0], typeColor[1]);
    }

    // Hint flag: true for the types in the set, false for every other listed type.
    Set<String> hintTypes = Set.of(VERTEBRATE, NO_REDACTION_INDICATOR, REDACTION_INDICATOR, HINT_ONLY, MUST_REDACT, PUBLISHED_INFORMATION, TEST_METHOD, IMAGE, OCR);
    List<String> hintOrder = List.of(VERTEBRATE, ADDRESS, AUTHOR, SPONSOR, NO_REDACTION_INDICATOR, REDACTION_INDICATOR, HINT_ONLY, MUST_REDACT,
            PUBLISHED_INFORMATION, TEST_METHOD, PII, PURITY, IMAGE, OCR, FORMULA, LOGO, SIGNATURE, DOSSIER_REDACTIONS, IMPORTED_REDACTION);
    for (String type : hintOrder) {
        hintTypeMap.put(type, hintTypes.contains(type));
    }

    // Case-insensitivity flag: true for the types in the set, false for every other listed type.
    Set<String> caseInsensitiveTypes = Set.of(VERTEBRATE, NO_REDACTION_INDICATOR, REDACTION_INDICATOR, HINT_ONLY, MUST_REDACT, PUBLISHED_INFORMATION, IMAGE, OCR,
            SIGNATURE, LOGO, FORMULA);
    List<String> caseInsensitiveOrder = List.of(VERTEBRATE, ADDRESS, AUTHOR, SPONSOR, NO_REDACTION_INDICATOR, REDACTION_INDICATOR, HINT_ONLY, MUST_REDACT,
            PUBLISHED_INFORMATION, TEST_METHOD, PII, PURITY, IMAGE, OCR, SIGNATURE, LOGO, FORMULA, DOSSIER_REDACTIONS, IMPORTED_REDACTION);
    for (String type : caseInsensitiveOrder) {
        caseInSensitiveMap.put(type, caseInsensitiveTypes.contains(type));
    }

    // No type is a recommendation type.
    List<String> recommendationOrder = List.of(VERTEBRATE, ADDRESS, AUTHOR, SPONSOR, NO_REDACTION_INDICATOR, REDACTION_INDICATOR, HINT_ONLY, MUST_REDACT,
            PUBLISHED_INFORMATION, TEST_METHOD, PII, PURITY, IMAGE, OCR, FORMULA, SIGNATURE, LOGO, DOSSIER_REDACTIONS, IMPORTED_REDACTION);
    for (String type : recommendationOrder) {
        recommendationTypeMap.put(type, false);
    }

    // Ranks: rankedTypes[i] is assigned ranks[i].
    String[] rankedTypes = {PURITY, PII, ADDRESS, AUTHOR, SPONSOR, VERTEBRATE, MUST_REDACT, REDACTION_INDICATOR, NO_REDACTION_INDICATOR, PUBLISHED_INFORMATION,
            TEST_METHOD, HINT_ONLY, IMAGE, OCR, LOGO, SIGNATURE, FORMULA, DOSSIER_REDACTIONS, IMPORTED_REDACTION};
    int[] ranks = {155, 150, 140, 130, 120, 110, 100, 90, 80, 70,
            60, 50, 30, 29, 28, 27, 26, 200, 200};
    for (int i = 0; i < rankedTypes.length; i++) {
        rankTypeMap.put(rankedTypes[i], ranks[i]);
    }

    colors.setSkippedColor("#cccccc");
    colors.setRequestAddColor("#04b093");
    colors.setRequestRemoveColor("#04b093");
}
/**
 * Builds one Type per entry of typeColorMap for the test dossier template, taking the remaining attributes
 * from the hint, case-insensitivity, recommendation and rank maps.
 *
 * @return the types, in the iteration order of typeColorMap
 */
private List<Type> getTypeResponse() {

    List<Type> types = new ArrayList<>();
    for (Map.Entry<String, String> typeColor : typeColorMap.entrySet()) {
        String typeName = typeColor.getKey();
        Type type = Type.builder()
                .id(typeName + ":" + TEST_DOSSIER_TEMPLATE_ID)
                .type(typeName)
                .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
                .hexColor(typeColor.getValue())
                .isHint(hintTypeMap.get(typeName))
                .isCaseInsensitive(caseInSensitiveMap.get(typeName))
                .isRecommendation(recommendationTypeMap.get(typeName))
                .rank(rankTypeMap.get(typeName))
                .build();
        types.add(type);
    }
    return types;
}
/**
 * Stubs dictionaryClient.getDictionaryForType for every known type id ("<type>:<dossierTemplateId>") and the
 * given version. Template types answer from the template dictionary, DOSSIER_REDACTIONS and
 * IMPORTED_REDACTION answer from the dossier dictionary.
 *
 * @param version dictionary version the stub is registered for; may be null
 */
private void mockDictionaryCalls(Long version) {

    List<String> templateTypes = List.of(VERTEBRATE, ADDRESS, AUTHOR, SPONSOR, NO_REDACTION_INDICATOR, REDACTION_INDICATOR, HINT_ONLY, MUST_REDACT,
            PUBLISHED_INFORMATION, TEST_METHOD, PII, PURITY, IMAGE, OCR, LOGO, SIGNATURE, FORMULA);
    for (String templateType : templateTypes) {
        when(dictionaryClient.getDictionaryForType(templateType + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(templateType, false));
    }

    List<String> dossierTypes = List.of(DOSSIER_REDACTIONS, IMPORTED_REDACTION);
    for (String dossierType : dossierTypes) {
        when(dictionaryClient.getDictionaryForType(dossierType + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(dossierType, true));
    }
}
/**
 * Normalises a raw dictionary entry: applies TextNormalizationUtilities.removeHyphenLineBreaks and then
 * replaces every remaining line feed with a single space.
 *
 * @param entry raw entry as loaded from the dictionary resource
 * @return the normalised entry
 */
private String cleanDictionaryEntry(String entry) {

    String withoutHyphenLineBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(entry);
    return withoutHyphenLineBreaks.replaceAll("\\n", " ");
}
/**
 * Builds the Type returned by the stubbed dictionary client for one type name.
 *
 * @param type                type name; also used to look up colour, flags and rank
 * @param isDossierDictionary true to take the entries from dossierDictionary, false to take them from dictionary
 * @return the Type including entries, false-positive entries and false-recommendation entries
 *         (the latter two are empty lists when nothing is registered for the type)
 */
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {

    Map<String, List<String>> entrySource = isDossierDictionary ? dossierDictionary : dictionary;
    List<DictionaryEntry> entries = toDictionaryEntry(entrySource.get(type));

    List<DictionaryEntry> falsePositiveEntries;
    if (falsePositive.containsKey(type)) {
        falsePositiveEntries = toDictionaryEntry(falsePositive.get(type));
    } else {
        falsePositiveEntries = new ArrayList<>();
    }

    List<DictionaryEntry> falseRecommendationEntries;
    if (falseRecommendation.containsKey(type)) {
        falseRecommendationEntries = toDictionaryEntry(falseRecommendation.get(type));
    } else {
        falseRecommendationEntries = new ArrayList<>();
    }

    return Type.builder()
            .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .hexColor(typeColorMap.get(type))
            .entries(entries)
            .falsePositiveEntries(falsePositiveEntries)
            .falseRecommendationEntries(falseRecommendationEntries)
            .isHint(hintTypeMap.get(type))
            .isCaseInsensitive(caseInSensitiveMap.get(type))
            .isRecommendation(recommendationTypeMap.get(type))
            .rank(rankTypeMap.get(type))
            .build();
}
/**
 * Converts plain entry values into DictionaryEntry objects.
 * The version of an entry is taken from reanalysisVersions (0 when absent); it is flagged as deleted when
 * its value is contained in the deleted set.
 *
 * @param entries entry values to convert; must not be null
 * @return a new mutable list with one DictionaryEntry per value, in the same order
 */
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {

    return entries.stream()
            .map(value -> DictionaryEntry.builder()
                    .value(value)
                    .version(reanalysisVersions.getOrDefault(value, 0L))
                    .deleted(deleted.contains(value))
                    .build())
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Generates the RedactionLog for one hard-coded PDF and saves it as JSON below REDACTION_LOG_PATH.
 * Ignored by default; meant to be run manually.
 */
@Ignore
@Test
@SneakyThrows
public void generateRedactionLogForOneFile() {

    generateRedactionLog("files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf");
}
/**
 * Analyses the given file under a fresh file id and writes the resulting RedactionLog as JSON.
 *
 * @param fileName classpath-relative name of the PDF to analyse
 */
@SneakyThrows
public void generateRedactionLog(String fileName) {

    // A new file id per run keeps the stored objects of different files apart.
    increaseTestFileId();
    System.out.println("Generate RedactionLog as Json for " + fileName + " with fileId " + TEST_FILE_ID);

    loadNerForTest();
    AnalyzeRequest analyzeRequest = prepareStorage(fileName);

    StructureAnalyzeRequest structureRequest = new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
    analyzeService.analyzeDocumentStructure(structureRequest);
    analyzeService.analyze(analyzeRequest);

    RedactionLog generatedLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
    saveRedactionLogAsJson(generatedLog, fileName);
}
/**
 * Replaces TEST_FILE_ID with its numeric value plus one.
 */
private void increaseTestFileId() {

    int nextFileId = Integer.parseInt(TEST_FILE_ID) + 1;
    TEST_FILE_ID = String.valueOf(nextFileId);
}
/**
 * Stores the content of "files/ner_response.json" as the NER_ENTITIES object of the current test file.
 * The resource stream is closed once its content has been read.
 */
@SneakyThrows
private void loadNerForTest() {

    ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
    byte[] bytes;
    // try-with-resources: the stream handed out by the resource has to be closed by the caller.
    try (InputStream nerStream = responseJson.getInputStream()) {
        bytes = IOUtils.toByteArray(nerStream);
    }
    storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), bytes);
}
/**
 * Opens the given classpath resource and delegates to prepareStorage(InputStream).
 * The resource stream is closed before this method returns.
 *
 * @param file classpath-relative name of the PDF
 * @return the request created by prepareStorage(InputStream)
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {

    ClassPathResource pdfFileResource = new ClassPathResource(file);
    // try-with-resources: the stream handed out by the resource has to be closed by the caller.
    try (InputStream pdfStream = pdfFileResource.getInputStream()) {
        return prepareStorage(pdfStream);
    }
}
/**
 * Writes the RedactionLog as JSON to REDACTION_LOG_PATH, mirroring the directory part of the PDF name and
 * replacing ".pdf" in the file name with ".json". Missing directories are created.
 *
 * @param redactionLog the log to serialise
 * @param pdfFileName  name of the analysed PDF; a name without a directory part is stored directly in REDACTION_LOG_PATH
 */
@SneakyThrows
private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) {

    File pdfFile = new File(pdfFileName);
    // getParentFile() is null when the name has no directory part.
    File parent = pdfFile.getParentFile();
    String directory = parent == null ? REDACTION_LOG_PATH : REDACTION_LOG_PATH + parent.getPath();

    File dr = new File(directory);
    boolean created = dr.mkdirs();
    if (created) {
        System.out.println("Directory was created");
    }

    String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json");
    File file = new File(directory, fileName);

    // JavaTimeModule is registered so that java.time values can be serialised.
    ObjectMapper mapper = new ObjectMapper();
    mapper.registerModule(new JavaTimeModule());
    mapper.writeValue(file, redactionLog);
    System.out.println("Saved RedactionLog for " + fileName + " here " + directory);
}
/**
 * Creates the AnalyzeRequest for the current test dossier / file and stores the stream content as the
 * ORIGIN object of that file. The stream is read completely but not closed here.
 *
 * @param stream content of the PDF to analyse
 * @return the request addressing the stored file
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {

    AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .lastProcessed(OffsetDateTime.now())
            .build();

    var pdfContent = IOUtils.toByteArray(stream);
    var originStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN);
    storageService.storeObject(originStorageId, pdfContent);

    return analyzeRequest;
}
/**
 * Generates and saves the RedactionLog (below REDACTION_LOG_PATH) for every file name that getFileNames
 * returns for RESOURCES_PATH. File ids start after 1000. Ignored by default; meant to be run manually.
 */
@Ignore
@Test
@SneakyThrows
public void generateRedactionLogForAllFiles() {

    Path resourcesRoot = FileSystems.getDefault().getPath(RESOURCES_PATH);
    Set<String> pdfFileNames = getFileNames(new HashSet<>(), resourcesRoot);
    System.out.println("Will generate RedactionLog for " + pdfFileNames.size() + " files.");

    TEST_FILE_ID = "1000";
    for (String pdfFileName : pdfFileNames) {
        generateRedactionLog(pdfFileName);
    }
}
/**
 * Analyses one hard-coded PDF and compares its RedactionLog with the JSON saved below REDACTION_LOG_PATH.
 * The test fails when that JSON does not exist. Ignored by default; meant to be run manually.
 */
@Ignore
@Test
@SneakyThrows
public void analyseFileAndCompareRedactionLog() {

    analyseFileAndCompareRedactionLog("files/Compounds/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf");
}
/**
 * Analyses the given file under a fresh file id and asserts that the produced RedactionLog matches the
 * snapshot returned by loadSavedRedactionLog for the same name.
 * <p>
 * The version fields and the sizes of the entry and legal-basis lists are compared first. Afterwards every
 * produced legal basis and every produced entry (looked up by id, ignoring case) must have a counterpart in
 * the snapshot, including its positions, comments, changes and manual changes.
 *
 * @param fileName classpath-relative name of the PDF to analyse
 */
@SneakyThrows
public void analyseFileAndCompareRedactionLog(String fileName) {

    increaseTestFileId();
    System.out.println("Analyse " + fileName + " with fileId " + TEST_FILE_ID + " and compare it with its saved RedactionLog");
    RedactionLog savedRedactionLog = loadSavedRedactionLog(fileName);
    loadNerForTest();
    AnalyzeRequest request = prepareStorage(fileName);
    analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
    analyzeService.analyze(request);
    RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    // Log level attributes.
    assertThat(redactionLog.getAnalysisVersion()).isEqualTo(savedRedactionLog.getAnalysisVersion());
    assertThat(redactionLog.getAnalysisNumber()).isEqualTo(savedRedactionLog.getAnalysisNumber());
    assertThat(redactionLog.getDictionaryVersion()).isEqualTo(savedRedactionLog.getDictionaryVersion());
    assertThat(redactionLog.getDossierDictionaryVersion()).isEqualTo(savedRedactionLog.getDossierDictionaryVersion());
    assertThat(redactionLog.getRulesVersion()).isEqualTo(savedRedactionLog.getRulesVersion());
    assertThat(redactionLog.getLegalBasisVersion()).isEqualTo(savedRedactionLog.getLegalBasisVersion());
    assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(savedRedactionLog.getRedactionLogEntry().size());
    assertThat(redactionLog.getLegalBasis().size()).isEqualTo(savedRedactionLog.getLegalBasis().size());

    // Every produced legal basis needs a saved one with the same name, description and reason (case-insensitive).
    for (RedactionLogLegalBasis redactionLegalBasis : redactionLog.getLegalBasis()) {
        var savedRedactionLegalBasis = savedRedactionLog.getLegalBasis()
                .stream()
                .filter(lb -> lb.getName().equalsIgnoreCase(redactionLegalBasis.getName()))
                .filter(lb -> lb.getDescription().equalsIgnoreCase(redactionLegalBasis.getDescription()))
                .filter(lb -> lb.getReason().equalsIgnoreCase(redactionLegalBasis.getReason()))
                .findFirst();
        assertThat(savedRedactionLegalBasis).isPresent();
    }

    for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
        var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry().stream().filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId())).findFirst();
        assertThat(savedRedactionLogEntry).isPresent();
        RedactionLogEntry saved = savedRedactionLogEntry.get();

        // Scalar attributes of the entry.
        assertThat(saved.getId()).isEqualTo(redactionLogEntry.getId());
        assertThat(saved.getType()).isEqualTo(redactionLogEntry.getType());
        assertThat(saved.getValue()).isEqualTo(redactionLogEntry.getValue());
        assertThat(saved.getReason()).isEqualTo(redactionLogEntry.getReason());
        assertThat(saved.getMatchedRule()).isEqualTo(redactionLogEntry.getMatchedRule());
        assertThat(saved.isRectangle()).isEqualTo(redactionLogEntry.isRectangle());
        assertThat(saved.getLegalBasis()).isEqualTo(redactionLogEntry.getLegalBasis());
        assertThat(saved.isImported()).isEqualTo(redactionLogEntry.isImported());
        assertThat(saved.isRedacted()).isEqualTo(redactionLogEntry.isRedacted());
        assertThat(saved.isHint()).isEqualTo(redactionLogEntry.isHint());
        assertThat(saved.isRecommendation()).isEqualTo(redactionLogEntry.isRecommendation());
        assertThat(saved.isFalsePositive()).isEqualTo(redactionLogEntry.isFalsePositive());
        assertThat(saved.getSection()).isEqualTo(redactionLogEntry.getSection());
        assertThat(saved.getColor()).isEqualTo(redactionLogEntry.getColor());
        assertThat(saved.getSectionNumber()).isEqualTo(redactionLogEntry.getSectionNumber());
        assertThat(saved.getTextBefore()).isEqualTo(redactionLogEntry.getTextBefore());
        assertThat(saved.getTextAfter()).isEqualTo(redactionLogEntry.getTextAfter());
        assertThat(saved.getStartOffset()).isEqualTo(redactionLogEntry.getStartOffset());
        assertThat(saved.getEndOffset()).isEqualTo(redactionLogEntry.getEndOffset());
        assertThat(saved.isImage()).isEqualTo(redactionLogEntry.isImage());
        assertThat(saved.isImageHasTransparency()).isEqualTo(redactionLogEntry.isImageHasTransparency());
        assertThat(saved.isDictionaryEntry()).isEqualTo(redactionLogEntry.isDictionaryEntry());
        assertThat(saved.isDossierDictionaryEntry()).isEqualTo(redactionLogEntry.isDossierDictionaryEntry());
        assertThat(saved.isExcluded()).isEqualTo(redactionLogEntry.isExcluded());
        assertThat(saved.getSourceId()).isEqualTo(redactionLogEntry.getSourceId());

        // Positions: a saved rectangle with identical page, top-left corner and size must exist.
        for (Rectangle rectangle : redactionLogEntry.getPositions()) {
            var savedRectangle = saved.getPositions()
                    .stream()
                    .filter(r -> r.getPage() == rectangle.getPage())
                    .filter(r -> r.getTopLeft().getX() == rectangle.getTopLeft().getX())
                    .filter(r -> r.getTopLeft().getY() == rectangle.getTopLeft().getY())
                    .filter(r -> r.getHeight() == rectangle.getHeight())
                    .filter(r -> r.getWidth() == rectangle.getWidth())
                    .findFirst();
            assertThat(savedRectangle).isPresent();
        }

        // Objects.equals is used in the lookups below: the saved log is deserialised from JSON, so any
        // attribute that is an object (or a boxed value) is never reference-identical to the produced one.
        for (RedactionLogComment comment : redactionLogEntry.getComments()) {
            var savedComment = saved.getComments().stream().filter(c -> java.util.Objects.equals(c.getId(), comment.getId())).findFirst();
            assertThat(savedComment).isPresent();
            assertThat(savedComment.get().getId()).isEqualTo(comment.getId());
            assertThat(savedComment.get().getUser()).isEqualTo(comment.getUser());
            assertThat(savedComment.get().getText()).isEqualTo(comment.getText());
            assertThat(savedComment.get().getAnnotationId()).isEqualTo(comment.getAnnotationId());
            assertThat(savedComment.get().getFileId()).isEqualTo(comment.getFileId());
        }

        for (Change change : redactionLogEntry.getChanges()) {
            var savedChange = saved.getChanges()
                    .stream()
                    .filter(c -> java.util.Objects.equals(c.getAnalysisNumber(), change.getAnalysisNumber()))
                    .filter(c -> java.util.Objects.equals(c.getType(), change.getType()))
                    .findFirst();
            assertThat(savedChange).isPresent();
        }

        for (ManualChange manualChange : redactionLogEntry.getManualChanges()) {
            var savedManualChange = saved.getManualChanges()
                    .stream()
                    .filter(m -> java.util.Objects.equals(m.getAnnotationStatus(), manualChange.getAnnotationStatus()))
                    .filter(m -> java.util.Objects.equals(m.getManualRedactionType(), manualChange.getManualRedactionType()))
                    .filter(m -> m.getUserId().equalsIgnoreCase(manualChange.getUserId()))
                    .filter(m -> java.util.Objects.equals(m.getPropertyChanges(), manualChange.getPropertyChanges()))
                    .findFirst();
            assertThat(savedManualChange).isPresent();
        }

        assertThat(saved.getEngines()).containsExactly(redactionLogEntry.getEngines().toArray(new Engine[0]));
        assertThat(saved.getReference()).containsAll(redactionLogEntry.getReference());
        assertThat(saved.getImportedRedactionIntersections()).containsAll(redactionLogEntry.getImportedRedactionIntersections());
    }
}
/**
 * Reads the stored RedactionLog JSON that corresponds to the given PDF file name.
 * The JSON is looked up below REDACTION_LOG_PATH, in the same relative directory as the PDF
 * and with ".pdf" replaced by ".json" in the file name. The file must exist.
 *
 * @param pdfFileName name of the PDF including its relative directory
 * @return the RedactionLog deserialized from the stored JSON file
 */
@SneakyThrows
private RedactionLog loadSavedRedactionLog(String pdfFileName) {

    File pdf = new File(pdfFileName);
    File savedLog = new File(REDACTION_LOG_PATH + pdf.getParentFile().getPath(), StringUtils.replace(pdf.getName(), ".pdf", ".json"));

    // JavaTimeModule is registered for java.time values; unknown JSON properties are tolerated.
    ObjectMapper mapper = new ObjectMapper();
    mapper.registerModule(new JavaTimeModule());
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    assertThat(savedLog).exists();
    return mapper.readValue(savedLog, RedactionLog.class);
}
/**
 * Analyses every PDF found below RESOURCES_PATH and compares each resulting RedactionLog
 * with the one stored under REDACTION_LOG_PATH.
 * If a stored RedactionLog JSON does not exist, the test fails.
 */
@Ignore
@Test
@SneakyThrows
public void analyseAllFilesAndCompareRedactionLogs() {

    Set<String> pdfFileNames = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
    System.out.println("Will analyse " + pdfFileNames.size() + " files and compare its RedactionLogs.");

    TEST_FILE_ID = "5000";
    for (String pdfFileName : pdfFileNames) {
        analyseFileAndCompareRedactionLog(pdfFileName);
    }
}
/**
 * Recursively collects all PDF files below {@code dir}.
 * Each collected name is the file's absolute path with everything up to and including the
 * resources root removed.
 *
 * @param fileNames accumulator the names are added to; also the return value
 * @param dir       directory to scan, expected to lie below RESOURCES_PATH
 * @return {@code fileNames}, extended by the PDF files found
 * @throws IllegalStateException if a PDF's absolute path does not contain the resources root
 */
@SneakyThrows
private Set<String> getFileNames(Set<String> fileNames, Path dir) {

    // RESOURCES_PATH is written with '/', whereas absolute paths use the platform separator.
    // The separator was previously hard-coded to '\', so the root was never found on Linux/macOS.
    String resourcesRoot = StringUtils.replace(RESOURCES_PATH, "/", File.separator);
    // Number of characters skipped from the start of the match.
    // NOTE(review): presumably the length of RESOURCES_PATH - confirm against its definition.
    int rootOffset = 18;

    try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
        for (Path path : stream) {
            if (path.toFile().isDirectory()) {
                getFileNames(fileNames, path);
            } else if (StringUtils.endsWith(path.toAbsolutePath().toString(), ".pdf")) {
                String absolutePath = path.toAbsolutePath().toString();
                int rootPos = StringUtils.indexOf(absolutePath, resourcesRoot);
                if (rootPos < 0) {
                    // Fail loudly instead of silently cutting the path at an arbitrary position.
                    throw new IllegalStateException("Path '" + absolutePath + "' does not contain '" + resourcesRoot + "'");
                }
                fileNames.add(StringUtils.substring(absolutePath, rootPos + rootOffset));
            }
        }
    }
    return fileNames;
}
/**
 * Spring configuration for this test. Auto-configuration is enabled except for
 * RabbitAutoConfiguration and StorageAutoConfiguration; the KieContainer and the
 * StorageService are provided by the beans below.
 */
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
public static class RedactionIntegrationTestConfiguration {

    /**
     * Builds a KieContainer from the rule text held in RULES, written into a KieFileSystem
     * under RESOURCES_PATH + RULES_PATH.
     */
    @Bean
    public KieContainer kieContainer() {

        KieServices services = KieServices.Factory.get();
        KieFileSystem ruleFiles = services.newKieFileSystem();

        InputStream rulesStream = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
        ruleFiles.write(RESOURCES_PATH + RULES_PATH, services.getResources().newInputStreamResource(rulesStream));

        KieBuilder builder = services.newKieBuilder(ruleFiles);
        KieModule builtModule = builder.buildAll().getKieModule();
        return services.newKieContainer(builtModule.getReleaseId());
    }


    /**
     * Primary StorageService of the test context: a FileSystemBackedStorageService.
     */
    @Bean
    @Primary
    public StorageService inmemoryStorage() {

        return new FileSystemBackedStorageService();
    }

}
}

View File

@ -0,0 +1,431 @@
// Rule package header: all rules below live in package "drools".
package drools
// Section is the fact type every rule matches on.
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
// Global through which the rule consequences invoke their actions (section.redact(...), ...).
global Section section
// --------------------------------------- AI rules -------------------------------------------------------------------
// Fires for a Section for which aiMatchesType("CBI_author") holds and calls
// section.addAiEntities with "CBI_author" for both arguments.
rule "0: Add CBI_author from ai"
when
Section(aiMatchesType("CBI_author"))
then
section.addAiEntities("CBI_author", "CBI_author");
end
// Fires for a Section for which aiMatchesType("ORG") holds. Calls section.combineAiTypes with
// "ORG" as first type, the remaining address part types as second argument and "CBI_address"
// as target type. NOTE(review): meaning of the numeric arguments 20 and 3 and of the trailing
// false is defined by Section.combineAiTypes - confirm there.
rule "0: Combine address parts from ai to CBI_address (org is mandatory)"
when
Section(aiMatchesType("ORG"))
then
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
// Same combination as the "org is mandatory" rule, but triggered by aiMatchesType("STREET")
// and with "STREET" as first type; "ORG" moves into the list of additional types.
rule "0: Combine address parts from ai to CBI_address (street is mandatory)"
when
Section(aiMatchesType("STREET"))
then
section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
// Same combination as the "org is mandatory" rule, but triggered by aiMatchesType("CITY")
// and with "CITY" as first type; "ORG" and "STREET" are in the list of additional types.
rule "0: Combine address parts from ai to CBI_address (city is mandatory)"
when
Section(aiMatchesType("CITY"))
then
section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false);
end
// --------------------------------------- CBI rules -------------------------------------------------------------------
// Fires when fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") is false and the
// Section matches type "CBI_author". Redacts "CBI_author" as rule 1 with reason "Author found"
// and legal basis Article 39(e)(3).
rule "1: Redact CBI Authors (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
then
section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 1: fires when the "Vertebrate Study" file attribute check is
// true and the Section matches type "CBI_author". Redacts as rule 2 with Article 39(e)(2).
rule "2: Redact CBI Authors (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
then
section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is false and the Section matches type
// "CBI_address". Calls redactNot for "CBI_address" as rule 3 and additionally
// ignoreRecommendations for the same type.
rule "3: Redact not CBI Address (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
then
section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
section.ignoreRecommendations("CBI_address");
end
// Fires when the "Vertebrate Study" file attribute check is true and the Section matches type
// "CBI_address". Redacts "CBI_address" as rule 4 with reason "Address found" and
// Article 39(e)(2).
rule "4: Redact CBI Address (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
then
section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires for every Section matching type "CBI_author" and calls expandToFalsePositiveByRegEx
// with a pattern of one apostrophe-like character followed by "s" (the genitive suffix).
// NOTE(review): the character class contains the plain ' three times; possibly typographic
// apostrophes were lost in an encoding conversion - confirm against the intended pattern.
rule "5: Do not redact genitive CBI_author"
when
Section(matchesType("CBI_author"))
then
section.expandToFalsePositiveByRegEx("CBI_author", "[''ʼˈ´`ʻ']s", false, 0);
end
// Fires when the "Vertebrate Study" file attribute check is false, the Section has the table
// header "Author(s)" and does NOT have the header "Vertebrate study Y/N" (that case is handled
// by rules 10/11). Calls redactCell for "Author(s)" as rule 6, type "CBI_author", Article 39(e)(3).
rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 6: same table header conditions, but the "Vertebrate Study"
// file attribute check must be true. Calls redactCell as rule 7 with Article 39(e)(2).
rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Like rule 6, but for tables whose header is "Author" instead of "Author(s)".
// Calls redactCell for "Author" as rule 8, type "CBI_author", Article 39(e)(3).
rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Like rule 7, but for tables whose header is "Author" instead of "Author(s)".
// Calls redactCell for "Author" as rule 9, type "CBI_author", Article 39(e)(2).
rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is false and the row value under
// "Vertebrate study Y/N" equals "Y", "Yes", "N" or "No". Calls redactCell for "Author(s)" as
// rule 10 with Article 39(e)(3); unlike rules 6-9 the fourth argument is true.
rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
then
section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 10: same row conditions, but the "Vertebrate Study" file
// attribute check must be true. Calls redactCell as rule 11 with Article 39(e)(2).
rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
then
section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
/* Syngenta specific laboratory rule */
// Cheap pre-filter: fires only if searchText contains "CT" or "BL". The consequence then adds
// "CBI_address" recommendations for every match of the laboratory code pattern.
// NOTE(review): the many alternative characters in the pattern look like tolerance for
// misrecognised characters (e.g. "CT1"/"CTi" for "CTL") - confirm intent.
rule "12: Recommend CTL/BL laboratory that start with BL or CTL"
when
Section(searchText.contains("CT") || searchText.contains("BL"))
then
section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address");
end
// Fires when the "Vertebrate Study" file attribute check is false and searchText contains
// "et al". Calls redactAndRecommendByRegEx with a pattern for a capitalised name (optionally
// followed by initials) directly before "et al"; type "CBI_author", rule 14, Article 39(e)(3).
rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
then
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 14: same "et al" pattern, but the "Vertebrate Study" file
// attribute check must be true. Uses rule number 15 and Article 39(e)(2).
rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
then
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is true and searchText contains both
// "Species:" and "Source:". Calls recommendLineAfter for "Source:" with type "CBI_address".
rule "16: Add recommendation for Addresses in Test Organism sections"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
then
section.recommendLineAfter("Source:", "CBI_address");
end
// Variant of rule 16 without the colons: requires "Species" and "Source" in searchText and
// calls recommendLineAfter for "Source". Its condition also holds whenever rule 16's does,
// because "Species:"/"Source:" contain "Species"/"Source".
rule "17: Add recommendation for Addresses in Test Animals sections"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
then
section.recommendLineAfter("Source", "CBI_address");
end
// Fires for every Section matching type "published_information" (no vertebrate distinction).
// Calls redactNotAndReference for both "CBI_author" and "CBI_address", each referencing
// "published_information", as rule 18.
rule "18: Do not redact Names and Addresses if Published Information found"
when
Section(matchesType("published_information"))
then
section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
end
// --------------------------------------- PII rules -------------------------------------------------------------------
// Fires when the "Vertebrate Study" file attribute check is false and the Section matches type
// "PII". Redacts "PII" as rule 19 with reason "Personal information found", Article 39(e)(3).
rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
then
section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 19: the "Vertebrate Study" file attribute check must be true.
// Redacts "PII" as rule 20 with Article 39(e)(2).
rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
then
section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Cheap pre-filter: fires only if searchText contains "@" (and the "Vertebrate Study" file
// attribute check is false). Calls redactByRegEx with an e-mail address pattern; type "PII",
// rule 21, Article 39(e)(3).
rule "21: Redact Emails by RegEx (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
then
section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 21: same e-mail pattern, but the "Vertebrate Study" file
// attribute check must be true. Uses rule number 22 and Article 39(e)(2).
rule "22: Redact Emails by RegEx (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
then
section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is false and the Section text contains
// at least one of the listed contact labels. The consequence does not check which label
// matched: it calls redactLineAfter for every label and redactBetween for the "No:"/"Fax" and
// "Contact:"/"Tel.:" pairs, always with type "PII", rule 23 and Article 39(e)(3).
// The condition list and the action list are kept in the same order on purpose; keep them in
// sync when adding a label.
rule "23: Redact contact information (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (text.contains("Contact point:")
|| text.contains("Phone:")
|| text.contains("Fax:")
|| text.contains("Tel.:")
|| text.contains("Tel:")
|| text.contains("E-mail:")
|| text.contains("Email:")
|| text.contains("e-mail:")
|| text.contains("E-mail address:")
|| text.contains("Contact:")
|| text.contains("Alternative contact:")
|| text.contains("Telephone number:")
|| text.contains("Telephone No:")
|| text.contains("Fax number:")
|| text.contains("Telephone:")
|| text.contains("Phone No.")
|| (text.contains("No:") && text.contains("Fax"))
|| (text.contains("Contact:") && text.contains("Tel.:"))
|| text.contains("European contact:")
))
then
section.redactLineAfter("Contact point:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Phone:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Fax:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Tel.:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Tel:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("E-mail:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Email:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("e-mail:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("E-mail address:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Alternative contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Telephone number:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Telephone No:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Fax number:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Telephone:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("Phone No.", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactBetween("No:", "Fax", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactBetween("Contact:", "Tel.:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactLineAfter("European contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 23: fires when the "Vertebrate Study" file attribute check is
// true and the Section text contains at least one of the listed contact labels. The consequence
// calls redactLineAfter for every label and redactBetween for the "No:"/"Fax" and
// "Contact:"/"Tel.:" pairs, always with type "PII", rule 24 and Article 39(e)(2).
rule "24: Redact contact information (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (text.contains("Contact point:")
|| text.contains("Phone:")
|| text.contains("Fax:")
|| text.contains("Tel.:")
|| text.contains("Tel:")
|| text.contains("E-mail:")
|| text.contains("Email:")
|| text.contains("e-mail:")
|| text.contains("E-mail address:")
|| text.contains("Contact:")
|| text.contains("Alternative contact:")
|| text.contains("Telephone number:")
|| text.contains("Telephone No:")
|| text.contains("Fax number:")
|| text.contains("Telephone:")
|| text.contains("Phone No.")
|| (text.contains("No:") && text.contains("Fax"))
|| (text.contains("Contact:") && text.contains("Tel.:"))
|| text.contains("European contact:")
))
then
section.redactLineAfter("Contact point:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Phone:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Fax:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Tel.:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Tel:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("E-mail:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Email:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("e-mail:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("E-mail address:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Alternative contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Telephone number:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Telephone No:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Fax number:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Telephone:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("Phone No.", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactBetween("No:", "Fax", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactBetween("Contact:", "Tel.:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
section.redactLineAfter("European contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is false and the Section text contains
// one of the listed phone/fax keywords. Calls redactByRegEx with a pattern of keyword, up to
// three separator characters and a number; 2 is passed as the group argument (presumably the
// capture group holding the number - confirm against Section.redactByRegEx).
// NOTE(review): "Ter", "Fel" and "Fer" look like misrecognised variants of "Tel" - confirm.
// The legal basis is Article 39(e)(3), as in every other non vertebrate rule of this file; it
// previously cited Article 39(e)(2), apparently copied from the vertebrate rule 26.
rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
text.contains("Telephone")
|| text.contains("Phone")
|| text.contains("Ph.")
|| text.contains("Fax")
|| text.contains("Tel")
|| text.contains("Ter")
|| text.contains("Cell")
|| text.contains("Mobile")
|| text.contains("Fel")
|| text.contains("Fer")
))
then
section.redactByRegEx("\\b(telephone|phone|fax|tel|ter|cell|mobile|fel|fer)[:.\\s]{0,3}((\\(?\\+?[0-9])(\\(?[0-9\\/.\\-\\s]+\\)?)*([0-9]+\\)?))\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is true and the Section text contains
// one of the listed phone/fax keywords. Calls redactByRegEx with a pattern of keyword, up to
// three separator characters and a number; type "PII", rule 26, Article 39(e)(2).
// NOTE(review): "Ter", "Fel" and "Fer" look like misrecognised variants of "Tel" - confirm.
rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
text.contains("Telephone")
|| text.contains("Phone")
|| text.contains("Ph.")
|| text.contains("Fax")
|| text.contains("Tel")
|| text.contains("Ter")
|| text.contains("Cell")
|| text.contains("Mobile")
|| text.contains("Fel")
|| text.contains("Fer")
))
then
section.redactByRegEx("\\b(telephone|phone|fax|tel|ter|cell|mobile|fel|fer)[:.\\s]{0,3}((\\(?\\+?[0-9])(\\(?[0-9\\/.\\-\\s]+\\)?)*([0-9]+\\)?))\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is false and searchText contains
// "AUTHOR(S):" and "COMPLETION DATE:" but NOT "STUDY COMPLETION DATE:" (that variant is
// handled by rule 29). Calls redactLinesBetween for the two markers; type "PII", rule 27,
// Article 39(e)(3).
// NOTE(review): the reason text here is "Author found", whereas rules 28-30 use
// "AUTHOR(S) was found" - confirm whether the difference is intended.
rule "27: Redact AUTHOR(S) (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("COMPLETION DATE:")
&& !searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 27: searchText must contain "AUTHOR(S):" and
// "COMPLETION DATE:" but not "STUDY COMPLETION DATE:". Calls redactLinesBetween for the two
// markers; type "PII", rule 28, Article 39(e)(2).
rule "28: Redact AUTHOR(S) (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("COMPLETION DATE:")
&& !searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Complement of rule 27 for the "STUDY COMPLETION DATE:" marker: fires when the "Vertebrate
// Study" file attribute check is false and searchText contains "AUTHOR(S):" and
// "STUDY COMPLETION DATE:". Calls redactLinesBetween for these markers; type "PII", rule 29,
// Article 39(e)(3).
rule "29: Redact AUTHOR(S) (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 29: searchText must contain "AUTHOR(S):" and
// "STUDY COMPLETION DATE:". Calls redactLinesBetween for these markers; type "PII", rule 30,
// Article 39(e)(2).
rule "30: Redact AUTHOR(S) (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is false and searchText contains
// "PERFORMING LABORATORY:". The end marker "LABORATORY PROJECT ID:" is not part of the
// condition. The consequence first calls redactBetween for the two markers (type
// "CBI_address", rule 31) and then redactNot for "CBI_address".
// NOTE(review): presumably the second call marks the found address as not redacted, in line
// with rule 3 for non vertebrate studies - confirm against Section.redactNot.
rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("PERFORMING LABORATORY:")
)
then
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study");
end
// Fires when the "Vertebrate Study" file attribute check is true and searchText contains
// "PERFORMING LABORATORY:". Calls redactBetween for "PERFORMING LABORATORY:" and
// "LABORATORY PROJECT ID:"; type "CBI_address", rule 32, Article 39(e)(2).
rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("PERFORMING LABORATORY:"))
then
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- other rules -------------------------------------------------------------------
// Cheap pre-filter: fires if the lower-cased searchText contains "purity" (no vertebrate
// distinction). Calls addHintAnnotationByRegEx with a pattern for "purity", an optional
// "of"/parenthesised part, and a percentage value; the annotation type is "hint_only".
rule "33: Purity Hint"
when
Section(searchText.toLowerCase().contains("purity"))
then
section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
end
// Fires when fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") is false
// and the Section matches type "dossier_redaction". Calls section.ignore for that type.
rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction"));
then
section.ignore("dossier_redaction");
end
// Fires when the "Vertebrate Study" file attribute check is false and the Section matches
// image type "signature". Calls redactImage as rule 35 with reason "Signature found",
// Article 39(e)(3).
rule "35: Redact signatures (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
then
section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate counterpart of rule 35: the "Vertebrate Study" file attribute check must be true.
// Calls redactImage for "signature" as rule 36 with Article 39(e)(2).
rule "36: Redact signatures (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
then
section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Fires when the "Vertebrate Study" file attribute check is true and the Section matches image
// type "logo". Calls redactImage for "logo" as rule 43 with reason "Logo found",
// Article 39(e)(2).
rule "43: Redact Logos (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo"))
then
section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end