diff --git a/.gitignore b/.gitignore
index 6bb4bada..0365fa46 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,4 @@
**/.DS_Store
**/classpath-data.json
**/dependencies-and-licenses-overview.txt
+/redaction-service-v1/redaction-service-server-v1/src/test/resources/RedactionLog/
diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml
index 47a85da3..84ea2060 100644
--- a/redaction-service-v1/redaction-service-server-v1/pom.xml
+++ b/redaction-service-v1/redaction-service-server-v1/pom.xml
@@ -38,6 +38,13 @@
${jackson.version}
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jsr310
+ ${jackson.version}
+ test
+
+
org.ahocorasick
ahocorasick
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java
new file mode 100644
index 00000000..bd752544
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RulesTest.java
@@ -0,0 +1,773 @@
+package com.iqser.red.service.redaction.v1.server;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.when;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.DirectoryStream;
+import java.nio.file.FileSystems;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.OffsetDateTime;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.kie.api.KieServices;
+import org.kie.api.builder.KieBuilder;
+import org.kie.api.builder.KieFileSystem;
+import org.kie.api.builder.KieModule;
+import org.kie.api.runtime.KieContainer;
+import org.springframework.amqp.rabbit.core.RabbitTemplate;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.boot.test.mock.mockito.MockBean;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.test.context.junit4.SpringRunner;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
+import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
+import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
+import com.iqser.red.service.redaction.v1.model.Change;
+import com.iqser.red.service.redaction.v1.model.Engine;
+import com.iqser.red.service.redaction.v1.model.ManualChange;
+import com.iqser.red.service.redaction.v1.model.Rectangle;
+import com.iqser.red.service.redaction.v1.model.RedactionLog;
+import com.iqser.red.service.redaction.v1.model.RedactionLogComment;
+import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
+import com.iqser.red.service.redaction.v1.model.RedactionLogLegalBasis;
+import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
+import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
+import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
+import com.iqser.red.service.redaction.v1.server.client.RulesClient;
+import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
+import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+import com.iqser.red.storage.commons.StorageAutoConfiguration;
+import com.iqser.red.storage.commons.service.StorageService;
+
+import lombok.SneakyThrows;
+
+@RunWith(SpringRunner.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(RulesTest.RedactionIntegrationTestConfiguration.class)
+public class RulesTest {
+
+ private static final String RULES_PATH = "drools/testRules.drl";
+ private static final String RULES = loadFromClassPath(RULES_PATH);
+ private static final String VERTEBRATE = "vertebrate";
+ private static final String ADDRESS = "CBI_address";
+ private static final String AUTHOR = "CBI_author";
+ private static final String SPONSOR = "CBI_sponsor";
+ private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
+ private static final String REDACTION_INDICATOR = "redaction_indicator";
+ private static final String HINT_ONLY = "hint_only";
+ private static final String MUST_REDACT = "must_redact";
+ private static final String PUBLISHED_INFORMATION = "published_information";
+ private static final String TEST_METHOD = "test_method";
+ private static final String PURITY = "purity";
+ private static final String IMAGE = "image";
+ private static final String LOGO = "logo";
+ private static final String SIGNATURE = "signature";
+ private static final String FORMULA = "formula";
+ private static final String OCR = "ocr";
+ private static final String DOSSIER_REDACTIONS = "dossier_redactions";
+ private static final String IMPORTED_REDACTION = "imported_redaction";
+ private static final String PII = "PII";
+
+ private static final String RESOURCES_PATH = "src/test/resources/";
+
+ private static final String REDACTION_LOG_PATH = RESOURCES_PATH + "RedactionLog/";
+ private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
+ private final static String TEST_DOSSIER_ID = "123";
+ private final Map> dictionary = new HashMap<>();
+ private final Map> dossierDictionary = new HashMap<>();
+ private final Map> falsePositive = new HashMap<>();
+ private final Map> falseRecommendation = new HashMap<>();
+ private final Map typeColorMap = new HashMap<>();
+ private final Map hintTypeMap = new HashMap<>();
+ private final Map caseInSensitiveMap = new HashMap<>();
+ private final Map recommendationTypeMap = new HashMap<>();
+ private final Map rankTypeMap = new HashMap<>();
+ private final Colors colors = new Colors();
+ private final Map reanalysisVersions = new HashMap<>();
+ private final Set deleted = new HashSet<>();
+ @Autowired
+ private RedactionController redactionController;
+ @Autowired
+ private AnnotationService annotationService;
+ @Autowired
+ private AnalyzeService analyzeService;
+ @Autowired
+ private ObjectMapper objectMapper;
+ @MockBean
+ private RulesClient rulesClient;
+ @MockBean
+ private DictionaryClient dictionaryClient;
+ @Autowired
+ private RedactionStorageService redactionStorageService;
+ @Autowired
+ private StorageService storageService;
+ @Autowired
+ private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
+ @MockBean
+ private AmazonS3 amazonS3;
+ @MockBean
+ private RabbitTemplate rabbitTemplate;
+ @MockBean
+ private LegalBasisClient legalBasisClient;
+ private String TEST_FILE_ID = "123";
+
+
+ private static String loadFromClassPath(String path) {
+ // Reads the classpath resource as UTF-8, line by line; every line is re-terminated with "\n".
+ URL resource = ResourceLoader.class.getClassLoader().getResource(path);
+ if (resource == null) {
+ throw new IllegalArgumentException("could not load classpath resource: " + path);
+ }
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
+ StringBuilder sb = new StringBuilder();
+ String str;
+ while ((str = br.readLine()) != null) {
+ sb.append(str).append("\n");
+ }
+ return sb.toString();
+ } catch (IOException e) {
+ throw new IllegalArgumentException("could not load classpath resource: " + path, e);
+ }
+ }
+
+
+ @After
+ public void cleanupStorage() { // after each test: clearStorage() is only called on a FileSystemBackedStorageService
+ if (!(storageService instanceof FileSystemBackedStorageService)) {
+ return;
+ }
+ ((FileSystemBackedStorageService) storageService).clearStorage();
+ }
+
+
+ @Before
+ public void stubClients() {
+ // Rules client: version 0 and the rule text loaded from RULES_PATH.
+ when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+ when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
+ // The in-memory dictionaries and type settings must be filled before the stubs below read them.
+ loadDictionaryForTest();
+ loadTypeForTest();
+ when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+ when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
+ // NOTE(review): getVersion(TEST_DOSSIER_TEMPLATE_ID) used to be stubbed a second time here with the same
+ // value; the duplicate was dropped. If a dossier-level version stub was intended instead, add it - confirm.
+ when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
+ .id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
+ .type(DOSSIER_REDACTIONS)
+ .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
+ .hexColor("#ffe187")
+ .isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
+ .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
+ .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
+ .rank(rankTypeMap.get(DOSSIER_REDACTIONS))
+ .build()));
+ // Dictionary lookups are stubbed twice: once for a null version argument and once for version 0.
+ mockDictionaryCalls(null);
+ mockDictionaryCalls(0L);
+ // The colours object was filled by loadTypeForTest() above.
+ when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
+ }
+
+
+ private void loadDictionaryForTest() {
+ // Dossier-template dictionaries: one resource file per type.
+ addCleanedEntries(dictionary, AUTHOR, "dictionaries/CBI_author.txt");
+ addCleanedEntries(dictionary, SPONSOR, "dictionaries/CBI_sponsor.txt");
+ addCleanedEntries(dictionary, VERTEBRATE, "dictionaries/vertebrate.txt");
+ addCleanedEntries(dictionary, ADDRESS, "dictionaries/CBI_address.txt");
+ addCleanedEntries(dictionary, NO_REDACTION_INDICATOR, "dictionaries/no_redaction_indicator.txt");
+ addCleanedEntries(dictionary, REDACTION_INDICATOR, "dictionaries/redaction_indicator.txt");
+ addCleanedEntries(dictionary, HINT_ONLY, "dictionaries/hint_only.txt");
+ addCleanedEntries(dictionary, MUST_REDACT, "dictionaries/must_redact.txt");
+ addCleanedEntries(dictionary, PUBLISHED_INFORMATION, "dictionaries/published_information.txt");
+ addCleanedEntries(dictionary, TEST_METHOD, "dictionaries/test_method.txt");
+ addCleanedEntries(dictionary, PII, "dictionaries/PII.txt");
+ addCleanedEntries(dictionary, PURITY, "dictionaries/purity.txt");
+ // The following types are all fed from the same resource, dictionaries/empty.txt.
+ addCleanedEntries(dictionary, IMAGE, "dictionaries/empty.txt");
+ addCleanedEntries(dictionary, OCR, "dictionaries/empty.txt");
+ addCleanedEntries(dictionary, LOGO, "dictionaries/empty.txt");
+ addCleanedEntries(dictionary, SIGNATURE, "dictionaries/empty.txt");
+ addCleanedEntries(dictionary, FORMULA, "dictionaries/empty.txt");
+ // Dossier dictionaries; IMPORTED_REDACTION is registered with an empty entry list.
+ addCleanedEntries(dossierDictionary, DOSSIER_REDACTIONS, "dictionaries/dossier_redactions.txt");
+ dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
+ // False positives exist for PII only.
+ addCleanedEntries(falsePositive, PII, "dictionaries/PII_false_positive.txt");
+ }
+
+
+ /**
+ * Loads one dictionary resource, cleans every entry with {@link #cleanDictionaryEntry(String)} and appends
+ * the de-duplicated result to the list stored under {@code type}, creating that list if it is missing.
+ *
+ * @param target map from type name to entries that receives the loaded values
+ * @param type type name the entries are filed under
+ * @param resourcePath resource path handed to {@code ResourceLoader.load}
+ */
+ private void addCleanedEntries(Map<String, List<String>> target, String type, String resourcePath) {
+ // The list is registered before the resource is read, matching the original evaluation order.
+ List<String> bucket = target.computeIfAbsent(type, key -> new ArrayList<>());
+ bucket.addAll(ResourceLoader.load(resourcePath)
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
+ }
+
+
+ private void loadTypeForTest() {
+ // Fills the per-type settings maps and `colors`. First: hex colour per type (DOSSIER_REDACTIONS has no entry here).
+ typeColorMap.put(VERTEBRATE, "#ff85f7");
+ typeColorMap.put(ADDRESS, "#ffe187");
+ typeColorMap.put(AUTHOR, "#ffe187");
+ typeColorMap.put(SPONSOR, "#85ebff");
+ typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
+ typeColorMap.put(REDACTION_INDICATOR, "#caff85");
+ typeColorMap.put(HINT_ONLY, "#abc0c4");
+ typeColorMap.put(MUST_REDACT, "#fab4c0");
+ typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
+ typeColorMap.put(TEST_METHOD, "#91fae8");
+ typeColorMap.put(PII, "#66ccff");
+ typeColorMap.put(PURITY, "#ffe187");
+ typeColorMap.put(IMAGE, "#fcc5fb");
+ typeColorMap.put(OCR, "#fcc5fb");
+ typeColorMap.put(LOGO, "#ffe187");
+ typeColorMap.put(FORMULA, "#ffe187");
+ typeColorMap.put(SIGNATURE, "#ffe187");
+ typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6");
+ // Hint flag per type; read back through Type.builder().isHint(...).
+ hintTypeMap.put(VERTEBRATE, true);
+ hintTypeMap.put(ADDRESS, false);
+ hintTypeMap.put(AUTHOR, false);
+ hintTypeMap.put(SPONSOR, false);
+ hintTypeMap.put(NO_REDACTION_INDICATOR, true);
+ hintTypeMap.put(REDACTION_INDICATOR, true);
+ hintTypeMap.put(HINT_ONLY, true);
+ hintTypeMap.put(MUST_REDACT, true);
+ hintTypeMap.put(PUBLISHED_INFORMATION, true);
+ hintTypeMap.put(TEST_METHOD, true);
+ hintTypeMap.put(PII, false);
+ hintTypeMap.put(PURITY, false);
+ hintTypeMap.put(IMAGE, true);
+ hintTypeMap.put(OCR, true);
+ hintTypeMap.put(FORMULA, false);
+ hintTypeMap.put(LOGO, false);
+ hintTypeMap.put(SIGNATURE, false);
+ hintTypeMap.put(DOSSIER_REDACTIONS, false);
+ hintTypeMap.put(IMPORTED_REDACTION, false);
+ // Case-insensitivity flag per type; read back through Type.builder().isCaseInsensitive(...).
+ caseInSensitiveMap.put(VERTEBRATE, true);
+ caseInSensitiveMap.put(ADDRESS, false);
+ caseInSensitiveMap.put(AUTHOR, false);
+ caseInSensitiveMap.put(SPONSOR, false);
+ caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
+ caseInSensitiveMap.put(REDACTION_INDICATOR, true);
+ caseInSensitiveMap.put(HINT_ONLY, true);
+ caseInSensitiveMap.put(MUST_REDACT, true);
+ caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
+ caseInSensitiveMap.put(TEST_METHOD, false);
+ caseInSensitiveMap.put(PII, false);
+ caseInSensitiveMap.put(PURITY, false);
+ caseInSensitiveMap.put(IMAGE, true);
+ caseInSensitiveMap.put(OCR, true);
+ caseInSensitiveMap.put(SIGNATURE, true);
+ caseInSensitiveMap.put(LOGO, true);
+ caseInSensitiveMap.put(FORMULA, true);
+ caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
+ caseInSensitiveMap.put(IMPORTED_REDACTION, false);
+ // Recommendation flag per type; false for every type in this setup.
+ recommendationTypeMap.put(VERTEBRATE, false);
+ recommendationTypeMap.put(ADDRESS, false);
+ recommendationTypeMap.put(AUTHOR, false);
+ recommendationTypeMap.put(SPONSOR, false);
+ recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
+ recommendationTypeMap.put(REDACTION_INDICATOR, false);
+ recommendationTypeMap.put(HINT_ONLY, false);
+ recommendationTypeMap.put(MUST_REDACT, false);
+ recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
+ recommendationTypeMap.put(TEST_METHOD, false);
+ recommendationTypeMap.put(PII, false);
+ recommendationTypeMap.put(PURITY, false);
+ recommendationTypeMap.put(IMAGE, false);
+ recommendationTypeMap.put(OCR, false);
+ recommendationTypeMap.put(FORMULA, false);
+ recommendationTypeMap.put(SIGNATURE, false);
+ recommendationTypeMap.put(LOGO, false);
+ recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
+ recommendationTypeMap.put(IMPORTED_REDACTION, false);
+ // Rank per type. NOTE(review): presumably a higher rank takes precedence when types overlap - confirm.
+ rankTypeMap.put(PURITY, 155);
+ rankTypeMap.put(PII, 150);
+ rankTypeMap.put(ADDRESS, 140);
+ rankTypeMap.put(AUTHOR, 130);
+ rankTypeMap.put(SPONSOR, 120);
+ rankTypeMap.put(VERTEBRATE, 110);
+ rankTypeMap.put(MUST_REDACT, 100);
+ rankTypeMap.put(REDACTION_INDICATOR, 90);
+ rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
+ rankTypeMap.put(PUBLISHED_INFORMATION, 70);
+ rankTypeMap.put(TEST_METHOD, 60);
+ rankTypeMap.put(HINT_ONLY, 50);
+ rankTypeMap.put(IMAGE, 30);
+ rankTypeMap.put(OCR, 29);
+ rankTypeMap.put(LOGO, 28);
+ rankTypeMap.put(SIGNATURE, 27);
+ rankTypeMap.put(FORMULA, 26);
+ rankTypeMap.put(DOSSIER_REDACTIONS, 200);
+ rankTypeMap.put(IMPORTED_REDACTION, 200);
+ // Shared colour settings; this object is what the dictionaryClient.getColors(...) stub returns.
+ colors.setSkippedColor("#cccccc");
+ colors.setRequestAddColor("#04b093");
+ colors.setRequestRemoveColor("#04b093");
+ }
+
+
+ private List<Type> getTypeResponse() {
+ // Builds one Type per key of typeColorMap; the remaining settings are looked up under the same key.
+ List<Type> types = new ArrayList<>();
+ for (String type : typeColorMap.keySet()) {
+ types.add(Type.builder()
+ .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
+ .type(type)
+ .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
+ .hexColor(typeColorMap.get(type))
+ .isHint(hintTypeMap.get(type))
+ .isCaseInsensitive(caseInSensitiveMap.get(type))
+ .isRecommendation(recommendationTypeMap.get(type))
+ .rank(rankTypeMap.get(type))
+ .build());
+ }
+ return types;
+ }
+
+
+ private void mockDictionaryCalls(Long version) {
+ // Dossier-template types: the stubbed response takes its entries from `dictionary`.
+ List<String> templateTypes = List.of(
+ VERTEBRATE, ADDRESS, AUTHOR, SPONSOR,
+ NO_REDACTION_INDICATOR, REDACTION_INDICATOR,
+ HINT_ONLY, MUST_REDACT,
+ PUBLISHED_INFORMATION, TEST_METHOD,
+ PII, PURITY,
+ IMAGE, OCR, LOGO, SIGNATURE, FORMULA);
+ for (String type : templateTypes) {
+ String typeId = type + ":" + TEST_DOSSIER_TEMPLATE_ID;
+ Type response = getDictionaryResponse(type, false);
+ when(dictionaryClient.getDictionaryForType(typeId, version)).thenReturn(response);
+ }
+ // Dossier types: same id scheme, but the stubbed response takes its entries
+ // from `dossierDictionary`.
+ List<String> dossierTypes = List.of(DOSSIER_REDACTIONS, IMPORTED_REDACTION);
+ for (String type : dossierTypes) {
+ String typeId = type + ":" + TEST_DOSSIER_TEMPLATE_ID;
+ Type response = getDictionaryResponse(type, true);
+ when(dictionaryClient.getDictionaryForType(typeId, version)).thenReturn(response);
+ }
+ }
+
+
+ private String cleanDictionaryEntry(String entry) {
+ String normalized = TextNormalizationUtilities.removeHyphenLineBreaks(entry);
+ return normalized.replaceAll("\\n", " "); // every newline left after the helper call becomes a single space
+ }
+
+
+ private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
+ Map<String, List<String>> source = isDossierDictionary ? dossierDictionary : dictionary; // map that supplies the entries
+ return Type.builder()
+ .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
+ .hexColor(typeColorMap.get(type))
+ .entries(toDictionaryEntry(source.get(type)))
+ .falsePositiveEntries(toDictionaryEntry(falsePositive.getOrDefault(type, List.of())))
+ .falseRecommendationEntries(toDictionaryEntry(falseRecommendation.getOrDefault(type, List.of())))
+ .isHint(hintTypeMap.get(type))
+ .isCaseInsensitive(caseInSensitiveMap.get(type))
+ .isRecommendation(recommendationTypeMap.get(type))
+ .rank(rankTypeMap.get(type))
+ .build();
+ }
+
+
+ private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
+ // Wraps each value in a DictionaryEntry; version defaults to 0 and `deleted` reflects membership in the deleted set.
+ return entries.stream()
+ .map(entry -> DictionaryEntry.builder()
+ .value(entry)
+ .version(reanalysisVersions.getOrDefault(entry, 0L))
+ .deleted(deleted.contains(entry))
+ .build())
+ .collect(Collectors.toCollection(ArrayList::new)); // a fresh, mutable list on every call, as before
+ }
+
+
+ /**
+ * Analyses one fixed PDF from the test resources and writes its RedactionLog as JSON below
+ * REDACTION_LOG_PATH. Marked {@code @Ignore}, so it only runs once that annotation is removed.
+ */
+ @Ignore
+ @Test
+ @SneakyThrows
+ public void generateRedactionLogForOneFile() {
+ // The path is resolved as a classpath resource by the helper.
+ generateRedactionLog("files/Compounds/31 A14111B - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf");
+ }
+
+
+ @SneakyThrows
+ public void generateRedactionLog(String fileName) {
+ // Work under a new file id for this call.
+ increaseTestFileId();
+ System.out.println("Generate RedactionLog as Json for " + fileName + " with fileId " + TEST_FILE_ID);
+ // Store the NER response and the PDF itself under the current ids.
+ loadNerForTest();
+ AnalyzeRequest analyzeRequest = prepareStorage(fileName);
+ // Structure analysis is invoked before the actual analysis, both for the same dossier and file.
+ StructureAnalyzeRequest structureRequest = new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
+ analyzeService.analyzeDocumentStructure(structureRequest);
+ analyzeService.analyze(analyzeRequest);
+ // Read the stored result back and persist it as JSON.
+ RedactionLog generatedLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+ saveRedactionLogAsJson(generatedLog, fileName);
+
+ }
+
+
+ private void increaseTestFileId() {
+ int nextId = Integer.parseInt(TEST_FILE_ID) + 1; // the id is held as a decimal string
+ TEST_FILE_ID = String.valueOf(nextId);
+ }
+
+
+ @SneakyThrows
+ private void loadNerForTest() {
+ // Stores files/ner_response.json as the NER_ENTITIES object of the current test file; the stream is closed afterwards.
+ try (InputStream nerStream = new ClassPathResource("files/ner_response.json").getInputStream()) {
+ storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), IOUtils.toByteArray(nerStream));
+ }
+ }
+
+
+ @SneakyThrows
+ private AnalyzeRequest prepareStorage(String file) {
+ // Opens the classpath resource and delegates to the stream overload; the stream is closed once it has been stored.
+ try (InputStream pdfStream = new ClassPathResource(file).getInputStream()) {
+ return prepareStorage(pdfStream);
+ }
+ }
+
+
+ @SneakyThrows
+ private void saveRedactionLogAsJson(RedactionLog redactionLog, String pdfFileName) {
+ // Writes the log to REDACTION_LOG_PATH + <parent directory of pdfFileName>/<pdf name with ".pdf" replaced by ".json">.
+ File pdfFile = new File(pdfFileName);
+ // A name without a directory part has no parent; fall back to REDACTION_LOG_PATH itself instead of failing.
+ File parent = pdfFile.getParentFile();
+ String directory = parent == null ? REDACTION_LOG_PATH : REDACTION_LOG_PATH + parent.getPath();
+ File dr = new File(directory);
+ boolean created = dr.mkdirs();
+ if (created) {
+ System.out.println("Directory was created");
+ }
+
+ String fileName = StringUtils.replace(pdfFile.getName(), ".pdf", ".json");
+ File file = new File(directory, fileName);
+ // A dedicated mapper is used here; JavaTimeModule adds java.time support to it.
+ ObjectMapper mapper = new ObjectMapper();
+ mapper.registerModule(new JavaTimeModule());
+ mapper.writeValue(file, redactionLog);
+ System.out.println("Saved RedactionLog for " + fileName + " here " + directory);
+ }
+
+
+ @SneakyThrows
+ private AnalyzeRequest prepareStorage(InputStream stream) {
+ // The request always addresses the test dossier template, the test dossier and the current file id.
+ AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
+ .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
+ .dossierId(TEST_DOSSIER_ID)
+ .fileId(TEST_FILE_ID)
+ .lastProcessed(OffsetDateTime.now())
+ .build();
+ // The stream content is stored as the ORIGIN object of that file.
+ byte[] pdfBytes = IOUtils.toByteArray(stream);
+ var originStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN);
+ storageService.storeObject(originStorageId, pdfBytes);
+ // The stream is only read here; it is not closed by this method.
+ return analyzeRequest;
+
+ }
+
+
+ /**
+ * Generates a RedactionLog JSON for every file name that getFileNames(...) returns for RESOURCES_PATH;
+ * each one is written below REDACTION_LOG_PATH. Marked {@code @Ignore}.
+ */
+ @Ignore
+ @Test
+ @SneakyThrows
+ public void generateRedactionLogForAllFiles() {
+ Set<String> files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
+ System.out.println("Will generate RedactionLog for " + files.size() + " files.");
+ TEST_FILE_ID = "1000"; // generateRedactionLog increments the id before use, so the bulk run starts at 1001
+ files.forEach(this::generateRedactionLog);
+ }
+
+
+ /**
+ * Analyses one fixed PDF and compares the resulting RedactionLog with the JSON previously saved
+ * below REDACTION_LOG_PATH. If that RedactionLog JSON does not exist, the test will fail.
+ * Marked {@code @Ignore}, so it only runs once that annotation is removed.
+ */
+ @Ignore
+ @Test
+ @SneakyThrows
+ public void analyseFileAndCompareRedactionLog() {
+ // Delegates to the overload that takes the file name.
+ analyseFileAndCompareRedactionLog("files/Compounds/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf");
+ }
+
+
+ @SneakyThrows
+ public void analyseFileAndCompareRedactionLog(String fileName) {
+
+ increaseTestFileId();
+ System.out.println("Analyse " + fileName + " with fileId " + TEST_FILE_ID + " and compare it with its saved RedactionLog");
+
+ RedactionLog savedRedactionLog = loadSavedRedactionLog(fileName);
+
+ loadNerForTest();
+
+ AnalyzeRequest request = prepareStorage(fileName);
+ analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
+ analyzeService.analyze(request);
+
+ RedactionLog redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+
+ assertThat(redactionLog.getAnalysisVersion()).isEqualTo(savedRedactionLog.getAnalysisVersion());
+ assertThat(redactionLog.getAnalysisNumber()).isEqualTo(savedRedactionLog.getAnalysisNumber());
+ assertThat(redactionLog.getDictionaryVersion()).isEqualTo(savedRedactionLog.getDictionaryVersion());
+ assertThat(redactionLog.getDossierDictionaryVersion()).isEqualTo(savedRedactionLog.getDossierDictionaryVersion());
+ assertThat(redactionLog.getRulesVersion()).isEqualTo(savedRedactionLog.getRulesVersion());
+ assertThat(redactionLog.getLegalBasisVersion()).isEqualTo(savedRedactionLog.getLegalBasisVersion());
+
+ assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(savedRedactionLog.getRedactionLogEntry().size());
+ assertThat(redactionLog.getLegalBasis().size()).isEqualTo(savedRedactionLog.getLegalBasis().size());
+
+ for (RedactionLogLegalBasis redactionLegalBasis : redactionLog.getLegalBasis()) {
+ var savedRedactionLegalBasis = savedRedactionLog.getLegalBasis()
+ .stream()
+ .filter(lb -> lb.getName().equalsIgnoreCase(redactionLegalBasis.getName()))
+ .filter(lb -> lb.getDescription().equalsIgnoreCase(redactionLegalBasis.getDescription()))
+ .filter(lb -> lb.getReason().equalsIgnoreCase(redactionLegalBasis.getReason()))
+ .findFirst();
+ assertThat(savedRedactionLegalBasis).isPresent();
+ }
+
+ for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
+ var savedRedactionLogEntry = savedRedactionLog.getRedactionLogEntry().stream().filter(r -> r.getId().equalsIgnoreCase(redactionLogEntry.getId())).findFirst();
+ assertThat(savedRedactionLogEntry).isPresent();
+ assertThat(savedRedactionLogEntry.get().getId()).isEqualTo(redactionLogEntry.getId());
+ assertThat(savedRedactionLogEntry.get().getType()).isEqualTo(redactionLogEntry.getType());
+ assertThat(savedRedactionLogEntry.get().getValue()).isEqualTo(redactionLogEntry.getValue());
+ assertThat(savedRedactionLogEntry.get().getReason()).isEqualTo(redactionLogEntry.getReason());
+ assertThat(savedRedactionLogEntry.get().getMatchedRule()).isEqualTo(redactionLogEntry.getMatchedRule());
+ assertThat(savedRedactionLogEntry.get().isRectangle()).isEqualTo(redactionLogEntry.isRectangle());
+ assertThat(savedRedactionLogEntry.get().getLegalBasis()).isEqualTo(redactionLogEntry.getLegalBasis());
+ assertThat(savedRedactionLogEntry.get().isImported()).isEqualTo(redactionLogEntry.isImported());
+ assertThat(savedRedactionLogEntry.get().isRedacted()).isEqualTo(redactionLogEntry.isRedacted());
+ assertThat(savedRedactionLogEntry.get().isHint()).isEqualTo(redactionLogEntry.isHint());
+ assertThat(savedRedactionLogEntry.get().isRecommendation()).isEqualTo(redactionLogEntry.isRecommendation());
+ assertThat(savedRedactionLogEntry.get().isFalsePositive()).isEqualTo(redactionLogEntry.isFalsePositive());
+ assertThat(savedRedactionLogEntry.get().getSection()).isEqualTo(redactionLogEntry.getSection());
+ assertThat(savedRedactionLogEntry.get().getColor()).isEqualTo(redactionLogEntry.getColor());
+ assertThat(savedRedactionLogEntry.get().getSectionNumber()).isEqualTo(redactionLogEntry.getSectionNumber());
+ assertThat(savedRedactionLogEntry.get().getTextBefore()).isEqualTo(redactionLogEntry.getTextBefore());
+ assertThat(savedRedactionLogEntry.get().getTextAfter()).isEqualTo(redactionLogEntry.getTextAfter());
+ assertThat(savedRedactionLogEntry.get().getStartOffset()).isEqualTo(redactionLogEntry.getStartOffset());
+ assertThat(savedRedactionLogEntry.get().getEndOffset()).isEqualTo(redactionLogEntry.getEndOffset());
+ assertThat(savedRedactionLogEntry.get().isImage()).isEqualTo(redactionLogEntry.isImage());
+ assertThat(savedRedactionLogEntry.get().isImageHasTransparency()).isEqualTo(redactionLogEntry.isImageHasTransparency());
+ assertThat(savedRedactionLogEntry.get().isDictionaryEntry()).isEqualTo(redactionLogEntry.isDictionaryEntry());
+ assertThat(savedRedactionLogEntry.get().isDossierDictionaryEntry()).isEqualTo(redactionLogEntry.isDossierDictionaryEntry());
+ assertThat(savedRedactionLogEntry.get().isExcluded()).isEqualTo(redactionLogEntry.isExcluded());
+ assertThat(savedRedactionLogEntry.get().getSourceId()).isEqualTo(redactionLogEntry.getSourceId());
+
+ for (Rectangle rectangle : redactionLogEntry.getPositions()) {
+ var savedRectangle = savedRedactionLogEntry.get()
+ .getPositions()
+ .stream()
+ .filter(r -> r.getPage() == rectangle.getPage())
+ .filter(r -> r.getTopLeft().getX() == rectangle.getTopLeft().getX())
+ .filter(r -> r.getTopLeft().getY() == rectangle.getTopLeft().getY())
+ .filter(r -> r.getHeight() == rectangle.getHeight())
+ .filter(r -> r.getWidth() == rectangle.getWidth())
+ .findFirst();
+ assertThat(savedRectangle).isPresent();
+ }
+
+ for (RedactionLogComment comment : redactionLogEntry.getComments()) {
+ var savedComment = savedRedactionLogEntry.get().getComments().stream().filter(c -> c.getId() == comment.getId()).findFirst();
+ assertThat(savedComment).isPresent();
+ assertThat(savedComment.get().getId()).isEqualTo(comment.getId());
+ assertThat(savedComment.get().getUser()).isEqualTo(comment.getUser());
+ assertThat(savedComment.get().getText()).isEqualTo(comment.getText());
+ assertThat(savedComment.get().getAnnotationId()).isEqualTo(comment.getAnnotationId());
+ assertThat(savedComment.get().getFileId()).isEqualTo(comment.getFileId());
+
+ }
+
+ for (Change change : redactionLogEntry.getChanges()) {
+ var savedChange = savedRedactionLogEntry.get()
+ .getChanges()
+ .stream()
+ .filter(c -> c.getAnalysisNumber() == change.getAnalysisNumber())
+ .filter(c -> c.getType() == change.getType())
+ .findFirst();
+ assertThat(savedChange).isPresent();
+ }
+
+ for (ManualChange manualChange : redactionLogEntry.getManualChanges()) {
+ var savedManualChange = savedRedactionLogEntry.get()
+ .getManualChanges()
+ .stream()
+ .filter(m -> m.getAnnotationStatus() == manualChange.getAnnotationStatus())
+ .filter(m -> m.getManualRedactionType() == manualChange.getManualRedactionType())
+ .filter(m -> m.getUserId().equalsIgnoreCase(manualChange.getUserId()))
+ .filter(m -> m.getPropertyChanges() == manualChange.getPropertyChanges())
+ .findFirst();
+ assertThat(savedManualChange).isPresent();
+ }
+
+ assertThat(savedRedactionLogEntry.get().getEngines()).containsExactly(redactionLogEntry.getEngines().toArray(new Engine[0]));
+
+ assertThat(savedRedactionLogEntry.get().getReference()).containsAll(redactionLogEntry.getReference());
+ assertThat(savedRedactionLogEntry.get().getImportedRedactionIntersections()).containsAll(redactionLogEntry.getImportedRedactionIntersections());
+ }
+
+ }
+
+
+ /** Reads the RedactionLog JSON stored below REDACTION_LOG_PATH for the given PDF name; asserts that the file exists. */
+ @SneakyThrows
+ private RedactionLog loadSavedRedactionLog(String pdfFileName) {
+     File pdf = new File(pdfFileName);
+     // The saved log uses the PDF's parent directory (appended to REDACTION_LOG_PATH) and the PDF name with ".json".
+     String jsonName = StringUtils.replace(pdf.getName(), ".pdf", ".json");
+     File savedLog = new File(REDACTION_LOG_PATH + pdf.getParentFile().getPath(), jsonName);
+
+     // JavaTimeModule is registered on the mapper; unknown JSON properties are ignored instead of failing.
+     ObjectMapper mapper = new ObjectMapper().registerModule(new JavaTimeModule())
+             .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+
+     assertThat(savedLog).exists();
+     return mapper.readValue(savedLog, RedactionLog.class);
+ }
+
+
+ /**
+  * Analyses every PDF found below RESOURCES_PATH and compares its RedactionLog with the one saved under
+  * REDACTION_LOG_PATH. Fails if a saved RedactionLog JSON does not exist. Ignored by default (@Ignore).
+  */
+ @Ignore
+ @Test
+ @SneakyThrows
+ public void analyseAllFilesAndCompareRedactionLogs() {
+     // Typed as Set<String> so the method reference below is checked against the collected file names.
+     Set<String> files = getFileNames(new HashSet<>(), FileSystems.getDefault().getPath(RESOURCES_PATH));
+     System.out.println("Will analyse " + files.size() + " files and compare its RedactionLogs.");
+     TEST_FILE_ID = "5000";
+     files.forEach(this::analyseFileAndCompareRedactionLog);
+ }
+
+
+ /** Recursively collects every *.pdf below dir into fileNames, each cut to the part after the resources directory. */
+ @SneakyThrows
+ private Set<String> getFileNames(Set<String> fileNames, Path dir) {
+     String resourcesDir = StringUtils.replace(RESOURCES_PATH, "/", File.separator); // platform separator; a hard-coded "\\" matches on Windows only
+     try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
+         for (Path path : stream) {
+             String absolutePath = path.toAbsolutePath().toString();
+             if (path.toFile().isDirectory()) {
+                 getFileNames(fileNames, path);
+             } else if (StringUtils.endsWith(absolutePath, ".pdf")) { // 18: offset kept unchanged, presumably RESOURCES_PATH without its trailing slash - TODO confirm
+                 fileNames.add(StringUtils.substring(absolutePath, StringUtils.indexOf(absolutePath, resourcesDir) + 18));
+             }
+         }
+     }
+     return fileNames;
+ }
+
+
+ /** Spring configuration for this test: auto-configuration minus RabbitAutoConfiguration and StorageAutoConfiguration. */
+ @Configuration
+ @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
+ public static class RedactionIntegrationTestConfiguration {
+
+     /** Builds a KieContainer from the rule text held in RULES, written to a new KieFileSystem under RESOURCES_PATH + RULES_PATH. */
+     @Bean
+     public KieContainer kieContainer() {
+
+         KieServices services = KieServices.Factory.get();
+         InputStream rulesStream = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
+
+         KieFileSystem fileSystem = services.newKieFileSystem();
+         fileSystem.write(RESOURCES_PATH + RULES_PATH, services.getResources().newInputStreamResource(rulesStream));
+
+         KieBuilder builder = services.newKieBuilder(fileSystem);
+         builder.buildAll();
+         return services.newKieContainer(builder.getKieModule().getReleaseId());
+     }
+
+     /** Registers a FileSystemBackedStorageService as the primary StorageService bean. */
+     @Bean
+     @Primary
+     public StorageService inmemoryStorage() {
+
+         return new FileSystemBackedStorageService();
+     }
+ }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/testRules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/testRules.drl
new file mode 100644
index 00000000..ad7726cd
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/testRules.drl
@@ -0,0 +1,431 @@
+package drools
+
+import com.iqser.red.service.redaction.v1.server.redaction.model.Section
+
+global Section section
+
+
+// --------------------------------------- AI rules -------------------------------------------------------------------
+
+rule "0: Add CBI_author from ai"
+ when // sections for which aiMatchesType("CBI_author") holds
+ Section(aiMatchesType("CBI_author"))
+ then // passes "CBI_author" twice to addAiEntities - presumably AI type and resulting entity type, TODO confirm
+ section.addAiEntities("CBI_author", "CBI_author");
+ end
+
+rule "0: Combine address parts from ai to CBI_address (org is mandatory)"
+ when // sections for which aiMatchesType("ORG") holds
+ Section(aiMatchesType("ORG"))
+ then // ORG first, the other AI types as one comma-separated list, target CBI_address; meaning of 20, 3 and false is defined by Section.combineAiTypes (not visible here)
+ section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
+ end
+
+rule "0: Combine address parts from ai to CBI_address (street is mandatory)"
+ when // sections for which aiMatchesType("STREET") holds
+ Section(aiMatchesType("STREET"))
+ then // same call shape as the ORG rule, with STREET first and ORG moved into the list
+ section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
+ end
+
+rule "0: Combine address parts from ai to CBI_address (city is mandatory)"
+ when // sections for which aiMatchesType("CITY") holds
+ Section(aiMatchesType("CITY"))
+ then // same call shape as the ORG rule, with CITY first and ORG/STREET moved into the list
+ section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false);
+ end
+
+
+// --------------------------------------- CBI rules -------------------------------------------------------------------
+
+rule "1: Redact CBI Authors (Non vertebrate study)"
+ when // "Vertebrate Study" attribute check is negated (non vertebrate study) and matchesType("CBI_author") holds
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
+ then // redact call for CBI_author; the number passed (1) equals this rule's number; Article 39(e)(3) variant
+ section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "2: Redact CBI Authors (Vertebrate study)"
+ when // "Vertebrate Study" attribute check is positive and matchesType("CBI_author") holds
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
+ then // same call as rule 1 with rule number 2 and Article 39(e)(2)
+ section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "3: Redact not CBI Address (Non vertebrate study)"
+ when // non vertebrate study and matchesType("CBI_address") holds
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
+ then // calls redactNot (no legal basis argument) and ignoreRecommendations for CBI_address
+ section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
+ section.ignoreRecommendations("CBI_address");
+ end
+
+rule "4: Redact CBI Address (Vertebrate study)"
+ when // vertebrate study and matchesType("CBI_address") holds
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
+ then // redact call for CBI_address with rule number 4 and Article 39(e)(2)
+ section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "5: Do not redact genitive CBI_author"
+ when // any section for which matchesType("CBI_author") holds, independent of the vertebrate attribute
+ Section(matchesType("CBI_author"))
+ then // regex: one apostrophe-like character followed by "s"; meaning of false and 0 is defined by Section.expandToFalsePositiveByRegEx (not visible here)
+ section.expandToFalsePositiveByRegEx("CBI_author", "['’’'ʼˈ´`‘′ʻ’']s", false, 0);
+ end
+
+
+rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
+ when // non vertebrate study, table header "Author(s)" present and no "Vertebrate study Y/N" header
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
+ then // redactCell on the "Author(s)" header as CBI_author; rule number 6, fourth argument false, Article 39(e)(3)
+ section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
+ when // vertebrate study, table header "Author(s)" present and no "Vertebrate study Y/N" header
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
+ then // same call as rule 6 with rule number 7 and Article 39(e)(2)
+ section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
+ when // like rule 6 but for the singular header "Author"
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
+ then // redactCell on the "Author" header; rule number 8, Article 39(e)(3)
+ section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
+ when // like rule 7 but for the singular header "Author"
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
+ then // redactCell on the "Author" header; rule number 9, Article 39(e)(2)
+ section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
+ when // non vertebrate study and rowEquals("Vertebrate study Y/N", ...) holds for one of "Y", "Yes", "N", "No"
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
+ then // fourth argument is true here (false in rules 6-9) - presumably enables the recommendation named in the rule title, TODO confirm
+ section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
+ when // vertebrate study and rowEquals("Vertebrate study Y/N", ...) holds for one of "Y", "Yes", "N", "No"
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
+ then // same call as rule 10 with rule number 11 and Article 39(e)(2)
+ section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+/* Syngenta specific laboratory rule */
+rule "12: Recommend CTL/BL laboratory that start with BL or CTL"
+ when // cheap pre-filter: searchText contains "CT" or "BL"; the regex below does the actual matching
+ Section(searchText.contains("CT") || searchText.contains("BL"))
+ then // adds regex matches as CBI_address recommendations; meaning of true and 0 is defined by Section.addRecommendationByRegEx (not visible here)
+ section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address");
+ end
+
+rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
+ when // non vertebrate study whose searchText contains "et al"
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
+ then // regex captures the name in front of "et al" in group 1; 1 is passed after the boolean - presumably the group index, TODO confirm
+ section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
+ when // vertebrate study whose searchText contains "et al"
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
+ then // same call as rule 14 with rule number 15 and Article 39(e)(2)
+ section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "16: Add recommendation for Addresses in Test Organism sections"
+ when // vertebrate study whose searchText contains both "Species:" and "Source:" (with colons)
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
+ then // recommendLineAfter with the label "Source:" and type CBI_address
+ section.recommendLineAfter("Source:", "CBI_address");
+ end
+
+rule "17: Add recommendation for Addresses in Test Animals sections"
+ when // vertebrate study whose searchText contains "Species" and "Source" (no colons, so every rule 16 match also matches here)
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
+ then // recommendLineAfter with the label "Source" and type CBI_address
+ section.recommendLineAfter("Source", "CBI_address");
+ end
+
+
+rule "18: Do not redact Names and Addresses if Published Information found"
+ when // any section for which matchesType("published_information") holds
+ Section(matchesType("published_information"))
+ then // redactNotAndReference for CBI_author and CBI_address, each with "published_information" as second argument and rule number 18
+ section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
+ section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
+ end
+
+
+// --------------------------------------- PII rules -------------------------------------------------------------------
+
+
+rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
+ when // non vertebrate study and matchesType("PII") holds
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
+ then // redact call for PII with rule number 19 and Article 39(e)(3)
+ section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
+ when // vertebrate study and matchesType("PII") holds
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
+ then // same call as rule 19 with rule number 20 and Article 39(e)(2)
+ section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "21: Redact Emails by RegEx (Non vertebrate study)"
+ when // cheap pre-filter: non vertebrate study whose searchText contains "@"
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
+ then // e-mail regex with the whole address in group 1; true and 1 are passed before the type - presumably a flag and the group index, TODO confirm
+ section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "22: Redact Emails by RegEx (Vertebrate study)"
+ when // vertebrate study whose searchText contains "@"
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
+ then // same call as rule 21 with rule number 22 and Article 39(e)(2)
+ section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "23: Redact contact information (Non vertebrate study)"
+ when // non vertebrate study whose text contains at least one of the contact labels (or label pairs) listed below
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (text.contains("Contact point:")
+ || text.contains("Phone:")
+ || text.contains("Fax:")
+ || text.contains("Tel.:")
+ || text.contains("Tel:")
+ || text.contains("E-mail:")
+ || text.contains("Email:")
+ || text.contains("e-mail:")
+ || text.contains("E-mail address:")
+ || text.contains("Contact:")
+ || text.contains("Alternative contact:")
+ || text.contains("Telephone number:")
+ || text.contains("Telephone No:")
+ || text.contains("Fax number:")
+ || text.contains("Telephone:")
+ || text.contains("Phone No.")
+ || (text.contains("No:") && text.contains("Fax"))
+ || (text.contains("Contact:") && text.contains("Tel.:"))
+ || text.contains("European contact:")
+ ))
+ then // one call per label from the condition: redactLineAfter for single labels, redactBetween for the "No:"/"Fax" and "Contact:"/"Tel.:" pairs; all PII, rule number 23, Article 39(e)(3)
+ section.redactLineAfter("Contact point:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Phone:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Fax:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Tel.:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Tel:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("E-mail:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Email:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("e-mail:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("E-mail address:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Alternative contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Telephone number:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Telephone No:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Fax number:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Telephone:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Phone No.", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactBetween("No:", "Fax", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactBetween("Contact:", "Tel.:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("European contact:", "PII", 23, true, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "24: Redact contact information (Vertebrate study)"
+ when // vertebrate study whose text contains at least one of the contact labels (or label pairs) listed below
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (text.contains("Contact point:")
+ || text.contains("Phone:")
+ || text.contains("Fax:")
+ || text.contains("Tel.:")
+ || text.contains("Tel:")
+ || text.contains("E-mail:")
+ || text.contains("Email:")
+ || text.contains("e-mail:")
+ || text.contains("E-mail address:")
+ || text.contains("Contact:")
+ || text.contains("Alternative contact:")
+ || text.contains("Telephone number:")
+ || text.contains("Telephone No:")
+ || text.contains("Fax number:")
+ || text.contains("Telephone:")
+ || text.contains("Phone No.")
+ || (text.contains("No:") && text.contains("Fax"))
+ || (text.contains("Contact:") && text.contains("Tel.:"))
+ || text.contains("European contact:")
+ ))
+ then // one call per label from the condition: redactLineAfter for single labels, redactBetween for the "No:"/"Fax" and "Contact:"/"Tel.:" pairs; all PII, rule number 24, Article 39(e)(2)
+ section.redactLineAfter("Contact point:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Phone:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Fax:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Tel.:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Tel:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("E-mail:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Email:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("e-mail:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("E-mail address:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Alternative contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Telephone number:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Telephone No:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Fax number:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Telephone:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("Phone No.", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactBetween("No:", "Fax", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactBetween("Contact:", "Tel.:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ section.redactLineAfter("European contact:", "PII", 24, true, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
+ when // cheap pre-filter: non vertebrate study whose text contains one of the phone/fax keywords below
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
+ text.contains("Telephone")
+ || text.contains("Phone")
+ || text.contains("Ph.")
+ || text.contains("Fax")
+ || text.contains("Tel")
+ || text.contains("Ter")
+ || text.contains("Cell")
+ || text.contains("Mobile")
+ || text.contains("Fel")
+ || text.contains("Fer")
+ ))
+ then // group 2 of the regex is the number after the keyword; Article 39(e)(3) like every other non vertebrate rule (39(e)(2) belongs to the vertebrate twin, rule 26)
+ section.redactByRegEx("\\b(telephone|phone|fax|tel|ter|cell|mobile|fel|fer)[:.\\s]{0,3}((\\(?\\+?[0-9])(\\(?[0-9\\/.\\-\\s]+\\)?)*([0-9]+\\)?))\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
+ when // cheap pre-filter: vertebrate study whose text contains one of the phone/fax keywords below
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
+ text.contains("Telephone")
+ || text.contains("Phone")
+ || text.contains("Ph.")
+ || text.contains("Fax")
+ || text.contains("Tel")
+ || text.contains("Ter")
+ || text.contains("Cell")
+ || text.contains("Mobile")
+ || text.contains("Fel")
+ || text.contains("Fer")
+ ))
+ then // group 2 of the regex is the number after the keyword; 2 is passed after the boolean - presumably the group index, TODO confirm; the regex has no "ph" alternative although "Ph." is pre-filtered
+ section.redactByRegEx("\\b(telephone|phone|fax|tel|ter|cell|mobile|fel|fer)[:.\\s]{0,3}((\\(?\\+?[0-9])(\\(?[0-9\\/.\\-\\s]+\\)?)*([0-9]+\\)?))\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "27: Redact AUTHOR(S) (Non vertebrate study)"
+ when // non vertebrate study with "AUTHOR(S):" and "COMPLETION DATE:" but without "STUDY COMPLETION DATE:" (that case is rule 29)
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+ && searchText.contains("AUTHOR(S):")
+ && searchText.contains("COMPLETION DATE:")
+ && !searchText.contains("STUDY COMPLETION DATE:")
+ )
+ then // redactLinesBetween the two labels as PII; rule number 27, Article 39(e)(3); reason text is "Author found" here, unlike rules 28-30
+ section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "28: Redact AUTHOR(S) (Vertebrate study)"
+ when // vertebrate study with "AUTHOR(S):" and "COMPLETION DATE:" but without "STUDY COMPLETION DATE:" (that case is rule 30)
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+ && searchText.contains("AUTHOR(S):")
+ && searchText.contains("COMPLETION DATE:")
+ && !searchText.contains("STUDY COMPLETION DATE:")
+ )
+ then // redactLinesBetween the two labels as PII; rule number 28, Article 39(e)(2)
+ section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "29: Redact AUTHOR(S) (Non vertebrate study)"
+ when // non vertebrate study with "AUTHOR(S):" and "STUDY COMPLETION DATE:"
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+ && searchText.contains("AUTHOR(S):")
+ && searchText.contains("STUDY COMPLETION DATE:")
+ )
+ then // redactLinesBetween "AUTHOR(S):" and "STUDY COMPLETION DATE:" as PII; rule number 29, Article 39(e)(3)
+ section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "30: Redact AUTHOR(S) (Vertebrate study)"
+ when // vertebrate study with "AUTHOR(S):" and "STUDY COMPLETION DATE:"
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+ && searchText.contains("AUTHOR(S):")
+ && searchText.contains("STUDY COMPLETION DATE:")
+ )
+ then // redactLinesBetween "AUTHOR(S):" and "STUDY COMPLETION DATE:" as PII; rule number 30, Article 39(e)(2)
+ section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
+ when // non vertebrate study whose searchText contains "PERFORMING LABORATORY:"
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+ && searchText.contains("PERFORMING LABORATORY:")
+ )
+ then // redactBetween as CBI_address, followed by redactNot for CBI_address - presumably so the laboratory is recorded but not redacted here, TODO confirm
+ section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study");
+ end
+
+rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
+ when // vertebrate study whose searchText contains "PERFORMING LABORATORY:"
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
+ && searchText.contains("PERFORMING LABORATORY:"))
+ then // redactBetween "PERFORMING LABORATORY:" and "LABORATORY PROJECT ID:" as CBI_address; rule number 32, Article 39(e)(2); no redactNot call
+ section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+// --------------------------------------- other rules -------------------------------------------------------------------
+
+rule "33: Purity Hint"
+ when // sections whose lower-cased searchText contains "purity"
+ Section(searchText.toLowerCase().contains("purity"))
+ then // regex: "purity" (optionally with " of", a parenthesised part and a colon) followed by a percentage; added via addHintAnnotationByRegEx with type "hint_only"
+ section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
+ end
+
+
+rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
+ when // "Confidentiality" attribute check against "confidential" is negated and matchesType("dossier_redaction") holds
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction"));
+ then // calls ignore for the dossier_redaction type
+ section.ignore("dossier_redaction");
+ end
+
+
+rule "35: Redact signatures (Non vertebrate study)"
+ when // non vertebrate study and matchesImageType("signature") holds
+ Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
+ then // redactImage for "signature" with rule number 35 and Article 39(e)(3)
+ section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
+ end
+
+rule "36: Redact signatures (Vertebrate study)"
+ when // vertebrate study and matchesImageType("signature") holds
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
+ then // redactImage for "signature" with rule number 36 and Article 39(e)(2)
+ section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
+
+
+rule "43: Redact Logos (Vertebrate study)"
+ when // vertebrate study and matchesImageType("logo") holds; this file has no non vertebrate counterpart for logos
+ Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo"))
+ then // redactImage for "logo" with rule number 43 and Article 39(e)(2)
+ section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
+ end
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RulesTest/SYNGENTA_EFSA_sanitisation_GFL_v1_withHighlights.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RulesTest/SYNGENTA_EFSA_sanitisation_GFL_v1_withHighlights.pdf
new file mode 100644
index 00000000..d822757f
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RulesTest/SYNGENTA_EFSA_sanitisation_GFL_v1_withHighlights.pdf differ