# Conflicts:
#	redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
This commit is contained in:
devplant 2023-04-05 11:37:33 +03:00
commit c3b29e4ebc
9 changed files with 1100 additions and 534 deletions

View File

@ -0,0 +1,15 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.HashMap;
import java.util.Map;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Holder for rules versions, keyed by dossier template id.
 *
 * <p>Lombok's {@code @Data} generates the accessors (plus equals/hashCode/toString) for the
 * map below and {@code @NoArgsConstructor} generates the no-argument constructor.
 *
 * <p>NOTE(review): the backing map is a plain {@link HashMap}; if instances are shared between
 * threads, callers need their own synchronisation - confirm against the usage sites.
 */
@Data
@NoArgsConstructor
public class TenantRules {
// Rules version per dossier template id; starts out empty for a fresh instance.
private Map<String, Long> rulesVersionPerDossierTemplateId = new HashMap<>();
}

View File

@ -1,11 +1,13 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import javax.annotation.PostConstruct;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.KieServices;
@ -16,11 +18,19 @@ import org.kie.api.runtime.KieContainer;
import org.kie.api.runtime.KieSession;
import org.springframework.stereotype.Service;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.TenantRules;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@Service
@RequiredArgsConstructor
@ -30,7 +40,24 @@ public class DroolsExecutionService {
private final Map<String, KieContainer> kieContainers = new HashMap<>();
private final Map<String, Long> rulesVersionPerDossierTemplateId = new HashMap<>();
private final RedactionServiceSettings settings;
private LoadingCache<String, TenantRules> tenantRulesCache;
/**
 * Builds the cache holding one {@link TenantRules} instance per cache key. Within this class
 * the key is the tenant id obtained from {@code TenantContext.getTenantId()}.
 *
 * <p>Entries that are missing (never loaded, or evicted by size/expiry) are created empty by
 * the loader, so a lookup of a rules version in a freshly loaded entry yields {@code null}.
 *
 * <p>NOTE(review): maximum size and expiry are taken from the <em>dictionary</em> cache
 * settings - confirm that sharing those settings with the rules cache is intended.
 */
@PostConstruct
protected void createCache() {
    tenantRulesCache = CacheBuilder.newBuilder()
            .maximumSize(settings.getDictionaryCacheMaximumSize())
            .expireAfterAccess(settings.getDictionaryCacheExpireAfterAccessDays(), TimeUnit.DAYS)
            .build(new CacheLoader<>() {
                // @Override lets the compiler verify this really implements CacheLoader#load.
                @Override
                public TenantRules load(String key) {
                    return new TenantRules();
                }
            });
}
public KieContainer getKieContainer(String dossierTemplateId) {
@ -61,13 +88,13 @@ public class DroolsExecutionService {
public KieContainer updateRules(String dossierTemplateId) {
long version = rulesClient.getVersion(dossierTemplateId);
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
Long rulesVersion = getVersionForDossierTemplate(dossierTemplateId);
if (rulesVersion == null) {
rulesVersion = -1L;
}
if (version > rulesVersion) {
rulesVersionPerDossierTemplateId.put(dossierTemplateId, version);
setRulesVersionForDossierTemplate(dossierTemplateId, version);
return createOrUpdateKieContainer(dossierTemplateId);
}
return getKieContainer(dossierTemplateId);
@ -126,11 +153,25 @@ public class DroolsExecutionService {
/**
 * Returns the rules version recorded for the given dossier template.
 *
 * @param dossierTemplateId id of the dossier template whose rules version is requested
 * @return the recorded version, or {@code -1} when the lookup yields {@code null}
 *         (i.e. no version has been recorded for that template)
 */
public long getRulesVersion(String dossierTemplateId) {
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
Long rulesVersion = getVersionForDossierTemplate(dossierTemplateId);
if (rulesVersion == null) {
return -1;
}
return rulesVersion;
}
/**
 * Looks up the rules version stored for the given dossier template in the cache entry of
 * the tenant returned by {@code TenantContext.getTenantId()}.
 *
 * @param dossierTemplateId id of the dossier template
 * @return the stored version, or {@code null} when the map holds no value for that id
 */
@SneakyThrows
private Long getVersionForDossierTemplate(String dossierTemplateId) {
    TenantRules tenantRules = tenantRulesCache.get(TenantContext.getTenantId());
    Map<String, Long> versionsByTemplate = tenantRules.getRulesVersionPerDossierTemplateId();
    return versionsByTemplate.get(dossierTemplateId);
}
/**
 * Stores the rules version for the given dossier template in the cache entry of the tenant
 * returned by {@code TenantContext.getTenantId()}, replacing any previous value.
 *
 * @param dossierTemplateId id of the dossier template
 * @param version           rules version to record
 */
@SneakyThrows
private void setRulesVersionForDossierTemplate(String dossierTemplateId, long version) {
    TenantRules tenantRules = tenantRulesCache.get(TenantContext.getTenantId());
    Map<String, Long> versionsByTemplate = tenantRules.getRulesVersionPerDossierTemplateId();
    versionsByTemplate.put(dossierTemplateId, version);
}
}

View File

@ -66,7 +66,7 @@ class EntityFinder {
!local,
model.isDossierDictionary(),
local ? Engine.RULE : Engine.DICTIONARY,
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
local ? EntityType.RECOMMENDATION : EntityType.ENTITY)).stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet());
EntitySearchUtils.addOrAddEngine(found, entities);
}

View File

@ -273,14 +273,14 @@ public final class EntitySearchUtils {
existing.setLegalBasis(found.getLegalBasis());
existing.setMatchedRule(found.getMatchedRule());
existing.setRedactionReason(found.getRedactionReason());
if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY) || existing.getEntityType()
.equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) {
if (isOneARecommendationAndTheOtherEntity(found, existing)) {
existing.setEntityType(EntityType.ENTITY);
if (found.isRedaction()) {
existing.setRedaction(true);
}
}
} else if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
} else if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType()) &&
!(isOneARecommendationAndTheOtherEntity(found, existing) && existing.isRedaction() && found.isRedaction()) ) {
entities.remove(found);
entities.add(found);
}
@ -289,6 +289,13 @@ public final class EntitySearchUtils {
}
}
/**
 * Tells whether the two entities form a mixed pair, i.e. one has entity type
 * {@code RECOMMENDATION} while the other has entity type {@code ENTITY} (in either order).
 *
 * @param entityOne first entity of the pair
 * @param entityTwo second entity of the pair
 * @return {@code true} only for a RECOMMENDATION/ENTITY combination
 */
private boolean isOneARecommendationAndTheOtherEntity(Entity entityOne, Entity entityTwo) {
    var firstType = entityOne.getEntityType();
    var secondType = entityTwo.getEntityType();
    if (secondType.equals(EntityType.RECOMMENDATION) && firstType.equals(EntityType.ENTITY)) {
        return true;
    }
    return secondType.equals(EntityType.ENTITY) && firstType.equals(EntityType.RECOMMENDATION);
}
public void addEntitiesIgnoreRank(Set<Entity> entities, Set<Entity> found) {
// HashSet keeps old value but we want the new.

View File

@ -0,0 +1,462 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.when;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.mockito.stubbing.Answer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.core.io.ClassPathResource;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
public abstract class AbstractRedactionIntegrationTest {
protected static final String VERTEBRATE_INDICATOR = "vertebrate";
protected static final String DICTIONARY_ADDRESS = "CBI_address";
protected static final String DICTIONARY_AUTHOR = "CBI_author";
protected static final String DICTIONARY_SPONSOR = "CBI_sponsor";
protected static final String DICTIONARY_PII = "PII";
protected static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
protected static final String REDACTION_INDICATOR = "redaction_indicator";
protected static final String HINT_ONLY_INDICATOR = "hint_only";
protected static final String MUST_REDACT_INDICATOR = "must_redact";
protected static final String PUBLISHED_INFORMATION_INDICATOR = "published_information";
protected static final String TEST_METHOD_INDICATOR = "test_method";
protected static final String PURITY_INDICATOR = "purity";
protected static final String IMAGE_INDICATOR = "image";
protected static final String LOGO_INDICATOR = "logo";
protected static final String SIGNATURE_INDICATOR = "signature";
protected static final String FORMULA_INDICATOR = "formula";
protected static final String OCR_INDICATOR = "ocr";
protected static final String DOSSIER_REDACTIONS_INDICATOR = "dossier_redactions";
protected static final String IMPORTED_REDACTION_INDICATOR = "imported_redaction";
protected static final String ROTATE_SIMPLE_INDICATOR = "RotateSimple";
protected final static String TEST_DOSSIER_TEMPLATE_ID = "123";
public static final String IMPORTED_REDACTION_TYPE_ID = IMPORTED_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String DOSSIER_REDACTIONS_TYPE_ID = DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String ROTATE_SIMPLE_TYPE_ID = ROTATE_SIMPLE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String FORMULA_TYPE_ID = FORMULA_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String SIGNATURE_TYPE_ID = SIGNATURE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String LOGO_TYPE_ID = LOGO_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String OCR_TYPE_ID = OCR_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String IMAGE_TYPE_ID = IMAGE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String PURITY_TYPE_ID = PURITY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
@Autowired
protected RedactionController redactionController;
@Autowired
protected AnnotationService annotationService;
@Autowired
protected AnalyzeService analyzeService;
@Autowired
protected ObjectMapper objectMapper;
@Autowired
protected RedactionStorageService redactionStorageService;
@Autowired
protected StorageService storageService;
@Autowired
protected ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@MockBean
protected AmazonS3 amazonS3;
@MockBean
protected RabbitTemplate rabbitTemplate;
@MockBean
protected LegalBasisClient legalBasisClient;
protected final Map<String, List<String>> dictionary = new HashMap<>();
protected final Map<String, List<String>> dossierDictionary = new HashMap<>();
protected final Map<String, List<String>> falsePositive = new HashMap<>();
protected final Map<String, List<String>> falseRecommendation = new HashMap<>();
protected final Map<String, String> typeColorMap = new HashMap<>();
protected final Map<String, Boolean> hintTypeMap = new HashMap<>();
protected final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
protected final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
protected final Map<String, Integer> rankTypeMap = new HashMap<>();
protected final Colors colors = new Colors();
protected final Map<String, Long> reanlysisVersions = new HashMap<>();
protected final Set<String> deleted = new HashSet<>();
protected final static String TEST_DOSSIER_ID = "123";
protected final static String TEST_FILE_ID = "123";
@MockBean
protected RulesClient rulesClient;
@MockBean
protected DictionaryClient dictionaryClient;
/**
 * Runs after each test and clears the storage, but only when the injected storage service
 * is a {@code FileSystemBackedStorageService}; other implementations are left untouched.
 */
@AfterEach
public void cleanupStorage() {
    if (!(this.storageService instanceof FileSystemBackedStorageService)) {
        return;
    }
    FileSystemBackedStorageService fileSystemStorage = (FileSystemBackedStorageService) this.storageService;
    fileSystemStorage.clearStorage();
}
/**
 * Stubs {@code dictionaryClient.getDictionaryForType} for every test type id at the given
 * version. Each stub answers lazily with {@link #getDictionaryResponse(String, boolean)}, so
 * the response reflects the dictionary maps at the time of the call, not at stubbing time.
 *
 * @param version dictionary version the stubs are registered for
 */
protected void mockDictionaryCalls(Long version) {
    // Regular (template level) dictionaries.
    stubDictionary(VERTEBRATE_TYPE_ID, version, VERTEBRATE_INDICATOR, false);
    stubDictionary(ADDRESS_TYPE_ID, version, DICTIONARY_ADDRESS, false);
    stubDictionary(AUTHOR_TYPE_ID, version, DICTIONARY_AUTHOR, false);
    stubDictionary(SPONSOR_TYPE_ID, version, DICTIONARY_SPONSOR, false);
    stubDictionary(NO_REDACTION_TYPE_ID, version, NO_REDACTION_INDICATOR, false);
    stubDictionary(REDACTION_TYPE_ID, version, REDACTION_INDICATOR, false);
    stubDictionary(HINT_ONLY_TYPE_ID, version, HINT_ONLY_INDICATOR, false);
    stubDictionary(MUST_REDACT_TYPE_ID, version, MUST_REDACT_INDICATOR, false);
    stubDictionary(PUBLISHED_INFORMATION_TYPE_ID, version, PUBLISHED_INFORMATION_INDICATOR, false);
    stubDictionary(TEST_METHOD_TYPE_ID, version, TEST_METHOD_INDICATOR, false);
    stubDictionary(PII_TYPE_ID, version, DICTIONARY_PII, false);
    stubDictionary(PURITY_TYPE_ID, version, PURITY_INDICATOR, false);
    stubDictionary(IMAGE_TYPE_ID, version, IMAGE_INDICATOR, false);
    stubDictionary(OCR_TYPE_ID, version, OCR_INDICATOR, false);
    stubDictionary(LOGO_TYPE_ID, version, LOGO_INDICATOR, false);
    stubDictionary(SIGNATURE_TYPE_ID, version, SIGNATURE_INDICATOR, false);
    stubDictionary(FORMULA_TYPE_ID, version, FORMULA_INDICATOR, false);
    stubDictionary(ROTATE_SIMPLE_TYPE_ID, version, ROTATE_SIMPLE_INDICATOR, false);
    // These two are answered from the dossier dictionary map.
    stubDictionary(DOSSIER_REDACTIONS_TYPE_ID, version, DOSSIER_REDACTIONS_INDICATOR, true);
    stubDictionary(IMPORTED_REDACTION_TYPE_ID, version, IMPORTED_REDACTION_INDICATOR, true);
}

/** Registers a single lazy dictionary stub for {@code typeId} at {@code version}. */
private void stubDictionary(String typeId, Long version, String type, boolean isDossierDictionary) {
    when(dictionaryClient.getDictionaryForType(typeId, version)).then((Answer<Type>) invocation -> getDictionaryResponse(type, isDossierDictionary));
}
/**
 * Fills the in-memory test dictionaries from the classpath resources under
 * {@code dictionaries/}. Entries are cleaned with {@code cleanDictionaryEntry} and appended
 * to whatever the target list already contains.
 */
protected void loadDictionaryForTest() {
    // Template level dictionaries.
    addCleanedEntries(dictionary, DICTIONARY_AUTHOR, "dictionaries/CBI_author.txt");
    addCleanedEntries(dictionary, DICTIONARY_SPONSOR, "dictionaries/CBI_sponsor.txt");
    addCleanedEntries(dictionary, VERTEBRATE_INDICATOR, "dictionaries/vertebrate.txt");
    addCleanedEntries(dictionary, DICTIONARY_ADDRESS, "dictionaries/CBI_address.txt");
    addCleanedEntries(dictionary, NO_REDACTION_INDICATOR, "dictionaries/no_redaction_indicator.txt");
    addCleanedEntries(dictionary, REDACTION_INDICATOR, "dictionaries/redaction_indicator.txt");
    addCleanedEntries(dictionary, HINT_ONLY_INDICATOR, "dictionaries/hint_only.txt");
    addCleanedEntries(dictionary, MUST_REDACT_INDICATOR, "dictionaries/must_redact.txt");
    addCleanedEntries(dictionary, PUBLISHED_INFORMATION_INDICATOR, "dictionaries/published_information.txt");
    addCleanedEntries(dictionary, TEST_METHOD_INDICATOR, "dictionaries/test_method.txt");
    addCleanedEntries(dictionary, DICTIONARY_PII, "dictionaries/PII.txt");
    addCleanedEntries(dictionary, PURITY_INDICATOR, "dictionaries/purity.txt");
    // The image-like types are all loaded from the same empty.txt resource.
    addCleanedEntries(dictionary, IMAGE_INDICATOR, "dictionaries/empty.txt");
    addCleanedEntries(dictionary, OCR_INDICATOR, "dictionaries/empty.txt");
    addCleanedEntries(dictionary, LOGO_INDICATOR, "dictionaries/empty.txt");
    addCleanedEntries(dictionary, SIGNATURE_INDICATOR, "dictionaries/empty.txt");
    addCleanedEntries(dictionary, FORMULA_INDICATOR, "dictionaries/empty.txt");
    // Dossier level dictionaries; imported redactions are reset to an empty list.
    addCleanedEntries(dossierDictionary, DOSSIER_REDACTIONS_INDICATOR, "dictionaries/dossier_redactions.txt");
    dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
    // False positives.
    addCleanedEntries(falsePositive, DICTIONARY_PII, "dictionaries/PII_false_positive.txt");
}

/** Appends the cleaned, de-duplicated lines of {@code resourcePath} to {@code target}'s list for {@code type}. */
private void addCleanedEntries(Map<String, List<String>> target, String type, String resourcePath) {
    // Create the bucket before loading, mirroring the order of the inline call chain.
    List<String> bucket = target.computeIfAbsent(type, v -> new ArrayList<>());
    bucket.addAll(ResourceLoader.load(resourcePath).stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
/**
 * Drops every entry of the template level dictionary map and loads only the entries of
 * {@code dictionaries/RotateTestFileSimple.txt} for the rotate-simple type.
 */
protected void loadOnlyDictionaryForSimpleFile() {
    dictionary.clear();
    List<String> rotateSimpleEntries = dictionary.computeIfAbsent(ROTATE_SIMPLE_INDICATOR, v -> new ArrayList<>());
    rotateSimpleEntries.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
/**
 * Reads a classpath resource and returns its lines joined with {@code "\n"}.
 *
 * @param path classpath-relative location of the resource
 * @return the resource content, lines joined by a single newline (no trailing newline)
 * @throws IllegalArgumentException when no resource exists at {@code path}
 */
@SneakyThrows
protected static String loadFromClassPath(String path) {
    URL resource = ResourceLoader.class.getClassLoader().getResource(path);
    if (resource == null) {
        // Report the path that was actually requested instead of a fixed file name.
        throw new IllegalArgumentException("could not load classpath resource: " + path);
    }
    // URL#getPath keeps percent-escapes (e.g. %20 for a space); going through the URI
    // decodes them so resources in directories with special characters are found.
    List<String> lines = Files.readAllLines(new File(resource.toURI()).toPath());
    return String.join("\n", lines);
}
/**
 * Populates the per-type test configuration maps (colour, hint flag, case-insensitivity flag,
 * recommendation flag, rank) and the shared {@code colors} object with fixed test values.
 */
protected void loadTypeForTest() {
// Hex colour per type.
// NOTE(review): there is no colour entry for DOSSIER_REDACTIONS_INDICATOR, so getTypeResponse()
// (which iterates this map) does not return that type - confirm this is intended.
typeColorMap.put(VERTEBRATE_INDICATOR, "#ff85f7");
typeColorMap.put(DICTIONARY_ADDRESS, "#ffe187");
typeColorMap.put(DICTIONARY_AUTHOR, "#ffe187");
typeColorMap.put(DICTIONARY_SPONSOR, "#85ebff");
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
typeColorMap.put(HINT_ONLY_INDICATOR, "#abc0c4");
typeColorMap.put(MUST_REDACT_INDICATOR, "#fab4c0");
typeColorMap.put(PUBLISHED_INFORMATION_INDICATOR, "#85ebff");
typeColorMap.put(TEST_METHOD_INDICATOR, "#91fae8");
typeColorMap.put(DICTIONARY_PII, "#66ccff");
typeColorMap.put(PURITY_INDICATOR, "#ffe187");
typeColorMap.put(IMAGE_INDICATOR, "#fcc5fb");
typeColorMap.put(OCR_INDICATOR, "#fcc5fb");
typeColorMap.put(LOGO_INDICATOR, "#ffe187");
typeColorMap.put(FORMULA_INDICATOR, "#ffe187");
typeColorMap.put(SIGNATURE_INDICATOR, "#ffe187");
typeColorMap.put(IMPORTED_REDACTION_INDICATOR, "#fcfbe6");
typeColorMap.put(ROTATE_SIMPLE_INDICATOR, "#66ccff");
// Value passed to Type.builder().isHint(...) for each type.
hintTypeMap.put(VERTEBRATE_INDICATOR, true);
hintTypeMap.put(DICTIONARY_ADDRESS, false);
hintTypeMap.put(DICTIONARY_AUTHOR, false);
hintTypeMap.put(DICTIONARY_SPONSOR, false);
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY_INDICATOR, true);
hintTypeMap.put(MUST_REDACT_INDICATOR, true);
hintTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, true);
hintTypeMap.put(TEST_METHOD_INDICATOR, true);
hintTypeMap.put(DICTIONARY_PII, false);
hintTypeMap.put(PURITY_INDICATOR, false);
hintTypeMap.put(IMAGE_INDICATOR, true);
hintTypeMap.put(OCR_INDICATOR, true);
hintTypeMap.put(FORMULA_INDICATOR, false);
hintTypeMap.put(LOGO_INDICATOR, false);
hintTypeMap.put(SIGNATURE_INDICATOR, false);
hintTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, false);
hintTypeMap.put(IMPORTED_REDACTION_INDICATOR, false);
hintTypeMap.put(ROTATE_SIMPLE_INDICATOR, false);
// Value passed to Type.builder().isCaseInsensitive(...) for each type.
caseInSensitiveMap.put(VERTEBRATE_INDICATOR, true);
caseInSensitiveMap.put(DICTIONARY_ADDRESS, false);
caseInSensitiveMap.put(DICTIONARY_AUTHOR, false);
caseInSensitiveMap.put(DICTIONARY_SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY_INDICATOR, true);
caseInSensitiveMap.put(MUST_REDACT_INDICATOR, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION_INDICATOR, true);
caseInSensitiveMap.put(TEST_METHOD_INDICATOR, false);
caseInSensitiveMap.put(DICTIONARY_PII, false);
caseInSensitiveMap.put(PURITY_INDICATOR, false);
caseInSensitiveMap.put(IMAGE_INDICATOR, true);
caseInSensitiveMap.put(OCR_INDICATOR, true);
caseInSensitiveMap.put(SIGNATURE_INDICATOR, true);
caseInSensitiveMap.put(LOGO_INDICATOR, true);
caseInSensitiveMap.put(FORMULA_INDICATOR, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS_INDICATOR, false);
caseInSensitiveMap.put(IMPORTED_REDACTION_INDICATOR, false);
caseInSensitiveMap.put(ROTATE_SIMPLE_INDICATOR, true);
// Value passed to Type.builder().isRecommendation(...); false for every type here.
recommendationTypeMap.put(VERTEBRATE_INDICATOR, false);
recommendationTypeMap.put(DICTIONARY_ADDRESS, false);
recommendationTypeMap.put(DICTIONARY_AUTHOR, false);
recommendationTypeMap.put(DICTIONARY_SPONSOR, false);
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
recommendationTypeMap.put(REDACTION_INDICATOR, false);
recommendationTypeMap.put(HINT_ONLY_INDICATOR, false);
recommendationTypeMap.put(MUST_REDACT_INDICATOR, false);
recommendationTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, false);
recommendationTypeMap.put(TEST_METHOD_INDICATOR, false);
recommendationTypeMap.put(DICTIONARY_PII, false);
recommendationTypeMap.put(PURITY_INDICATOR, false);
recommendationTypeMap.put(IMAGE_INDICATOR, false);
recommendationTypeMap.put(OCR_INDICATOR, false);
recommendationTypeMap.put(FORMULA_INDICATOR, false);
recommendationTypeMap.put(SIGNATURE_INDICATOR, false);
recommendationTypeMap.put(LOGO_INDICATOR, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, false);
recommendationTypeMap.put(IMPORTED_REDACTION_INDICATOR, false);
recommendationTypeMap.put(ROTATE_SIMPLE_INDICATOR, false);
// Value passed to Type.builder().rank(...) for each type.
rankTypeMap.put(PURITY_INDICATOR, 155);
rankTypeMap.put(DICTIONARY_PII, 150);
rankTypeMap.put(DICTIONARY_ADDRESS, 140);
rankTypeMap.put(DICTIONARY_AUTHOR, 130);
rankTypeMap.put(DICTIONARY_SPONSOR, 120);
rankTypeMap.put(VERTEBRATE_INDICATOR, 110);
rankTypeMap.put(MUST_REDACT_INDICATOR, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, 70);
rankTypeMap.put(TEST_METHOD_INDICATOR, 60);
rankTypeMap.put(HINT_ONLY_INDICATOR, 50);
rankTypeMap.put(IMAGE_INDICATOR, 30);
rankTypeMap.put(OCR_INDICATOR, 29);
rankTypeMap.put(LOGO_INDICATOR, 28);
rankTypeMap.put(SIGNATURE_INDICATOR, 27);
rankTypeMap.put(FORMULA_INDICATOR, 26);
rankTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, 200);
rankTypeMap.put(IMPORTED_REDACTION_INDICATOR, 200);
rankTypeMap.put(ROTATE_SIMPLE_INDICATOR, 150);
// Colours set on the shared Colors object.
colors.setSkippedColor("#cccccc");
colors.setRequestAddColor("#04b093");
colors.setRequestRemoveColor("#04b093");
}
/**
 * Stores the classpath resource {@code files/ner_response.json} in the storage service under
 * the NER-entities storage id of the test dossier/file, for the current tenant.
 */
@SneakyThrows
protected void loadNerForTest() {
    ClassPathResource nerResponse = new ClassPathResource("files/ner_response.json");
    var tenantId = TenantContext.getTenantId();
    var nerStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES);
    InputStream nerStream = nerResponse.getInputStream();
    storageService.storeObject(tenantId, nerStorageId, nerStream);
}
/**
 * Builds one {@link Type} per entry of the type colour map, combining the colour with the
 * hint, case-insensitivity, recommendation and rank values configured for the same type.
 *
 * @return a new list with one type per colour-map entry, in the map's iteration order
 */
protected List<Type> getTypeResponse() {
    List<Type> types = new ArrayList<>();
    for (Map.Entry<String, String> typeColor : typeColorMap.entrySet()) {
        types.add(Type.builder()
                .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
                .type(typeColor.getKey())
                .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
                .hexColor(typeColor.getValue())
                .isHint(hintTypeMap.get(typeColor.getKey()))
                .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
                .isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
                .rank(rankTypeMap.get(typeColor.getKey()))
                .build());
    }
    return types;
}
/**
 * Builds the {@link Type} returned by the mocked dictionary client for one type.
 *
 * @param type                type name whose configuration and entries are used
 * @param isDossierDictionary {@code true} to take the entries from the dossier dictionary
 *                            map, {@code false} to take them from the template dictionary map
 * @return the type including entries, false-positive and false-recommendation entries
 */
protected Type getDictionaryResponse(String type, boolean isDossierDictionary) {
    List<DictionaryEntry> entries;
    if (isDossierDictionary) {
        entries = toDictionaryEntry(dossierDictionary.get(type));
    } else {
        entries = toDictionaryEntry(dictionary.get(type));
    }

    // Types without a registered list get a fresh empty list.
    List<DictionaryEntry> falsePositiveEntries;
    if (falsePositive.containsKey(type)) {
        falsePositiveEntries = toDictionaryEntry(falsePositive.get(type));
    } else {
        falsePositiveEntries = new ArrayList<>();
    }

    List<DictionaryEntry> falseRecommendationEntries;
    if (falseRecommendation.containsKey(type)) {
        falseRecommendationEntries = toDictionaryEntry(falseRecommendation.get(type));
    } else {
        falseRecommendationEntries = new ArrayList<>();
    }

    return Type.builder()
            .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .hexColor(typeColorMap.get(type))
            .entries(entries)
            .falsePositiveEntries(falsePositiveEntries)
            .falseRecommendationEntries(falseRecommendationEntries)
            .isHint(hintTypeMap.get(type))
            .isCaseInsensitive(caseInSensitiveMap.get(type))
            .isRecommendation(recommendationTypeMap.get(type))
            .rank(rankTypeMap.get(type))
            .build();
}
/**
 * Normalises a raw dictionary line: first passes it through
 * {@code TextNormalizationUtilities.removeHyphenLineBreaks}, then replaces every remaining
 * newline with a single space.
 */
private String cleanDictionaryEntry(String entry) {
    String withoutHyphenBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(entry);
    return withoutHyphenBreaks.replaceAll("\\n", " ");
}
/**
 * Converts raw entry strings into {@link DictionaryEntry} objects, one per string and in the
 * same order. A {@code null} list is treated like an empty one.
 */
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
    List<String> values = entries != null ? entries : Collections.emptyList();
    List<DictionaryEntry> converted = new ArrayList<>();
    for (String value : values) {
        converted.add(toDictionaryEntry(value));
    }
    return converted;
}
/**
 * Builds the {@link DictionaryEntry} for one raw value. The version comes from the
 * re-analysis version map (0 when the value has no entry there) and the deleted flag is set
 * when the value is contained in the {@code deleted} set.
 */
private DictionaryEntry toDictionaryEntry(String entry) {
    return DictionaryEntry.builder()
            .value(entry)
            .version(reanlysisVersions.getOrDefault(entry, 0L))
            .deleted(deleted.contains(entry))
            .build();
}
/**
 * Stores the given classpath file together with the empty CV-service response fixture and
 * returns the analyze request for it.
 *
 * @param file classpath location of the file to store
 * @return the request produced by {@code prepareStorage}
 */
@SneakyThrows
protected AnalyzeRequest uploadFileToStorage(String file) {
    String emptyCvServiceResponse = "files/cv_service_empty_response.json";
    return prepareStorage(file, emptyCvServiceResponse);
}
/**
 * Opens both classpath resources and delegates to the stream based {@code prepareStorage}.
 *
 * @param file                  classpath location of the file to store
 * @param cvServiceResponseFile classpath location of the CV-service response to store
 * @return the analyze request for the stored test file
 */
@SneakyThrows
protected AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
    ClassPathResource fileResource = new ClassPathResource(file);
    ClassPathResource cvResponseResource = new ClassPathResource(cvServiceResponseFile);
    InputStream fileStream = fileResource.getInputStream();
    InputStream cvResponseStream = cvResponseResource.getInputStream();
    return prepareStorage(fileStream, cvResponseStream);
}
/**
 * Builds the analyze request for the test dossier/file and stores both streams for the
 * current tenant: the CV-service response under the TABLES storage id and the file itself
 * under the ORIGIN storage id.
 *
 * @param fileStream                  content of the file to analyse
 * @param cvServiceResponseFileStream content of the CV-service response
 * @return the analyze request, with {@code lastProcessed} set to the time of the call
 */
@SneakyThrows
protected AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
    AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .lastProcessed(OffsetDateTime.now())
            .build();

    // CV-service response first, then the original file.
    storageService.storeObject(
            TenantContext.getTenantId(),
            RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
            cvServiceResponseFileStream);
    storageService.storeObject(
            TenantContext.getTenantId(),
            RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN),
            fileStream);

    return analyzeRequest;
}
}

View File

@ -3,28 +3,26 @@ package com.iqser.red.service.redaction.v1.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@ -33,17 +31,10 @@ import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.builder.KieRepository;
import org.kie.api.builder.ReleaseId;
import org.kie.api.runtime.KieContainer;
import org.kie.internal.io.ResourceFactory;
import org.mockito.stubbing.Answer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
@ -53,9 +44,7 @@ import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
@ -72,24 +61,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
@ -99,85 +78,12 @@ import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class RedactionIntegrationTest {
public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
// Drools rule source, read from the classpath once when this class is initialised.
private static final String RULES = loadFromClassPath("drools/rules.drl");
// Dictionary type names. The tests use them as keys of the in-memory dictionary/type maps and
// as the prefix of the "<type>:<dossierTemplateId>" ids that are stubbed on the dictionary client.
private static final String VERTEBRATE = "vertebrate";
private static final String ADDRESS = "CBI_address";
private static final String AUTHOR = "CBI_author";
private static final String SPONSOR = "CBI_sponsor";
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String REDACTION_INDICATOR = "redaction_indicator";
private static final String HINT_ONLY = "hint_only";
private static final String MUST_REDACT = "must_redact";
private static final String PUBLISHED_INFORMATION = "published_information";
private static final String TEST_METHOD = "test_method";
private static final String PURITY = "purity";
private static final String IMAGE = "image";
private static final String LOGO = "logo";
private static final String SIGNATURE = "signature";
private static final String FORMULA = "formula";
private static final String OCR = "ocr";
// The two types below are the ones stubbed with getDictionaryResponse(..., true) in mockDictionaryCalls.
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String IMPORTED_REDACTION = "imported_redaction";
private static final String PII = "PII";
// Type loaded on its own by loadOnlyDictionaryForSimpleFile (from dictionaries/RotateTestFileSimple.txt).
private static final String ROTATE_SIMPLE = "RotateSimple";
// Collaborators: @Autowired fields are taken from the Spring test context,
// @MockBean fields are replaced in that context by Mockito mocks.
@Autowired
private RedactionController redactionController;
@Autowired
private AnnotationService annotationService;
@Autowired
private AnalyzeService analyzeService;
@Autowired
private ObjectMapper objectMapper;
// Mocked: provides the rules to the service (stubbed per test setup).
@MockBean
private RulesClient rulesClient;
// Mocked: dictionary versions, types, entries and colors are stubbed in stubClients/mockDictionaryCalls.
@MockBean
private DictionaryClient dictionaryClient;
@Autowired
private RedactionStorageService redactionStorageService;
@Autowired
private StorageService storageService;
@Autowired
private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@MockBean
private AmazonS3 amazonS3;
@MockBean
private RabbitTemplate rabbitTemplate;
@MockBean
private LegalBasisClient legalBasisClient;
// In-memory test data, keyed by dictionary type name.
// dictionary, dossierDictionary and falsePositive are filled by loadDictionaryForTest.
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, List<String>> falsePositive = new HashMap<>();
private final Map<String, List<String>> falseRecommendation = new HashMap<>();
// Per-type attributes; read when building the stubbed Type objects (see stubClients).
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
// Dictionary entry -> version number, set by tests when they add an entry before a reanalysis.
private final Map<String, Long> reanlysisVersions = new HashMap<>();
// Entries that tests mark as deleted.
private final Set<String> deleted = new HashSet<>();
// Fixed ids used for every request in this test class (all three share the value "123").
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
private final static String TEST_DOSSIER_ID = "123";
private final static String TEST_FILE_ID = "123";
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
@ComponentScan(excludeFilters={@ComponentScan.Filter(type= FilterType.ASSIGNABLE_TYPE, value=StorageAutoConfiguration.class)})
public static class RedactionIntegrationTestConfiguration {
@ -207,15 +113,6 @@ public class RedactionIntegrationTest {
}
/**
 * Runs after every test: when the injected storage service is a
 * {@code FileSystemBackedStorageService}, calls its {@code clearStorage()};
 * any other storage implementation is left untouched.
 */
@AfterEach
public void cleanupStorage() {

    if (!(this.storageService instanceof FileSystemBackedStorageService)) {
        return;
    }
    FileSystemBackedStorageService fileSystemStorage = (FileSystemBackedStorageService) this.storageService;
    fileSystemStorage.clearStorage();
}
@BeforeEach
public void stubClients() {
@ -232,67 +129,26 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS)
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS_INDICATOR)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
.build()));
mockDictionaryCalls(null);
mockDictionaryCalls(0L);
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
/**
 * Stubs {@code dictionaryClient.getDictionaryForType("<type>:<TEST_DOSSIER_TEMPLATE_ID>", version)}
 * for every dictionary type used by this test class. Each stub answers lazily with
 * {@code getDictionaryResponse(type, flag)}, so the response is built when the client is called,
 * not when the stub is registered.
 *
 * @param version the version argument the stubbed calls must be invoked with (may be {@code null})
 */
private void mockDictionaryCalls(Long version) {

    // Types answered with getDictionaryResponse(type, false), in the original stubbing order.
    List<String> typesAnsweredWithFalse = List.of(VERTEBRATE,
            ADDRESS,
            AUTHOR,
            SPONSOR,
            NO_REDACTION_INDICATOR,
            REDACTION_INDICATOR,
            HINT_ONLY,
            MUST_REDACT,
            PUBLISHED_INFORMATION,
            TEST_METHOD,
            PII,
            PURITY,
            IMAGE,
            OCR,
            LOGO,
            SIGNATURE,
            FORMULA,
            ROTATE_SIMPLE);
    for (String typeName : typesAnsweredWithFalse) {
        String typeId = typeName + ":" + TEST_DOSSIER_TEMPLATE_ID;
        when(dictionaryClient.getDictionaryForType(typeId, version)).then((Answer<Type>) invocation -> getDictionaryResponse(typeName, false));
    }

    // Types answered with getDictionaryResponse(type, true).
    List<String> typesAnsweredWithTrue = List.of(DOSSIER_REDACTIONS, IMPORTED_REDACTION);
    for (String typeName : typesAnsweredWithTrue) {
        String typeId = typeName + ":" + TEST_DOSSIER_TEMPLATE_ID;
        when(dictionaryClient.getDictionaryForType(typeId, version)).then((Answer<Type>) invocation -> getDictionaryResponse(typeName, true));
    }
}
@Test
public void test270Rotated() {
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/270Rotated.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
assertThat(result).isNotNull();
@ -303,7 +159,7 @@ public class RedactionIntegrationTest {
@Disabled
public void testLargeScannedFileOOM() {
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
AnalyzeRequest request = uploadFileToStorage("scanned/VV-377031.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
assertThat(result).isNotNull();
@ -315,7 +171,7 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/Minimal Examples/merge_images.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/merge_images.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -329,7 +185,7 @@ public class RedactionIntegrationTest {
duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1));
dictionary.get(AUTHOR).add("Drinking water");
dictionary.get(DICTIONARY_AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
@ -355,7 +211,7 @@ public class RedactionIntegrationTest {
// F. Lastname, J. Doe, M. Mustermann
// Lastname M., Doe J., Mustermann M.
AnalyzeRequest request = prepareStorage("files/Minimal Examples/ExpansionTest.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/ExpansionTest.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -371,7 +227,7 @@ public class RedactionIntegrationTest {
@Test
public void titleExtraction() throws IOException {
AnalyzeRequest request = prepareStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -394,7 +250,7 @@ public class RedactionIntegrationTest {
@Test
public void testAddFileAttribute() {
AnalyzeRequest request = prepareStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
AnalyzeRequest request = uploadFileToStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -411,7 +267,7 @@ public class RedactionIntegrationTest {
System.out.println("testIgnoreHint");
AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/test-ignore-hint.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
@ -449,15 +305,11 @@ public class RedactionIntegrationTest {
System.out.println("noExceptionShouldBeThrownForAnyFiles");
ClassLoader loader = getClass().getClassLoader();
URL url = loader.getResource("files");
File[] files = new File(url.getPath()).listFiles();
List<File> input = new ArrayList<>();
for (File file : files) {
input.addAll(getPathsRecursively(file));
}
for (File path : input) {
Path path = Paths.get(URI.create(url.toString()));
AnalyzeRequest request = prepareStorage(path.getPath());
System.out.println("Redacting file : " + path.getName());
Files.walk(path).forEach(currentPath -> {
AnalyzeRequest request = uploadFileToStorage(currentPath.toString());
System.out.println("Redacting file : " + currentPath.getFileName());
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
long fstart = System.currentTimeMillis();
@ -476,7 +328,7 @@ public class RedactionIntegrationTest {
assertThat(entry.getValue().size()).isEqualTo(1);
});
dictionary.get(AUTHOR).add("Drinking water");
dictionary.get(DICTIONARY_AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
long rstart = System.currentTimeMillis();
@ -484,8 +336,7 @@ public class RedactionIntegrationTest {
long rend = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (rend - rstart));
}
});
long end = System.currentTimeMillis();
@ -501,7 +352,7 @@ public class RedactionIntegrationTest {
String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage(fileName);
AnalyzeRequest request = uploadFileToStorage(fileName);
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder()
@ -545,10 +396,10 @@ public class RedactionIntegrationTest {
}
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
dictionary.get(AUTHOR).add("properties");
dictionary.get(DICTIONARY_AUTHOR).add("properties");
reanlysisVersions.put("properties", 1L);
dictionary.get(AUTHOR).add("physical");
dictionary.get(DICTIONARY_AUTHOR).add("physical");
reanlysisVersions.put("physical", 2L);
deleted.add("David Chubb");
@ -558,7 +409,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
start = System.currentTimeMillis();
@ -591,7 +442,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
analyzeService.reanalyze(request);
@ -607,19 +458,19 @@ public class RedactionIntegrationTest {
String fileName = "files/new/test1S1T1.pdf";
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
AnalyzeRequest request = prepareStorage(fileName);
AnalyzeRequest request = uploadFileToStorage(fileName);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
dictionary.get(AUTHOR).add("report");
dictionary.get(DICTIONARY_AUTHOR).add("report");
reanlysisVersions.put("report", 2L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(2L);
mockDictionaryCalls(0L);
analyzeService.reanalyze(request);
dictionary.get(AUTHOR).add("assessment report");
dictionary.get(DICTIONARY_AUTHOR).add("assessment report");
reanlysisVersions.put("assessment report", 3L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
mockDictionaryCalls(2L);
@ -650,10 +501,12 @@ public class RedactionIntegrationTest {
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
ClassPathResource responseJson = new ClassPathResource("files/crafted_document.NER_ENTITIES.json");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
responseJson.getInputStream());
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage(fileName);
AnalyzeRequest request = uploadFileToStorage(fileName);
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder()
@ -697,11 +550,11 @@ public class RedactionIntegrationTest {
}
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
dictionary.get(AUTHOR).add("properties");
dictionary.get(DICTIONARY_AUTHOR).add("properties");
reanlysisVersions.put("properties", 1L);
mockDictionaryCalls(0L);
dictionary.get(AUTHOR).add("physical");
dictionary.get(DICTIONARY_AUTHOR).add("physical");
reanlysisVersions.put("physical", 2L);
deleted.add("David Chubb");
@ -713,7 +566,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
start = System.currentTimeMillis();
@ -747,7 +600,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
analyzeService.reanalyze(request);
@ -770,7 +623,7 @@ public class RedactionIntegrationTest {
};
List<Type> types = objectMapper.readValue(typeResource.getInputStream(), typeRefForTypes);
AnalyzeRequest request = prepareStorage("files/new/PublishedInformationTest.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/PublishedInformationTest.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
ManualRedactions manualRedactions = new ManualRedactions();
manualRedactions.getIdsToRemove()
@ -828,7 +681,7 @@ public class RedactionIntegrationTest {
System.out.println("testTableRedaction");
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -897,7 +750,7 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/new/unicodeProblem.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/unicodeProblem.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -919,7 +772,7 @@ public class RedactionIntegrationTest {
System.out.println("testTableRedaction");
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/new/RotateTestFile.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/RotateTestFile.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -945,7 +798,7 @@ public class RedactionIntegrationTest {
System.out.println("testTableRedaction");
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/new/RotateTestFileSimple.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/RotateTestFileSimple.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -967,7 +820,7 @@ public class RedactionIntegrationTest {
System.out.println("testTableHeader");
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/Minimal Examples/NoHeaderTable.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/NoHeaderTable.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1002,7 +855,7 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/new/S157.pdf");
AnalyzeRequest request = uploadFileToStorage("files/new/S157.pdf");
ManualRedactions manualRedactions = new ManualRedactions();
@ -1078,7 +931,7 @@ public class RedactionIntegrationTest {
manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
AnalyzeRequest request = prepareStorage(pdfFile);
AnalyzeRequest request = uploadFileToStorage(pdfFile);
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1108,14 +961,10 @@ public class RedactionIntegrationTest {
}
@Test
public void phantomCellsDocumentTest() throws IOException {
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Phantom Cells.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/Phantom Cells.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1135,7 +984,7 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/Minimal Examples/sponsor_companies.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/sponsor_companies.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1197,7 +1046,7 @@ public class RedactionIntegrationTest {
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
AnalyzeRequest request = prepareStorage(pdfFile);
AnalyzeRequest request = uploadFileToStorage(pdfFile);
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1253,7 +1102,7 @@ public class RedactionIntegrationTest {
System.out.println("expandByRegex");
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1308,7 +1157,7 @@ public class RedactionIntegrationTest {
manualRedactions.getEntriesToAdd().add(manualRedactionEntry2);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry3);
AnalyzeRequest request = prepareStorage(pdfFile);
AnalyzeRequest request = uploadFileToStorage(pdfFile);
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1332,280 +1181,15 @@ public class RedactionIntegrationTest {
}
/**
 * Fills the in-memory test dictionaries from the classpath resources under {@code dictionaries/}.
 * Every loaded entry is passed through {@code cleanDictionaryEntry} and de-duplicated via a set
 * before it is appended to the list stored for its type. Existing lists are appended to, except
 * for {@code IMPORTED_REDACTION}, whose dossier dictionary list is replaced by an empty one.
 */
private void loadDictionaryForTest() {

    // {type name, classpath resource} pairs for the template-level dictionary, in load order.
    String[][] templateDictionarySources = {
            {AUTHOR, "dictionaries/CBI_author.txt"},
            {SPONSOR, "dictionaries/CBI_sponsor.txt"},
            {VERTEBRATE, "dictionaries/vertebrate.txt"},
            {ADDRESS, "dictionaries/CBI_address.txt"},
            {NO_REDACTION_INDICATOR, "dictionaries/no_redaction_indicator.txt"},
            {REDACTION_INDICATOR, "dictionaries/redaction_indicator.txt"},
            {HINT_ONLY, "dictionaries/hint_only.txt"},
            {MUST_REDACT, "dictionaries/must_redact.txt"},
            {PUBLISHED_INFORMATION, "dictionaries/published_information.txt"},
            {TEST_METHOD, "dictionaries/test_method.txt"},
            {PII, "dictionaries/PII.txt"},
            {PURITY, "dictionaries/purity.txt"},
            {IMAGE, "dictionaries/empty.txt"},
            {OCR, "dictionaries/empty.txt"},
            {LOGO, "dictionaries/empty.txt"},
            {SIGNATURE, "dictionaries/empty.txt"},
            {FORMULA, "dictionaries/empty.txt"}
    };
    for (String[] source : templateDictionarySources) {
        List<String> entries = dictionary.computeIfAbsent(source[0], key -> new ArrayList<>());
        entries.addAll(ResourceLoader.load(source[1]).stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    }

    // Dossier-level dictionary.
    List<String> dossierRedactionEntries = dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, key -> new ArrayList<>());
    dossierRedactionEntries.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
    dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());

    // False positives are only loaded for PII.
    List<String> piiFalsePositives = falsePositive.computeIfAbsent(PII, key -> new ArrayList<>());
    piiFalsePositives.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
}
/**
 * Discards every entry of the in-memory dictionary and reloads it with a single type,
 * {@code ROTATE_SIMPLE}, whose cleaned and de-duplicated entries come from
 * {@code dictionaries/RotateTestFileSimple.txt}.
 */
private void loadOnlyDictionaryForSimpleFile() {

    dictionary.clear();
    List<String> rotateSimpleEntries = dictionary.computeIfAbsent(ROTATE_SIMPLE, key -> new ArrayList<>());
    Set<String> cleanedEntries = ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet());
    rotateSimpleEntries.addAll(cleanedEntries);
}
/**
 * Reads a classpath resource as UTF-8 text.
 * <p>
 * The resource is read line by line and every line is terminated with {@code "\n"} in the result,
 * regardless of the line separator used in the resource.
 *
 * @param path classpath-relative location of the resource
 * @return the resource content with normalised line endings
 * @throws IllegalArgumentException if the resource does not exist or cannot be read
 */
private static String loadFromClassPath(String path) {
    URL resource = ResourceLoader.class.getClassLoader().getResource(path);
    if (resource == null) {
        // Report the path that was actually requested, not a hard-coded file name.
        throw new IllegalArgumentException("could not load classpath resource: " + path);
    }
    try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String str;
        while ((str = br.readLine()) != null) {
            sb.append(str).append("\n");
        }
        return sb.toString();
    } catch (IOException e) {
        throw new IllegalArgumentException("could not load classpath resource: " + path, e);
    }
}
/**
 * Collects every regular file below the given directory, descending into sub-directories.
 *
 * @param path directory to walk; may be {@code null}
 * @return a mutable list of all regular files found; empty when {@code path} is {@code null},
 *         is not a directory, or cannot be listed
 */
private List<File> getPathsRecursively(File path) {
    List<File> result = new ArrayList<>();
    if (path == null) {
        return result;
    }
    // listFiles() returns null for non-directories and on I/O errors. Call it once so the
    // null check and the iteration see the same snapshot of the directory.
    File[] children = path.listFiles();
    if (children == null) {
        return result;
    }
    for (File child : children) {
        if (child.isFile()) {
            result.add(child);
        } else {
            result.addAll(getPathsRecursively(child));
        }
    }
    return result;
}
/**
 * Fills the per-type lookup maps (colour, hint flag, case-insensitivity flag, recommendation flag
 * and rank) and the shared {@code colors} object with the fixed values used by the tests.
 * <p>
 * Note that {@code DOSSIER_REDACTIONS} has no entry in {@code typeColorMap} but is present in all
 * other maps.
 */
private void loadTypeForTest() {
// Hex colour per type.
typeColorMap.put(VERTEBRATE, "#ff85f7");
typeColorMap.put(ADDRESS, "#ffe187");
typeColorMap.put(AUTHOR, "#ffe187");
typeColorMap.put(SPONSOR, "#85ebff");
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
typeColorMap.put(HINT_ONLY, "#abc0c4");
typeColorMap.put(MUST_REDACT, "#fab4c0");
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
typeColorMap.put(TEST_METHOD, "#91fae8");
typeColorMap.put(PII, "#66ccff");
typeColorMap.put(PURITY, "#ffe187");
typeColorMap.put(IMAGE, "#fcc5fb");
typeColorMap.put(OCR, "#fcc5fb");
typeColorMap.put(LOGO, "#ffe187");
typeColorMap.put(FORMULA, "#ffe187");
typeColorMap.put(SIGNATURE, "#ffe187");
typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6");
typeColorMap.put(ROTATE_SIMPLE, "#66ccff");
// Value passed to Type.isHint(...) for each type.
hintTypeMap.put(VERTEBRATE, true);
hintTypeMap.put(ADDRESS, false);
hintTypeMap.put(AUTHOR, false);
hintTypeMap.put(SPONSOR, false);
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY, true);
hintTypeMap.put(MUST_REDACT, true);
hintTypeMap.put(PUBLISHED_INFORMATION, true);
hintTypeMap.put(TEST_METHOD, true);
hintTypeMap.put(PII, false);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
hintTypeMap.put(DOSSIER_REDACTIONS, false);
hintTypeMap.put(IMPORTED_REDACTION, false);
hintTypeMap.put(ROTATE_SIMPLE, false);
// Value passed to Type.isCaseInsensitive(...) for each type.
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
caseInSensitiveMap.put(AUTHOR, false);
caseInSensitiveMap.put(SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);
caseInSensitiveMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(IMPORTED_REDACTION, false);
caseInSensitiveMap.put(ROTATE_SIMPLE, true);
// Value passed to Type.isRecommendation(...) for each type; false for every type here.
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
recommendationTypeMap.put(AUTHOR, false);
recommendationTypeMap.put(SPONSOR, false);
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
recommendationTypeMap.put(REDACTION_INDICATOR, false);
recommendationTypeMap.put(HINT_ONLY, false);
recommendationTypeMap.put(MUST_REDACT, false);
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
recommendationTypeMap.put(TEST_METHOD, false);
recommendationTypeMap.put(PII, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(IMPORTED_REDACTION, false);
recommendationTypeMap.put(ROTATE_SIMPLE, false);
// Rank per type (presumably a higher rank takes precedence on overlapping findings; confirm against the analysis code).
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
rankTypeMap.put(AUTHOR, 130);
rankTypeMap.put(SPONSOR, 120);
rankTypeMap.put(VERTEBRATE, 110);
rankTypeMap.put(MUST_REDACT, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
rankTypeMap.put(IMPORTED_REDACTION, 200);
rankTypeMap.put(ROTATE_SIMPLE, 150);
// Colours that are not tied to a single type.
colors.setSkippedColor("#cccccc");
colors.setRequestAddColor("#04b093");
colors.setRequestRemoveColor("#04b093");
}
/**
 * Stores the canned NER response ({@code files/ner_response.json}) for the test dossier/file
 * under the {@code NER_ENTITIES} storage id of the current tenant.
 */
@SneakyThrows
private void loadNerForTest() {
    var nerResponse = new ClassPathResource("files/ner_response.json");
    var tenantId = TenantContext.getTenantId();
    var nerStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES);
    storageService.storeObject(tenantId, nerStorageId, nerResponse.getInputStream());
}
/**
 * Builds one {@code Type} per entry of {@code typeColorMap}, taking the hint, case-insensitivity,
 * recommendation and rank settings from the corresponding lookup maps.
 *
 * @return a new list with the types of the test dossier template
 */
private List<Type> getTypeResponse() {
    List<Type> types = new ArrayList<>();
    typeColorMap.forEach((typeName, hexColor) -> types.add(Type.builder()
            .id(typeName + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .type(typeName)
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .hexColor(hexColor)
            .isHint(hintTypeMap.get(typeName))
            .isCaseInsensitive(caseInSensitiveMap.get(typeName))
            .isRecommendation(recommendationTypeMap.get(typeName))
            .rank(rankTypeMap.get(typeName))
            .build()));
    return types;
}
/**
 * Builds the {@code Type} returned for a single dictionary lookup, including its entries,
 * false-positive entries and false-recommendation entries.
 *
 * @param type                dictionary type name
 * @param isDossierDictionary {@code true} to take the entries from {@code dossierDictionary},
 *                            {@code false} to take them from {@code dictionary}
 * @return the type with its entries and the flags registered for it in the lookup maps
 */
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
    List<String> rawEntries = isDossierDictionary ? dossierDictionary.get(type) : dictionary.get(type);

    List<DictionaryEntry> falsePositiveEntries;
    if (falsePositive.containsKey(type)) {
        falsePositiveEntries = toDictionaryEntry(falsePositive.get(type));
    } else {
        falsePositiveEntries = new ArrayList<>();
    }

    List<DictionaryEntry> falseRecommendationEntries;
    if (falseRecommendation.containsKey(type)) {
        falseRecommendationEntries = toDictionaryEntry(falseRecommendation.get(type));
    } else {
        falseRecommendationEntries = new ArrayList<>();
    }

    return Type.builder()
            .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
            .hexColor(typeColorMap.get(type))
            .entries(toDictionaryEntry(rawEntries))
            .falsePositiveEntries(falsePositiveEntries)
            .falseRecommendationEntries(falseRecommendationEntries)
            .isHint(hintTypeMap.get(type))
            .isCaseInsensitive(caseInSensitiveMap.get(type))
            .isRecommendation(recommendationTypeMap.get(type))
            .rank(rankTypeMap.get(type))
            .build();
}
/**
 * Normalises a raw dictionary line: hyphen line breaks are removed via
 * {@code TextNormalizationUtilities} and every remaining newline is replaced by a space.
 *
 * @param entry raw dictionary entry
 * @return the entry on a single line
 */
private String cleanDictionaryEntry(String entry) {
    String withoutHyphenBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(entry);
    return withoutHyphenBreaks.replaceAll("\\n", " ");
}
/**
 * Wraps plain dictionary values into {@code DictionaryEntry} objects.
 * <p>
 * The version of each entry is looked up in {@code reanlysisVersions} (defaulting to {@code 0})
 * and the deleted flag is set when the value is contained in {@code deleted}.
 *
 * @param entries plain values; {@code null} is treated as an empty list
 * @return a new list with one dictionary entry per input value, in input order
 */
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
    List<String> values = entries != null ? entries : Collections.emptyList();
    return values.stream()
            .map(value -> DictionaryEntry.builder()
                    .value(value)
                    .version(reanlysisVersions.getOrDefault(value, 0L))
                    .deleted(deleted.contains(value))
                    .build())
            .collect(Collectors.toList());
}
@Test
public void testImportedRedactions() throws IOException {
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
importedRedactions.getInputStream());
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@ -1636,17 +1220,17 @@ public class RedactionIntegrationTest {
@Test
public void testExpandByPrefixRegEx() throws IOException {
assertThat(dictionary.get(AUTHOR)).contains("Robinson");
assertThat(dictionary.get(AUTHOR)).doesNotContain("Mrs. Robinson");
assertThat(dictionary.get(AUTHOR)).contains("Bojangles");
assertThat(dictionary.get(AUTHOR)).doesNotContain("Mr. Bojangles");
assertThat(dictionary.get(AUTHOR)).contains("Tambourine Man");
assertThat(dictionary.get(AUTHOR)).doesNotContain("Mr. Tambourine Man");
assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Robinson");
assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mrs. Robinson");
assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Bojangles");
assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mr. Bojangles");
assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Tambourine Man");
assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mr. Tambourine Man");
String fileName = "files/mr-mrs.pdf";
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
AnalyzeRequest request = prepareStorage(fileName);
AnalyzeRequest request = uploadFileToStorage(fileName);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
@ -1665,39 +1249,4 @@ public class RedactionIntegrationTest {
assertThat(values).contains("Mr. Tambourine Man");
}
/**
 * Creates the analyze request for the test dossier/file and uploads the given streams to storage:
 * the CV service response as {@code TABLES} first, then the document as {@code ORIGIN}.
 *
 * @param fileStream                  content of the document to analyse
 * @param cvServiceResponseFileStream content of the CV service (table detection) response
 * @return the request addressing the uploaded file
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
    AnalyzeRequest analyzeRequest = AnalyzeRequest.builder()
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .lastProcessed(OffsetDateTime.now())
            .build();

    var tablesTenantId = TenantContext.getTenantId();
    var tablesStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES);
    storageService.storeObject(tablesTenantId, tablesStorageId, cvServiceResponseFileStream);

    var originTenantId = TenantContext.getTenantId();
    var originStorageId = RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN);
    storageService.storeObject(originTenantId, originStorageId, fileStream);

    return analyzeRequest;
}
/**
 * Uploads the given classpath document together with the empty CV service response.
 *
 * @param file classpath location of the document
 * @return the request addressing the uploaded file
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
    String emptyCvServiceResponse = "files/cv_service_empty_response.json";
    return prepareStorage(file, emptyCvServiceResponse);
}
/**
 * Resolves both arguments as classpath resources and uploads their content to storage.
 *
 * @param file                  classpath location of the document
 * @param cvServiceResponseFile classpath location of the CV service response
 * @return the request addressing the uploaded file
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
    var document = new ClassPathResource(file);
    var cvServiceResponse = new ClassPathResource(cvServiceResponseFile);
    InputStream documentStream = document.getInputStream();
    InputStream cvServiceResponseStream = cvServiceResponse.getInputStream();
    return prepareStorage(documentStream, cvServiceResponseStream);
}
}

View File

@ -0,0 +1,151 @@
package com.iqser.red.service.redaction.v1.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
/**
 * Integration test that runs the analysis with the {@code drools/rules_v2.drl} rule set.
 * <p>
 * The rules and dictionary clients are stubbed in {@link #stubClients()}; storage is replaced by a
 * {@code FileSystemBackedStorageService}.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTestV2.RedactionIntegrationTestConfiguration.class)
public class RedactionIntegrationTestV2 extends AbstractRedactionIntegrationTest {

    /** Content of the rule file under test, loaded once from the classpath. */
    private static final String RULES = loadFromClassPath("drools/rules_v2.drl");

    /** Test-only Spring configuration: no RabbitMQ, no real storage backend. */
    @Configuration
    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
    static class RedactionIntegrationTestConfiguration {

        /**
         * Provides a {@link KieContainer} built from a KIE file system containing {@link #RULES}.
         * <p>
         * NOTE(review): the virtual path has no {@code .drl} extension and is not below
         * {@code src/main/resources}; confirm whether the KIE builder really compiles this
         * resource or whether this bean only serves as a placeholder.
         */
        @Bean
        public KieContainer kieContainer() {

            KieServices kieServices = KieServices.Factory.get();

            KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
            InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
            kieFileSystem.write("src/test/resources/drools/rules_v2", kieServices.getResources().newInputStreamResource(input));
            KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
            kieBuilder.buildAll();
            KieModule kieModule = kieBuilder.getKieModule();

            return kieServices.newKieContainer(kieModule.getReleaseId());
        }


        /** Replaces the production storage with a file-system backed implementation. */
        @Bean
        @Primary
        public StorageService inmemoryStorage() {

            return new FileSystemBackedStorageService();
        }

    }


    /**
     * Sets the tenant, loads the test dictionaries/types/NER data and stubs the rules and
     * dictionary clients before every test.
     */
    @BeforeEach
    public void stubClients() {

        TenantContext.setTenantId("redaction");

        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));

        loadDictionaryForTest();
        loadTypeForTest();
        loadNerForTest();

        // The version stub is registered once; the original registered the identical stub twice.
        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());

        // NOTE(review): dossierTemplateId is set to TEST_DOSSIER_ID while the id uses
        // TEST_DOSSIER_TEMPLATE_ID - kept as is, confirm that this is intended.
        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
                .type(DOSSIER_REDACTIONS_INDICATOR)
                .dossierTemplateId(TEST_DOSSIER_ID)
                .hexColor("#ffe187")
                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .build()));

        mockDictionaryCalls(null);

        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
    }


    /**
     * The case in this test: One term, 'Dr. Alan Miller', is found by PII-Rule and is in the PII-dictionary
     * as well as in the PII-false-positive-list - and in the CBI-author dictionary.
     * It gets redacted, as the PII-finding is false positive and so the CBI-author entry is effective
     * independent of the entity-rank
     */
    @Test
    @SneakyThrows
    public void testTermIsInTwoDictionariesAndInOneFalsePositive() {

        AnalyzeRequest request = uploadFileToStorage("files/new/simplified2.pdf");

        dictionary.clear();
        dictionary.put(DICTIONARY_PII, Arrays.asList("Dr. Alan Miller"));
        dictionary.put(DICTIONARY_AUTHOR, Arrays.asList("Dr. Alan Miller"));

        falsePositive.clear();
        falsePositive.put(DICTIONARY_PII, Arrays.asList("Dr. Alan Miller COMPLETION DATE:"));

        analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
        analyzeService.analyze(request);

        var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

        // Collection and boolean assertions are used so that a failure reports the actual content.
        assertThat(redactionLog.getRedactionLogEntry()).hasSize(1);

        RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0);

        assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
        assertThat(redactionLogEntry.getValue()).isEqualTo("Dr. Alan Miller");
        assertThat(redactionLogEntry.isRedacted()).isTrue();
        assertThat(redactionLogEntry.isRecommendation()).isFalse();
        assertThat(redactionLogEntry.isFalsePositive()).isFalse();
        assertThat(redactionLogEntry.isExcluded()).isFalse();
        assertThat(redactionLogEntry.isDictionaryEntry()).isTrue();

        assertThat(redactionLogEntry.getEngines()).hasSize(1);
        assertThat(redactionLogEntry.getEngines()).contains(Engine.DICTIONARY);
    }

}

View File

@ -0,0 +1,341 @@
package drools
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
global Section section
// --------------------------------------- AI rules -------------------------------------------------------------------
// Fires for a Section where aiMatchesType("CBI_author") holds and passes the AI type
// "CBI_author" to section.addAiEntities with "CBI_author" as second argument.
rule "0: Add CBI_author from ai"
when
Section(aiMatchesType("CBI_author"))
then
section.addAiEntities("CBI_author", "CBI_author");
end
// Fires when the Section has an AI match of type "ORG" and calls combineAiTypes with "ORG" as
// the first type, the remaining address part types as second argument and "CBI_address" as result.
// NOTE(review): the meaning of the numeric arguments (20, 3) and the boolean is defined by
// Section.combineAiTypes and is not visible here.
rule "0: Combine address parts from ai to CBI_address (org is mandatory)"
when
Section(aiMatchesType("ORG"))
then
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
// Same combination as the "org is mandatory" rule, but triggered by an AI match of type
// "STREET", which is passed as the first type to combineAiTypes.
rule "0: Combine address parts from ai to CBI_address (street is mandatory)"
when
Section(aiMatchesType("STREET"))
then
section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
// Same combination as the "org is mandatory" rule, but triggered by an AI match of type
// "CITY", which is passed as the first type to combineAiTypes.
rule "0: Combine address parts from ai to CBI_address (city is mandatory)"
when
Section(aiMatchesType("CITY"))
then
section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false);
end
/* Syngenta specific laboratory recommendation */
// Cheap pre-filter: only Sections whose searchText contains "CT" or "BL" are evaluated.
// The action adds "CBI_address" recommendations for every match of the regular expression.
// NOTE(review): the expression also accepts "cT", which the case-sensitive pre-filter does not
// let through unless "CT" or "BL" occurs elsewhere in the section - confirm this is intended.
rule "0: Recommend CTL/BL laboratory that start with BL or CTL"
when
Section(searchText.contains("CT") || searchText.contains("BL"))
then
/* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address");
end
// --------------------------------------- CBI rules -------------------------------------------------------------------
// When the "Vertebrate Study" file attribute is not "Yes" and the Section matches type
// "CBI_author": redacts "CBI_author" as rule 1 with legal basis Article 39(e)(3).
rule "1: Redact CBI Authors (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
then
section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// When the "Vertebrate Study" file attribute is "Yes" and the Section matches type
// "CBI_author": redacts "CBI_author" as rule 2 with legal basis Article 39(e)(2).
rule "2: Redact CBI Authors (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
then
section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// When the "Vertebrate Study" file attribute is not "Yes" and the Section matches type
// "CBI_address": calls redactNot for "CBI_address" as rule 3 and additionally
// ignoreRecommendations("CBI_address").
rule "3: Redact not CBI Address (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
then
section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
section.ignoreRecommendations("CBI_address");
end
// When the "Vertebrate Study" file attribute is "Yes" and the Section matches type
// "CBI_address": redacts "CBI_address" as rule 4 with legal basis Article 39(e)(2).
rule "4: Redact CBI Address (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
then
section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// For Sections matching type "CBI_author": calls expandToFalsePositiveByRegEx with a pattern
// that matches an apostrophe-like character followed by "s" (the genitive suffix).
// NOTE(review): the character class lists the plain apostrophe several times; possibly other
// typographic apostrophes were lost in an encoding conversion - verify against the original file.
rule "5: Do not redact genitive CBI_author"
when
Section(matchesType("CBI_author"))
then
section.expandToFalsePositiveByRegEx("CBI_author", "[''ʼˈ´`ʻ']s", false, 0);
end
// Non-vertebrate files: for tables that have an "Author(s)" header but no
// "Vertebrate study Y/N" header, calls redactCell on the "Author(s)" column as "CBI_author"
// (rule 6, Article 39(e)(3)).
rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: for tables that have an "Author(s)" header but no
// "Vertebrate study Y/N" header, calls redactCell on the "Author(s)" column as "CBI_author"
// (rule 7, Article 39(e)(2)).
rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: same as rule 6 but for tables whose header is "Author"
// (rule 8, Article 39(e)(3)).
rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same as rule 7 but for tables whose header is "Author"
// (rule 9, Article 39(e)(2)).
rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
then
section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: fires for table rows whose "Vertebrate study Y/N" cell equals
// "Y", "Yes", "N" or "No" and calls redactCell on "Author(s)" with the boolean argument set to
// true (rules 6-9 pass false; judging by the rule name this enables recommendations - confirm
// against Section.redactCell). Rule 10, Article 39(e)(3).
rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
then
section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same row condition and redactCell call as rule 10, with rule number 11
// and legal basis Article 39(e)(2).
rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
then
section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: pre-filters on searchText containing "et al" and then calls
// redactAndRecommendByRegEx with a pattern whose group 1 is the capitalised name (optionally
// followed by initials) directly before "et al"; the match is handled as "CBI_author"
// (rule 14, Article 39(e)(3)).
rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
then
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same "et al" pre-filter and pattern as rule 14, with rule number 15 and
// legal basis Article 39(e)(2).
rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
then
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Vertebrate files only: when the searchText contains both "Species:" and "Source:", calls
// recommendLineAfter("Source:", "CBI_address").
rule "16: Add recommendation for Addresses in Test Organism sections"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
then
section.recommendLineAfter("Source:", "CBI_address");
end
// Vertebrate files only: like rule 16, but matching "Species" and "Source" without the colon.
// Because "Species:" contains "Species", every Section that satisfies rule 16 also satisfies
// this condition.
rule "17: Add recommendation for Addresses in Test Animals sections"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
then
section.recommendLineAfter("Source", "CBI_address");
end
// When the Section matches type "published_information": calls redactNotAndReference for both
// "CBI_author" and "CBI_address", each referencing "published_information" (rule 18).
rule "18: Do not redact Names and Addresses if Published Information found"
when
Section(matchesType("published_information"))
then
section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
end
// --------------------------------------- PII rules -------------------------------------------------------------------
// Non-vertebrate files: when the Section matches type "PII", redacts "PII" as rule 19 with
// legal basis Article 39(e)(3).
rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
then
section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: when the Section matches type "PII", redacts "PII" as rule 20 with
// legal basis Article 39(e)(2).
rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
then
section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: pre-filters on searchText containing "@" and redacts group 1 of the
// e-mail address pattern as "PII" (rule 21, Article 39(e)(3)).
rule "21: Redact Emails by RegEx (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
then
section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same "@" pre-filter and e-mail pattern as rule 21, with rule number 22 and
// legal basis Article 39(e)(2).
rule "22: Redact Emails by RegEx (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
then
section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: pre-filters on the Section text containing one of the listed keywords
// (including the variants "Ter", "Fel", "Fer") and redacts group 2 of the pattern - the number
// that follows the keyword - as "PII" (rule 25, Article 39(e)(3)).
// NOTE(review): the pre-filter uses `text` (other rules use `searchText`) and is case-sensitive
// with capitalised keywords, while the pattern lists them in lower case with the flag set to
// true (presumably case-insensitive) - confirm that e.g. "TEL" or "phone" should be skipped.
rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
text.contains("Contact")
|| text.contains("Telephone")
|| text.contains("Phone")
|| text.contains("Fax")
|| text.contains("Tel")
|| text.contains("Ter")
|| text.contains("Mobile")
|| text.contains("Fel")
|| text.contains("Fer")
))
then
section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same keyword pre-filter and phone/fax pattern as rule 25, with rule
// number 26 and legal basis Article 39(e)(2).
// NOTE(review): same case-sensitivity mismatch between pre-filter and pattern as in rule 25.
rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
text.contains("Contact")
|| text.contains("Telephone")
|| text.contains("Phone")
|| text.contains("Fax")
|| text.contains("Tel")
|| text.contains("Ter")
|| text.contains("Mobile")
|| text.contains("Fel")
|| text.contains("Fer")
))
then
section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: when the searchText contains "AUTHOR(S):" and "COMPLETION DATE:" but
// not "STUDY COMPLETION DATE:", calls redactLinesBetween for these two markers as "PII"
// (rule 27, Article 39(e)(3)).
// NOTE(review): the reason text is "Author found" while rules 28-30 use "AUTHOR(S) was found".
rule "27: Redact AUTHOR(S) (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("COMPLETION DATE:")
&& !searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same condition on "AUTHOR(S):" / "COMPLETION DATE:" (without
// "STUDY COMPLETION DATE:") as rule 27, with rule number 28 and legal basis Article 39(e)(2).
rule "28: Redact AUTHOR(S) (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("COMPLETION DATE:")
&& !searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: counterpart of rule 27 for Sections that contain
// "STUDY COMPLETION DATE:"; calls redactLinesBetween for "AUTHOR(S):" and
// "STUDY COMPLETION DATE:" as "PII" (rule 29, Article 39(e)(3)).
rule "29: Redact AUTHOR(S) (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: same condition and markers as rule 29, with rule number 30 and legal
// basis Article 39(e)(2).
rule "30: Redact AUTHOR(S) (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("AUTHOR(S):")
&& searchText.contains("STUDY COMPLETION DATE:")
)
then
section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate files: when the searchText contains "PERFORMING LABORATORY:", first calls
// redactBetween for the text between that marker and "LABORATORY PROJECT ID:" as "CBI_address"
// and then redactNot for "CBI_address" (both as rule 31).
// NOTE(review): presumably the second call reverts the redaction so that the address is only
// marked, matching rule 3 - confirm against Section.redactNot.
rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("PERFORMING LABORATORY:")
)
then
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study");
end
// Vertebrate files: when the searchText contains "PERFORMING LABORATORY:", calls redactBetween
// for the text between that marker and "LABORATORY PROJECT ID:" as "CBI_address"
// (rule 32, Article 39(e)(2)).
rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
&& searchText.contains("PERFORMING LABORATORY:"))
then
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- other rules -------------------------------------------------------------------
// Independent of the file attributes: when the lower-cased searchText contains "purity", calls
// addHintAnnotationByRegEx with group 1 (the "purity ..." label in front of a percentage value)
// and type "hint_only".
rule "33: Purity Hint"
when
Section(searchText.toLowerCase().contains("purity"))
then
section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
end
// When the "Confidentiality" file attribute is not "confidential" and the Section matches type
// "dossier_redaction": calls section.ignore("dossier_redaction").
// The trailing semicolon after the pattern is not used by the other rules in this file.
rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction"));
then
section.ignore("dossier_redaction");
end
// Non-vertebrate files: when the Section matches image type "signature", redacts the image as
// rule 35 with legal basis Article 39(e)(3).
rule "35: Redact signatures (Non vertebrate study)"
when
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
then
section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate files: when the Section matches image type "signature", redacts the image as
// rule 36 with legal basis Article 39(e)(2).
rule "36: Redact signatures (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
then
section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Vertebrate files: when the Section matches image type "logo", redacts the image as rule 43
// with legal basis Article 39(e)(2). This file has no logo rule for non-vertebrate files.
rule "43: Redact Logos (Vertebrate study)"
when
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo"))
then
section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end