Merge branch 'master' of https://git.iqser.com/scm/red/redaction-service into RED-5694
# Conflicts: # redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
This commit is contained in:
commit
c3b29e4ebc
@ -0,0 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
@NoArgsConstructor
public class TenantRules {

    // Latest known rules version per dossier-template id, scoped to one tenant
    // (instances live as values of the per-tenant LoadingCache in
    // DroolsExecutionService).
    // NOTE(review): plain HashMap — assumed each instance is only mutated for a
    // single tenant's requests; confirm there is no concurrent access.
    private Map<String, Long> rulesVersionPerDossierTemplateId = new HashMap<>();
}
|
||||
@ -1,11 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.KieServices;
|
||||
@ -16,11 +18,19 @@ import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.TenantRules;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -30,7 +40,24 @@ public class DroolsExecutionService {
|
||||
|
||||
private final Map<String, KieContainer> kieContainers = new HashMap<>();
|
||||
|
||||
private final Map<String, Long> rulesVersionPerDossierTemplateId = new HashMap<>();
|
||||
private final RedactionServiceSettings settings;
|
||||
|
||||
private LoadingCache<String, TenantRules> tenantRulesCache;
|
||||
|
||||
|
||||
@PostConstruct
protected void createCache() {

    // Build the per-tenant rules-version cache. Missing tenants are lazily
    // initialized with an empty TenantRules holder.
    // NOTE(review): the cache reuses the *dictionary* cache sizing/expiry
    // settings (getDictionaryCacheMaximumSize / ...ExpireAfterAccessDays) —
    // confirm this is intentional rather than dedicated tenant-rules settings.
    tenantRulesCache = CacheBuilder.newBuilder()
            .maximumSize(settings.getDictionaryCacheMaximumSize())
            .expireAfterAccess(settings.getDictionaryCacheExpireAfterAccessDays(), TimeUnit.DAYS)
            .build(new CacheLoader<>() {

                // Key is the tenant id (see TenantContext.getTenantId() callers).
                public TenantRules load(String key) {
                    return new TenantRules();
                }
            });
}
|
||||
|
||||
|
||||
public KieContainer getKieContainer(String dossierTemplateId) {
|
||||
@ -61,13 +88,13 @@ public class DroolsExecutionService {
|
||||
public KieContainer updateRules(String dossierTemplateId) {
|
||||
|
||||
long version = rulesClient.getVersion(dossierTemplateId);
|
||||
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
|
||||
Long rulesVersion = getVersionForDossierTemplate(dossierTemplateId);
|
||||
if (rulesVersion == null) {
|
||||
rulesVersion = -1L;
|
||||
}
|
||||
|
||||
if (version > rulesVersion) {
|
||||
rulesVersionPerDossierTemplateId.put(dossierTemplateId, version);
|
||||
setRulesVersionForDossierTemplate(dossierTemplateId, version);
|
||||
return createOrUpdateKieContainer(dossierTemplateId);
|
||||
}
|
||||
return getKieContainer(dossierTemplateId);
|
||||
@ -126,11 +153,25 @@ public class DroolsExecutionService {
|
||||
|
||||
public long getRulesVersion(String dossierTemplateId) {
|
||||
|
||||
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
|
||||
Long rulesVersion = getVersionForDossierTemplate(dossierTemplateId);
|
||||
if (rulesVersion == null) {
|
||||
return -1;
|
||||
}
|
||||
return rulesVersion;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Long getVersionForDossierTemplate(String dossierTemplateId) {
|
||||
|
||||
return tenantRulesCache.get(TenantContext.getTenantId()).getRulesVersionPerDossierTemplateId().get(dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void setRulesVersionForDossierTemplate(String dossierTemplateId, long version) {
|
||||
|
||||
tenantRulesCache.get(TenantContext.getTenantId()).getRulesVersionPerDossierTemplateId().put(dossierTemplateId, version);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ class EntityFinder {
|
||||
!local,
|
||||
model.isDossierDictionary(),
|
||||
local ? Engine.RULE : Engine.DICTIONARY,
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY)).stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet());
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
|
||||
@ -273,14 +273,14 @@ public final class EntitySearchUtils {
|
||||
existing.setLegalBasis(found.getLegalBasis());
|
||||
existing.setMatchedRule(found.getMatchedRule());
|
||||
existing.setRedactionReason(found.getRedactionReason());
|
||||
if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY) || existing.getEntityType()
|
||||
.equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) {
|
||||
if (isOneARecommendationAndTheOtherEntity(found, existing)) {
|
||||
existing.setEntityType(EntityType.ENTITY);
|
||||
if (found.isRedaction()) {
|
||||
existing.setRedaction(true);
|
||||
}
|
||||
}
|
||||
} else if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
|
||||
} else if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType()) &&
|
||||
!(isOneARecommendationAndTheOtherEntity(found, existing) && existing.isRedaction() && found.isRedaction()) ) {
|
||||
entities.remove(found);
|
||||
entities.add(found);
|
||||
}
|
||||
@ -289,6 +289,13 @@ public final class EntitySearchUtils {
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isOneARecommendationAndTheOtherEntity(Entity entityOne, Entity entityTwo) {
|
||||
|
||||
var entityTypeOne = entityOne.getEntityType();
|
||||
var entityTypeTwo = entityTwo.getEntityType();
|
||||
return entityTypeTwo.equals(EntityType.RECOMMENDATION) && entityTypeOne.equals(EntityType.ENTITY)
|
||||
|| entityTypeTwo.equals(EntityType.ENTITY) && entityTypeOne.equals(EntityType.RECOMMENDATION);
|
||||
}
|
||||
|
||||
public void addEntitiesIgnoreRank(Set<Entity> entities, Set<Entity> found) {
|
||||
// HashSet keeps old value but we want the new.
|
||||
|
||||
@ -0,0 +1,462 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.mockito.stubbing.Answer;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
protected static final String VERTEBRATE_INDICATOR = "vertebrate";
|
||||
protected static final String DICTIONARY_ADDRESS = "CBI_address";
|
||||
protected static final String DICTIONARY_AUTHOR = "CBI_author";
|
||||
protected static final String DICTIONARY_SPONSOR = "CBI_sponsor";
|
||||
protected static final String DICTIONARY_PII = "PII";
|
||||
protected static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
|
||||
protected static final String REDACTION_INDICATOR = "redaction_indicator";
|
||||
protected static final String HINT_ONLY_INDICATOR = "hint_only";
|
||||
protected static final String MUST_REDACT_INDICATOR = "must_redact";
|
||||
protected static final String PUBLISHED_INFORMATION_INDICATOR = "published_information";
|
||||
protected static final String TEST_METHOD_INDICATOR = "test_method";
|
||||
protected static final String PURITY_INDICATOR = "purity";
|
||||
protected static final String IMAGE_INDICATOR = "image";
|
||||
protected static final String LOGO_INDICATOR = "logo";
|
||||
protected static final String SIGNATURE_INDICATOR = "signature";
|
||||
protected static final String FORMULA_INDICATOR = "formula";
|
||||
protected static final String OCR_INDICATOR = "ocr";
|
||||
protected static final String DOSSIER_REDACTIONS_INDICATOR = "dossier_redactions";
|
||||
protected static final String IMPORTED_REDACTION_INDICATOR = "imported_redaction";
|
||||
protected static final String ROTATE_SIMPLE_INDICATOR = "RotateSimple";
|
||||
|
||||
protected final static String TEST_DOSSIER_TEMPLATE_ID = "123";
|
||||
public static final String IMPORTED_REDACTION_TYPE_ID = IMPORTED_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String DOSSIER_REDACTIONS_TYPE_ID = DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String ROTATE_SIMPLE_TYPE_ID = ROTATE_SIMPLE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String FORMULA_TYPE_ID = FORMULA_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String SIGNATURE_TYPE_ID = SIGNATURE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String LOGO_TYPE_ID = LOGO_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String OCR_TYPE_ID = OCR_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String IMAGE_TYPE_ID = IMAGE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String PURITY_TYPE_ID = PURITY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID;
|
||||
|
||||
@Autowired
|
||||
protected RedactionController redactionController;
|
||||
|
||||
@Autowired
|
||||
protected AnnotationService annotationService;
|
||||
|
||||
@Autowired
|
||||
protected AnalyzeService analyzeService;
|
||||
|
||||
@Autowired
|
||||
protected ObjectMapper objectMapper;
|
||||
|
||||
@Autowired
|
||||
protected RedactionStorageService redactionStorageService;
|
||||
|
||||
@Autowired
|
||||
protected StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
protected ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
|
||||
|
||||
@MockBean
|
||||
protected AmazonS3 amazonS3;
|
||||
|
||||
@MockBean
|
||||
protected RabbitTemplate rabbitTemplate;
|
||||
|
||||
@MockBean
|
||||
protected LegalBasisClient legalBasisClient;
|
||||
|
||||
protected final Map<String, List<String>> dictionary = new HashMap<>();
|
||||
protected final Map<String, List<String>> dossierDictionary = new HashMap<>();
|
||||
protected final Map<String, List<String>> falsePositive = new HashMap<>();
|
||||
protected final Map<String, List<String>> falseRecommendation = new HashMap<>();
|
||||
protected final Map<String, String> typeColorMap = new HashMap<>();
|
||||
protected final Map<String, Boolean> hintTypeMap = new HashMap<>();
|
||||
protected final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
|
||||
protected final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
|
||||
protected final Map<String, Integer> rankTypeMap = new HashMap<>();
|
||||
protected final Colors colors = new Colors();
|
||||
protected final Map<String, Long> reanlysisVersions = new HashMap<>();
|
||||
protected final Set<String> deleted = new HashSet<>();
|
||||
|
||||
protected final static String TEST_DOSSIER_ID = "123";
|
||||
protected final static String TEST_FILE_ID = "123";
|
||||
|
||||
@MockBean
|
||||
protected RulesClient rulesClient;
|
||||
|
||||
@MockBean
|
||||
protected DictionaryClient dictionaryClient;
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void cleanupStorage() {
|
||||
|
||||
if (this.storageService instanceof FileSystemBackedStorageService) {
|
||||
((FileSystemBackedStorageService) this.storageService).clearStorage();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void mockDictionaryCalls(Long version) {
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(VERTEBRATE_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(NO_REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(HINT_ONLY_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(MUST_REDACT_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(TEST_METHOD_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(PII_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DICTIONARY_PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PURITY_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(IMAGE_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMAGE_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(OCR_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(OCR_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(LOGO_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(LOGO_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SIGNATURE_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SIGNATURE_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(FORMULA_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ROTATE_SIMPLE_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(DOSSIER_REDACTIONS_INDICATOR,true));
|
||||
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,true));
|
||||
|
||||
}
|
||||
|
||||
|
||||
protected void loadDictionaryForTest() {
|
||||
|
||||
dictionary.computeIfAbsent(DICTIONARY_AUTHOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(DICTIONARY_SPONSOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(DICTIONARY_ADDRESS, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(HINT_ONLY_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(MUST_REDACT_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PUBLISHED_INFORMATION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(TEST_METHOD_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PURITY_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(IMAGE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(OCR_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(LOGO_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SIGNATURE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(FORMULA_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>());
|
||||
|
||||
falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
|
||||
}
|
||||
|
||||
|
||||
protected void loadOnlyDictionaryForSimpleFile() {
|
||||
|
||||
dictionary.clear();
|
||||
dictionary.computeIfAbsent(ROTATE_SIMPLE_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected static String loadFromClassPath(String path) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
|
||||
}
|
||||
List<String> stringList = Files.readAllLines(new File(resource.getPath()).toPath());
|
||||
return String.join("\n", stringList);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Populates the per-type test configuration maps (color, hint flag, case
 * sensitivity, recommendation flag, rank) and the global colors used by the
 * mocked type/dictionary responses (see getTypeResponse / getDictionaryResponse).
 */
protected void loadTypeForTest() {

    // Display color per type (hex); several types intentionally share a color.
    // NOTE(review): DOSSIER_REDACTIONS_INDICATOR gets no color entry here, so
    // responses for it carry a null hexColor — confirm that is intended.
    typeColorMap.put(VERTEBRATE_INDICATOR, "#ff85f7");
    typeColorMap.put(DICTIONARY_ADDRESS, "#ffe187");
    typeColorMap.put(DICTIONARY_AUTHOR, "#ffe187");
    typeColorMap.put(DICTIONARY_SPONSOR, "#85ebff");
    typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
    typeColorMap.put(REDACTION_INDICATOR, "#caff85");
    typeColorMap.put(HINT_ONLY_INDICATOR, "#abc0c4");
    typeColorMap.put(MUST_REDACT_INDICATOR, "#fab4c0");
    typeColorMap.put(PUBLISHED_INFORMATION_INDICATOR, "#85ebff");
    typeColorMap.put(TEST_METHOD_INDICATOR, "#91fae8");
    typeColorMap.put(DICTIONARY_PII, "#66ccff");
    typeColorMap.put(PURITY_INDICATOR, "#ffe187");
    typeColorMap.put(IMAGE_INDICATOR, "#fcc5fb");
    typeColorMap.put(OCR_INDICATOR, "#fcc5fb");
    typeColorMap.put(LOGO_INDICATOR, "#ffe187");
    typeColorMap.put(FORMULA_INDICATOR, "#ffe187");
    typeColorMap.put(SIGNATURE_INDICATOR, "#ffe187");
    typeColorMap.put(IMPORTED_REDACTION_INDICATOR, "#fcfbe6");
    typeColorMap.put(ROTATE_SIMPLE_INDICATOR, "#66ccff");

    // Feeds Type.isHint in the mocked responses.
    hintTypeMap.put(VERTEBRATE_INDICATOR, true);
    hintTypeMap.put(DICTIONARY_ADDRESS, false);
    hintTypeMap.put(DICTIONARY_AUTHOR, false);
    hintTypeMap.put(DICTIONARY_SPONSOR, false);
    hintTypeMap.put(NO_REDACTION_INDICATOR, true);
    hintTypeMap.put(REDACTION_INDICATOR, true);
    hintTypeMap.put(HINT_ONLY_INDICATOR, true);
    hintTypeMap.put(MUST_REDACT_INDICATOR, true);
    hintTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, true);
    hintTypeMap.put(TEST_METHOD_INDICATOR, true);
    hintTypeMap.put(DICTIONARY_PII, false);
    hintTypeMap.put(PURITY_INDICATOR, false);
    hintTypeMap.put(IMAGE_INDICATOR, true);
    hintTypeMap.put(OCR_INDICATOR, true);
    hintTypeMap.put(FORMULA_INDICATOR, false);
    hintTypeMap.put(LOGO_INDICATOR, false);
    hintTypeMap.put(SIGNATURE_INDICATOR, false);
    hintTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, false);
    hintTypeMap.put(IMPORTED_REDACTION_INDICATOR, false);
    hintTypeMap.put(ROTATE_SIMPLE_INDICATOR, false);

    // Feeds Type.isCaseInsensitive in the mocked responses.
    caseInSensitiveMap.put(VERTEBRATE_INDICATOR, true);
    caseInSensitiveMap.put(DICTIONARY_ADDRESS, false);
    caseInSensitiveMap.put(DICTIONARY_AUTHOR, false);
    caseInSensitiveMap.put(DICTIONARY_SPONSOR, false);
    caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
    caseInSensitiveMap.put(REDACTION_INDICATOR, true);
    caseInSensitiveMap.put(HINT_ONLY_INDICATOR, true);
    caseInSensitiveMap.put(MUST_REDACT_INDICATOR, true);
    caseInSensitiveMap.put(PUBLISHED_INFORMATION_INDICATOR, true);
    caseInSensitiveMap.put(TEST_METHOD_INDICATOR, false);
    caseInSensitiveMap.put(DICTIONARY_PII, false);
    caseInSensitiveMap.put(PURITY_INDICATOR, false);
    caseInSensitiveMap.put(IMAGE_INDICATOR, true);
    caseInSensitiveMap.put(OCR_INDICATOR, true);
    caseInSensitiveMap.put(SIGNATURE_INDICATOR, true);
    caseInSensitiveMap.put(LOGO_INDICATOR, true);
    caseInSensitiveMap.put(FORMULA_INDICATOR, true);
    caseInSensitiveMap.put(DOSSIER_REDACTIONS_INDICATOR, false);
    caseInSensitiveMap.put(IMPORTED_REDACTION_INDICATOR, false);
    caseInSensitiveMap.put(ROTATE_SIMPLE_INDICATOR, true);

    // Feeds Type.isRecommendation — every test type is a non-recommendation.
    recommendationTypeMap.put(VERTEBRATE_INDICATOR, false);
    recommendationTypeMap.put(DICTIONARY_ADDRESS, false);
    recommendationTypeMap.put(DICTIONARY_AUTHOR, false);
    recommendationTypeMap.put(DICTIONARY_SPONSOR, false);
    recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
    recommendationTypeMap.put(REDACTION_INDICATOR, false);
    recommendationTypeMap.put(HINT_ONLY_INDICATOR, false);
    recommendationTypeMap.put(MUST_REDACT_INDICATOR, false);
    recommendationTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, false);
    recommendationTypeMap.put(TEST_METHOD_INDICATOR, false);
    recommendationTypeMap.put(DICTIONARY_PII, false);
    recommendationTypeMap.put(PURITY_INDICATOR, false);
    recommendationTypeMap.put(IMAGE_INDICATOR, false);
    recommendationTypeMap.put(OCR_INDICATOR, false);
    recommendationTypeMap.put(FORMULA_INDICATOR, false);
    recommendationTypeMap.put(SIGNATURE_INDICATOR, false);
    recommendationTypeMap.put(LOGO_INDICATOR, false);
    recommendationTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, false);
    recommendationTypeMap.put(IMPORTED_REDACTION_INDICATOR, false);
    recommendationTypeMap.put(ROTATE_SIMPLE_INDICATOR, false);

    // Feeds Type.rank; compared via dictionary.getDictionaryRank when merging
    // overlapping entities (higher values appear to win — TODO confirm).
    rankTypeMap.put(PURITY_INDICATOR, 155);
    rankTypeMap.put(DICTIONARY_PII, 150);
    rankTypeMap.put(DICTIONARY_ADDRESS, 140);
    rankTypeMap.put(DICTIONARY_AUTHOR, 130);
    rankTypeMap.put(DICTIONARY_SPONSOR, 120);
    rankTypeMap.put(VERTEBRATE_INDICATOR, 110);
    rankTypeMap.put(MUST_REDACT_INDICATOR, 100);
    rankTypeMap.put(REDACTION_INDICATOR, 90);
    rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
    rankTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, 70);
    rankTypeMap.put(TEST_METHOD_INDICATOR, 60);
    rankTypeMap.put(HINT_ONLY_INDICATOR, 50);
    rankTypeMap.put(IMAGE_INDICATOR, 30);
    rankTypeMap.put(OCR_INDICATOR, 29);
    rankTypeMap.put(LOGO_INDICATOR, 28);
    rankTypeMap.put(SIGNATURE_INDICATOR, 27);
    rankTypeMap.put(FORMULA_INDICATOR, 26);
    rankTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, 200);
    rankTypeMap.put(IMPORTED_REDACTION_INDICATOR, 200);
    rankTypeMap.put(ROTATE_SIMPLE_INDICATOR, 150);

    // Global UI colors for skipped/requested redactions.
    colors.setSkippedColor("#cccccc");
    colors.setRequestAddColor("#04b093");
    colors.setRequestRemoveColor("#04b093");
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected void loadNerForTest() {
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
}
|
||||
|
||||
|
||||
protected List<Type> getTypeResponse() {
|
||||
|
||||
return typeColorMap.entrySet()
|
||||
.stream()
|
||||
.map(typeColor -> Type.builder()
|
||||
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(typeColor.getKey())
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor(typeColor.getValue())
|
||||
.isHint(hintTypeMap.get(typeColor.getKey()))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
|
||||
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
|
||||
.rank(rankTypeMap.get(typeColor.getKey()))
|
||||
.build())
|
||||
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
protected Type getDictionaryResponse(String type, boolean isDossierDictionary) {
|
||||
|
||||
return Type.builder()
|
||||
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor(typeColorMap.get(type))
|
||||
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
|
||||
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
|
||||
.falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>())
|
||||
.isHint(hintTypeMap.get(type))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(type))
|
||||
.isRecommendation(recommendationTypeMap.get(type))
|
||||
.rank(rankTypeMap.get(type))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private String cleanDictionaryEntry(String entry) {
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
|
||||
}
|
||||
|
||||
|
||||
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
|
||||
|
||||
if (entries == null) {
|
||||
entries = Collections.emptyList();
|
||||
}
|
||||
return entries.stream().map(this::toDictionaryEntry).collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
|
||||
private DictionaryEntry toDictionaryEntry(String entry) {
|
||||
|
||||
return DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected AnalyzeRequest uploadFileToStorage(String file) {
|
||||
|
||||
return prepareStorage(file, "files/cv_service_empty_response.json");
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
|
||||
cvServiceResponseFileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
|
||||
|
||||
return request;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -3,28 +3,26 @@ package com.iqser.red.service.redaction.v1.server;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@ -33,17 +31,10 @@ import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.builder.KieRepository;
|
||||
import org.kie.api.builder.ReleaseId;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.internal.io.ResourceFactory;
|
||||
import org.mockito.stubbing.Answer;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
@ -53,9 +44,7 @@ import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
@ -72,24 +61,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
@ -99,85 +78,12 @@ import lombok.SneakyThrows;
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
||||
public class RedactionIntegrationTest {
|
||||
public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/rules.drl");
|
||||
private static final String VERTEBRATE = "vertebrate";
|
||||
private static final String ADDRESS = "CBI_address";
|
||||
private static final String AUTHOR = "CBI_author";
|
||||
private static final String SPONSOR = "CBI_sponsor";
|
||||
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
|
||||
private static final String REDACTION_INDICATOR = "redaction_indicator";
|
||||
private static final String HINT_ONLY = "hint_only";
|
||||
private static final String MUST_REDACT = "must_redact";
|
||||
private static final String PUBLISHED_INFORMATION = "published_information";
|
||||
private static final String TEST_METHOD = "test_method";
|
||||
private static final String PURITY = "purity";
|
||||
private static final String IMAGE = "image";
|
||||
private static final String LOGO = "logo";
|
||||
private static final String SIGNATURE = "signature";
|
||||
private static final String FORMULA = "formula";
|
||||
private static final String OCR = "ocr";
|
||||
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
|
||||
private static final String IMPORTED_REDACTION = "imported_redaction";
|
||||
private static final String PII = "PII";
|
||||
private static final String ROTATE_SIMPLE = "RotateSimple";
|
||||
|
||||
@Autowired
|
||||
private RedactionController redactionController;
|
||||
|
||||
@Autowired
|
||||
private AnnotationService annotationService;
|
||||
|
||||
@Autowired
|
||||
private AnalyzeService analyzeService;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@MockBean
|
||||
private RulesClient rulesClient;
|
||||
|
||||
@MockBean
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
@Autowired
|
||||
private RedactionStorageService redactionStorageService;
|
||||
|
||||
@Autowired
|
||||
private StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
|
||||
|
||||
@MockBean
|
||||
private AmazonS3 amazonS3;
|
||||
|
||||
@MockBean
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
private final Map<String, List<String>> dictionary = new HashMap<>();
|
||||
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
|
||||
private final Map<String, List<String>> falsePositive = new HashMap<>();
|
||||
private final Map<String, List<String>> falseRecommendation = new HashMap<>();
|
||||
private final Map<String, String> typeColorMap = new HashMap<>();
|
||||
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
|
||||
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
|
||||
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
|
||||
private final Map<String, Integer> rankTypeMap = new HashMap<>();
|
||||
private final Colors colors = new Colors();
|
||||
private final Map<String, Long> reanlysisVersions = new HashMap<>();
|
||||
private final Set<String> deleted = new HashSet<>();
|
||||
|
||||
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
|
||||
private final static String TEST_DOSSIER_ID = "123";
|
||||
private final static String TEST_FILE_ID = "123";
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
|
||||
@ComponentScan(excludeFilters={@ComponentScan.Filter(type= FilterType.ASSIGNABLE_TYPE, value=StorageAutoConfiguration.class)})
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@ -207,15 +113,6 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void cleanupStorage() {
|
||||
|
||||
if (this.storageService instanceof FileSystemBackedStorageService) {
|
||||
((FileSystemBackedStorageService) this.storageService).clearStorage();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void stubClients() {
|
||||
|
||||
@ -232,67 +129,26 @@ public class RedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS)
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
mockDictionaryCalls(0L);
|
||||
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
private void mockDictionaryCalls(Long version) {
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(VERTEBRATE,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||
NO_REDACTION_INDICATOR,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||
REDACTION_INDICATOR,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(HINT_ONLY, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(MUST_REDACT,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||
PUBLISHED_INFORMATION,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(TEST_METHOD,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(PURITY, false));
|
||||
when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(IMAGE, false));
|
||||
when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(OCR, false));
|
||||
when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(LOGO, false));
|
||||
when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(SIGNATURE, false));
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(FORMULA, false));
|
||||
when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(ROTATE_SIMPLE,
|
||||
false));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||
DOSSIER_REDACTIONS,
|
||||
true));
|
||||
when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer<Type>) invocation -> getDictionaryResponse(
|
||||
IMPORTED_REDACTION,
|
||||
true));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test270Rotated() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/270Rotated.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
assertThat(result).isNotNull();
|
||||
@ -303,7 +159,7 @@ public class RedactionIntegrationTest {
|
||||
@Disabled
|
||||
public void testLargeScannedFileOOM() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("scanned/VV-377031.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
assertThat(result).isNotNull();
|
||||
@ -315,7 +171,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/merge_images.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/merge_images.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -329,7 +185,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1));
|
||||
|
||||
dictionary.get(AUTHOR).add("Drinking water");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("Drinking water");
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
@ -355,7 +211,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
// F. Lastname, J. Doe, M. Mustermann
|
||||
// Lastname M., Doe J., Mustermann M.
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/ExpansionTest.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/ExpansionTest.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -371,7 +227,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -394,7 +250,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void testAddFileAttribute() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -411,7 +267,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("testIgnoreHint");
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/test-ignore-hint.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
@ -449,15 +305,11 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("noExceptionShouldBeThrownForAnyFiles");
|
||||
ClassLoader loader = getClass().getClassLoader();
|
||||
URL url = loader.getResource("files");
|
||||
File[] files = new File(url.getPath()).listFiles();
|
||||
List<File> input = new ArrayList<>();
|
||||
for (File file : files) {
|
||||
input.addAll(getPathsRecursively(file));
|
||||
}
|
||||
for (File path : input) {
|
||||
Path path = Paths.get(URI.create(url.toString()));
|
||||
|
||||
AnalyzeRequest request = prepareStorage(path.getPath());
|
||||
System.out.println("Redacting file : " + path.getName());
|
||||
Files.walk(path).forEach(currentPath -> {
|
||||
AnalyzeRequest request = uploadFileToStorage(currentPath.toString());
|
||||
System.out.println("Redacting file : " + currentPath.getFileName());
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
|
||||
long fstart = System.currentTimeMillis();
|
||||
@ -476,7 +328,7 @@ public class RedactionIntegrationTest {
|
||||
assertThat(entry.getValue().size()).isEqualTo(1);
|
||||
});
|
||||
|
||||
dictionary.get(AUTHOR).add("Drinking water");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("Drinking water");
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
|
||||
|
||||
long rstart = System.currentTimeMillis();
|
||||
@ -484,8 +336,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
long rend = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||
|
||||
}
|
||||
});
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
@ -501,7 +352,7 @@ public class RedactionIntegrationTest {
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
AnalyzeRequest request = uploadFileToStorage(fileName);
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||
@ -545,10 +396,10 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
|
||||
|
||||
dictionary.get(AUTHOR).add("properties");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("properties");
|
||||
reanlysisVersions.put("properties", 1L);
|
||||
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
deleted.add("David Chubb");
|
||||
@ -558,7 +409,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
@ -591,7 +442,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
@ -607,19 +458,19 @@ public class RedactionIntegrationTest {
|
||||
String fileName = "files/new/test1S1T1.pdf";
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
AnalyzeRequest request = uploadFileToStorage(fileName);
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
|
||||
dictionary.get(AUTHOR).add("report");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("report");
|
||||
reanlysisVersions.put("report", 2L);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(2L);
|
||||
mockDictionaryCalls(0L);
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
dictionary.get(AUTHOR).add("assessment report");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("assessment report");
|
||||
reanlysisVersions.put("assessment report", 3L);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
|
||||
mockDictionaryCalls(2L);
|
||||
@ -650,10 +501,12 @@ public class RedactionIntegrationTest {
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/crafted_document.NER_ENTITIES.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES),
|
||||
responseJson.getInputStream());
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
AnalyzeRequest request = uploadFileToStorage(fileName);
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||
@ -697,11 +550,11 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
|
||||
|
||||
dictionary.get(AUTHOR).add("properties");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("properties");
|
||||
reanlysisVersions.put("properties", 1L);
|
||||
mockDictionaryCalls(0L);
|
||||
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
dictionary.get(DICTIONARY_AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
deleted.add("David Chubb");
|
||||
@ -713,7 +566,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
@ -747,7 +600,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
@ -770,7 +623,7 @@ public class RedactionIntegrationTest {
|
||||
};
|
||||
List<Type> types = objectMapper.readValue(typeResource.getInputStream(), typeRefForTypes);
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/PublishedInformationTest.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/PublishedInformationTest.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
manualRedactions.getIdsToRemove()
|
||||
@ -828,7 +681,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("testTableRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -897,7 +750,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/unicodeProblem.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/unicodeProblem.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -919,7 +772,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("testTableRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/RotateTestFile.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/RotateTestFile.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -945,7 +798,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("testTableRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/RotateTestFileSimple.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/RotateTestFileSimple.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -967,7 +820,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("testTableHeader");
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/NoHeaderTable.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/NoHeaderTable.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -1002,7 +855,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/S157.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/S157.pdf");
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
@ -1078,7 +931,7 @@ public class RedactionIntegrationTest {
|
||||
manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(),
|
||||
Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build()));
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1108,14 +961,10 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void phantomCellsDocumentTest() throws IOException {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Phantom Cells.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/Phantom Cells.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1135,7 +984,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/sponsor_companies.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1197,7 +1046,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1253,7 +1102,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("expandByRegex");
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -1308,7 +1157,7 @@ public class RedactionIntegrationTest {
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry2);
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry3);
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||
AnalyzeRequest request = uploadFileToStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1332,280 +1181,15 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
private void loadDictionaryForTest() {
|
||||
|
||||
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
|
||||
|
||||
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void loadOnlyDictionaryForSimpleFile() {
|
||||
|
||||
dictionary.clear();
|
||||
dictionary.computeIfAbsent(ROTATE_SIMPLE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
|
||||
private static String loadFromClassPath(String path) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
|
||||
}
|
||||
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String str;
|
||||
while ((str = br.readLine()) != null) {
|
||||
sb.append(str).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<File> getPathsRecursively(File path) {
|
||||
|
||||
List<File> result = new ArrayList<>();
|
||||
if (path == null || path.listFiles() == null) {
|
||||
return result;
|
||||
}
|
||||
for (File f : path.listFiles()) {
|
||||
if (f.isFile()) {
|
||||
result.add(f);
|
||||
} else {
|
||||
result.addAll(getPathsRecursively(f));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void loadTypeForTest() {
|
||||
|
||||
typeColorMap.put(VERTEBRATE, "#ff85f7");
|
||||
typeColorMap.put(ADDRESS, "#ffe187");
|
||||
typeColorMap.put(AUTHOR, "#ffe187");
|
||||
typeColorMap.put(SPONSOR, "#85ebff");
|
||||
typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff");
|
||||
typeColorMap.put(REDACTION_INDICATOR, "#caff85");
|
||||
typeColorMap.put(HINT_ONLY, "#abc0c4");
|
||||
typeColorMap.put(MUST_REDACT, "#fab4c0");
|
||||
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
|
||||
typeColorMap.put(TEST_METHOD, "#91fae8");
|
||||
typeColorMap.put(PII, "#66ccff");
|
||||
typeColorMap.put(PURITY, "#ffe187");
|
||||
typeColorMap.put(IMAGE, "#fcc5fb");
|
||||
typeColorMap.put(OCR, "#fcc5fb");
|
||||
typeColorMap.put(LOGO, "#ffe187");
|
||||
typeColorMap.put(FORMULA, "#ffe187");
|
||||
typeColorMap.put(SIGNATURE, "#ffe187");
|
||||
typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6");
|
||||
typeColorMap.put(ROTATE_SIMPLE, "#66ccff");
|
||||
|
||||
hintTypeMap.put(VERTEBRATE, true);
|
||||
hintTypeMap.put(ADDRESS, false);
|
||||
hintTypeMap.put(AUTHOR, false);
|
||||
hintTypeMap.put(SPONSOR, false);
|
||||
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
|
||||
hintTypeMap.put(REDACTION_INDICATOR, true);
|
||||
hintTypeMap.put(HINT_ONLY, true);
|
||||
hintTypeMap.put(MUST_REDACT, true);
|
||||
hintTypeMap.put(PUBLISHED_INFORMATION, true);
|
||||
hintTypeMap.put(TEST_METHOD, true);
|
||||
hintTypeMap.put(PII, false);
|
||||
hintTypeMap.put(PURITY, false);
|
||||
hintTypeMap.put(IMAGE, true);
|
||||
hintTypeMap.put(OCR, true);
|
||||
hintTypeMap.put(FORMULA, false);
|
||||
hintTypeMap.put(LOGO, false);
|
||||
hintTypeMap.put(SIGNATURE, false);
|
||||
hintTypeMap.put(DOSSIER_REDACTIONS, false);
|
||||
hintTypeMap.put(IMPORTED_REDACTION, false);
|
||||
hintTypeMap.put(ROTATE_SIMPLE, false);
|
||||
|
||||
caseInSensitiveMap.put(VERTEBRATE, true);
|
||||
caseInSensitiveMap.put(ADDRESS, false);
|
||||
caseInSensitiveMap.put(AUTHOR, false);
|
||||
caseInSensitiveMap.put(SPONSOR, false);
|
||||
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
|
||||
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
|
||||
caseInSensitiveMap.put(HINT_ONLY, true);
|
||||
caseInSensitiveMap.put(MUST_REDACT, true);
|
||||
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
|
||||
caseInSensitiveMap.put(TEST_METHOD, false);
|
||||
caseInSensitiveMap.put(PII, false);
|
||||
caseInSensitiveMap.put(PURITY, false);
|
||||
caseInSensitiveMap.put(IMAGE, true);
|
||||
caseInSensitiveMap.put(OCR, true);
|
||||
caseInSensitiveMap.put(SIGNATURE, true);
|
||||
caseInSensitiveMap.put(LOGO, true);
|
||||
caseInSensitiveMap.put(FORMULA, true);
|
||||
caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
|
||||
caseInSensitiveMap.put(IMPORTED_REDACTION, false);
|
||||
caseInSensitiveMap.put(ROTATE_SIMPLE, true);
|
||||
|
||||
recommendationTypeMap.put(VERTEBRATE, false);
|
||||
recommendationTypeMap.put(ADDRESS, false);
|
||||
recommendationTypeMap.put(AUTHOR, false);
|
||||
recommendationTypeMap.put(SPONSOR, false);
|
||||
recommendationTypeMap.put(NO_REDACTION_INDICATOR, false);
|
||||
recommendationTypeMap.put(REDACTION_INDICATOR, false);
|
||||
recommendationTypeMap.put(HINT_ONLY, false);
|
||||
recommendationTypeMap.put(MUST_REDACT, false);
|
||||
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
|
||||
recommendationTypeMap.put(TEST_METHOD, false);
|
||||
recommendationTypeMap.put(PII, false);
|
||||
recommendationTypeMap.put(PURITY, false);
|
||||
recommendationTypeMap.put(IMAGE, false);
|
||||
recommendationTypeMap.put(OCR, false);
|
||||
recommendationTypeMap.put(FORMULA, false);
|
||||
recommendationTypeMap.put(SIGNATURE, false);
|
||||
recommendationTypeMap.put(LOGO, false);
|
||||
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
|
||||
recommendationTypeMap.put(IMPORTED_REDACTION, false);
|
||||
recommendationTypeMap.put(ROTATE_SIMPLE, false);
|
||||
|
||||
rankTypeMap.put(PURITY, 155);
|
||||
rankTypeMap.put(PII, 150);
|
||||
rankTypeMap.put(ADDRESS, 140);
|
||||
rankTypeMap.put(AUTHOR, 130);
|
||||
rankTypeMap.put(SPONSOR, 120);
|
||||
rankTypeMap.put(VERTEBRATE, 110);
|
||||
rankTypeMap.put(MUST_REDACT, 100);
|
||||
rankTypeMap.put(REDACTION_INDICATOR, 90);
|
||||
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
|
||||
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
|
||||
rankTypeMap.put(TEST_METHOD, 60);
|
||||
rankTypeMap.put(HINT_ONLY, 50);
|
||||
rankTypeMap.put(IMAGE, 30);
|
||||
rankTypeMap.put(OCR, 29);
|
||||
rankTypeMap.put(LOGO, 28);
|
||||
rankTypeMap.put(SIGNATURE, 27);
|
||||
rankTypeMap.put(FORMULA, 26);
|
||||
rankTypeMap.put(DOSSIER_REDACTIONS, 200);
|
||||
rankTypeMap.put(IMPORTED_REDACTION, 200);
|
||||
rankTypeMap.put(ROTATE_SIMPLE, 150);
|
||||
|
||||
colors.setSkippedColor("#cccccc");
|
||||
colors.setRequestAddColor("#04b093");
|
||||
colors.setRequestRemoveColor("#04b093");
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void loadNerForTest() {
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/ner_response.json");
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
|
||||
}
|
||||
|
||||
|
||||
private List<Type> getTypeResponse() {
|
||||
|
||||
return typeColorMap.entrySet()
|
||||
.stream()
|
||||
.map(typeColor -> Type.builder()
|
||||
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(typeColor.getKey())
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor(typeColor.getValue())
|
||||
.isHint(hintTypeMap.get(typeColor.getKey()))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
|
||||
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
|
||||
.rank(rankTypeMap.get(typeColor.getKey()))
|
||||
.build())
|
||||
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
|
||||
|
||||
return Type.builder()
|
||||
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor(typeColorMap.get(type))
|
||||
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
|
||||
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
|
||||
.falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>())
|
||||
.isHint(hintTypeMap.get(type))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(type))
|
||||
.isRecommendation(recommendationTypeMap.get(type))
|
||||
.rank(rankTypeMap.get(type))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private String cleanDictionaryEntry(String entry) {
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
|
||||
}
|
||||
|
||||
|
||||
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
|
||||
|
||||
if (entries == null) {
|
||||
entries = Collections.emptyList();
|
||||
}
|
||||
|
||||
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
||||
entries.forEach(entry -> {
|
||||
dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build());
|
||||
});
|
||||
return dictionaryEntries;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testImportedRedactions() throws IOException {
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
|
||||
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
|
||||
importedRedactions.getInputStream());
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
@ -1636,17 +1220,17 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void testExpandByPrefixRegEx() throws IOException {
|
||||
|
||||
assertThat(dictionary.get(AUTHOR)).contains("Robinson");
|
||||
assertThat(dictionary.get(AUTHOR)).doesNotContain("Mrs. Robinson");
|
||||
assertThat(dictionary.get(AUTHOR)).contains("Bojangles");
|
||||
assertThat(dictionary.get(AUTHOR)).doesNotContain("Mr. Bojangles");
|
||||
assertThat(dictionary.get(AUTHOR)).contains("Tambourine Man");
|
||||
assertThat(dictionary.get(AUTHOR)).doesNotContain("Mr. Tambourine Man");
|
||||
assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Robinson");
|
||||
assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mrs. Robinson");
|
||||
assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Bojangles");
|
||||
assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mr. Bojangles");
|
||||
assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Tambourine Man");
|
||||
assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mr. Tambourine Man");
|
||||
|
||||
String fileName = "files/mr-mrs.pdf";
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
AnalyzeRequest request = uploadFileToStorage(fileName);
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
@ -1665,39 +1249,4 @@ public class RedactionIntegrationTest {
|
||||
assertThat(values).contains("Mr. Tambourine Man");
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
|
||||
|
||||
return request;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(String file) {
|
||||
|
||||
return prepareStorage(file, "files/cv_service_empty_response.json");
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,151 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(RedactionIntegrationTestV2.RedactionIntegrationTestConfiguration.class)
|
||||
public class RedactionIntegrationTestV2 extends AbstractRedactionIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/rules_v2.drl");
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
|
||||
static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/test/resources/drools/rules_v2", kieServices.getResources().newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void stubClients() {
|
||||
|
||||
TenantContext.setTenantId("redaction");
|
||||
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
loadNerForTest();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
|
||||
.id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
|
||||
.type(DOSSIER_REDACTIONS_INDICATOR)
|
||||
.dossierTemplateId(TEST_DOSSIER_ID)
|
||||
.hexColor("#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
|
||||
.build()));
|
||||
|
||||
mockDictionaryCalls(null);
|
||||
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The case in this test: One term, 'Dr. Alan Miller', is found by PII-Rule and is in the PII-dictionary
|
||||
* as well as in the PII-false-positive-list - and in the CBI-author dictionary.
|
||||
* It gets redacted, as the PII-finding is false positive and so the CBI-author entry is effective
|
||||
* independent of the entity-rank
|
||||
*/
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testTermIsInTwoDictionariesAndInOneFalsePositive() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/simplified2.pdf");
|
||||
|
||||
dictionary.clear();
|
||||
dictionary.put(DICTIONARY_PII, Arrays.asList("Dr. Alan Miller"));
|
||||
dictionary.put(DICTIONARY_AUTHOR, Arrays.asList("Dr. Alan Miller"));
|
||||
|
||||
falsePositive.clear();
|
||||
falsePositive.put(DICTIONARY_PII, Arrays.asList("Dr. Alan Miller COMPLETION DATE:"));
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(1);
|
||||
|
||||
RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0);
|
||||
|
||||
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
|
||||
assertThat(redactionLogEntry.getValue()).isEqualTo("Dr. Alan Miller");
|
||||
assertThat(redactionLogEntry.isRedacted()).isEqualTo(true);
|
||||
assertThat(redactionLogEntry.isRecommendation()).isEqualTo(false);
|
||||
assertThat(redactionLogEntry.isFalsePositive()).isEqualTo(false);
|
||||
assertThat(redactionLogEntry.isExcluded()).isEqualTo(false);
|
||||
assertThat(redactionLogEntry.isDictionaryEntry()).isEqualTo(true);
|
||||
|
||||
assertThat(redactionLogEntry.getEngines().size()).isEqualTo(1);
|
||||
assertThat(redactionLogEntry.getEngines().contains(Engine.DICTIONARY)).isEqualTo(true);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,341 @@
|
||||
package drools
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
|
||||
|
||||
global Section section
|
||||
|
||||
|
||||
// --------------------------------------- AI rules -------------------------------------------------------------------
|
||||
|
||||
rule "0: Add CBI_author from ai"
|
||||
when
|
||||
Section(aiMatchesType("CBI_author"))
|
||||
then
|
||||
section.addAiEntities("CBI_author", "CBI_author");
|
||||
end
|
||||
|
||||
rule "0: Combine address parts from ai to CBI_address (org is mandatory)"
|
||||
when
|
||||
Section(aiMatchesType("ORG"))
|
||||
then
|
||||
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
|
||||
end
|
||||
|
||||
rule "0: Combine address parts from ai to CBI_address (street is mandatory)"
|
||||
when
|
||||
Section(aiMatchesType("STREET"))
|
||||
then
|
||||
section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
|
||||
end
|
||||
|
||||
rule "0: Combine address parts from ai to CBI_address (city is mandatory)"
|
||||
when
|
||||
Section(aiMatchesType("CITY"))
|
||||
then
|
||||
section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false);
|
||||
end
|
||||
|
||||
/* Syngenta specific laboratory recommendation */
|
||||
rule "0: Recommend CTL/BL laboratory that start with BL or CTL"
|
||||
when
|
||||
Section(searchText.contains("CT") || searchText.contains("BL"))
|
||||
then
|
||||
/* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
|
||||
section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address");
|
||||
end
|
||||
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
|
||||
// Rules 1/2: redact CBI_author entities. The pair differs only in the
// "Vertebrate Study" file attribute, which selects the legal basis cited in
// the redaction: Article 39(e)(3) for non-vertebrate studies, Article 39(e)(2)
// for vertebrate studies. This non-vertebrate/vertebrate pairing recurs
// throughout this rules file.
rule "1: Redact CBI Authors (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
    then
        section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "2: Redact CBI Authors (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
    then
        section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 3/4: CBI_address handling diverges by study type. For non-vertebrate
// studies addresses are explicitly NOT redacted (redactNot) and any pending
// CBI_address recommendations are discarded; for vertebrate studies addresses
// ARE redacted under Article 39(e)(2).
rule "3: Redact not CBI Address (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
    then
        section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
        section.ignoreRecommendations("CBI_address");
end


rule "4: Redact CBI Address (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
    then
        section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 5: trim the possessive suffix ("'s" in its many typographic/OCR
// apostrophe variants) off CBI_author matches by marking that trailing span as
// a false positive, so e.g. "Smith's" redacts only "Smith".
rule "5: Do not redact genitive CBI_author"
    when
        Section(matchesType("CBI_author"))
    then
        section.expandToFalsePositiveByRegEx("CBI_author", "['’’'ʼˈ´`‘′ʻ’']s", false, 0);
end
|
||||
|
||||
|
||||
// Rules 6-9: redact table cells under an "Author(s)" (rules 6/7) or "Author"
// (rules 8/9) column header as CBI_author. Tables that also have a
// "Vertebrate study Y/N" column are excluded here — those are handled
// per-row by rules 10/11. The legal basis again follows the "Vertebrate
// Study" file attribute: 39(e)(3) non-vertebrate, 39(e)(2) vertebrate.
rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
    then
        section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
    then
        section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end


rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
    then
        section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
    then
        section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 10/11: tables that carry their own "Vertebrate study Y/N" column.
// When any row has a recognised Y/Yes/N/No value, the "Author(s)" cells are
// redacted with recommendation mode enabled (4th arg true, unlike rules 6-9).
// NOTE(review): the cited article still follows the file-level "Vertebrate
// Study" attribute, not the per-row Y/N value — so a "Y" row in a file tagged
// non-vertebrate is justified under 39(e)(3). Confirm this is intended.
// ("recommand" in the rule names is a typo, kept as-is: rule names are
// identifiers that may be referenced externally.)
rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
    then
        section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
    then
        section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
// Rules 14/15: catch citation-style author names preceding "et al." (e.g.
// "Müller H. et al."). Capture group 1 (the name, optionally with initials)
// is redacted as CBI_author and also added as a recommendation. The cheap
// contains("et al") check gates the regex.
rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
    then
        section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
    then
        section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
rule "16: Add recommendation for Addresses in Test Organism sections"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
|
||||
then
|
||||
section.recommendLineAfter("Source:", "CBI_address");
|
||||
end
|
||||
|
||||
rule "17: Add recommendation for Addresses in Test Animals sections"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
|
||||
then
|
||||
section.recommendLineAfter("Source", "CBI_address");
|
||||
end
|
||||
|
||||
|
||||
// Rule 18: override — if the section contains a "published_information"
// entity, revoke CBI_author and CBI_address redactions in it, referencing the
// published_information match as the reason (already-public data is not CBI).
rule "18: Do not redact Names and Addresses if Published Information found"
    when
        Section(matchesType("published_information"))
    then
        section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
        section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
end
|
||||
|
||||
|
||||
// --------------------------------------- PII rules -------------------------------------------------------------------
|
||||
|
||||
|
||||
// Rules 19/20: redact all detected PII entities; as elsewhere, the cited
// article follows the "Vertebrate Study" file attribute.
rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
    then
        section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII"))
    then
        section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 21/22: redact e-mail addresses (capture group 1) as PII. The
// contains("@") check gates the regex; the TLD part allows 2-24 letters
// (hyphens permitted internally, last char must be a letter).
rule "21: Redact Emails by RegEx (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
    then
        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "22: Redact Emails by RegEx (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@"))
    then
        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 25/26: redact phone/fax numbers (capture group 2: a digit sequence of
// 5+ chars allowing spaces, parentheses, dashes, slashes, dots) when preceded
// by a contact keyword. "Ter", "Fel" and "Fer" are presumably included to
// catch OCR misreads of "Tel"/"Fax" — TODO confirm. Note the keyword gate
// uses case-sensitive text.contains while the regex itself matches the
// keywords lowercase (true = case-insensitive flag, presumably — verify
// against Section.redactByRegEx).
rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
            text.contains("Contact")
            || text.contains("Telephone")
            || text.contains("Phone")
            || text.contains("Fax")
            || text.contains("Tel")
            || text.contains("Ter")
            || text.contains("Mobile")
            || text.contains("Fel")
            || text.contains("Fer")
        ))
    then
        section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "26: Redact Phone and Fax by RegEx (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (
            text.contains("Contact")
            || text.contains("Telephone")
            || text.contains("Phone")
            || text.contains("Fax")
            || text.contains("Tel")
            || text.contains("Ter")
            || text.contains("Mobile")
            || text.contains("Fel")
            || text.contains("Fer")
        ))
    then
        section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 27/28: redact the title-page lines between "AUTHOR(S):" and
// "COMPLETION DATE:" as PII. The "!STUDY COMPLETION DATE:" guard keeps these
// rules from overlapping rules 29/30, which handle the "STUDY COMPLETION
// DATE:" variant.
// NOTE(review): rule 27's message is "Author found" while rules 28/29/30 use
// "AUTHOR(S) was found" — likely an unintended inconsistency; left unchanged
// here because the message may already appear in audit trails.
rule "27: Redact AUTHOR(S) (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("COMPLETION DATE:")
            && !searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "28: Redact AUTHOR(S) (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("COMPLETION DATE:")
            && !searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 29/30: same as rules 27/28 but for title pages using the
// "STUDY COMPLETION DATE:" marker instead of plain "COMPLETION DATE:".
rule "29: Redact AUTHOR(S) (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "30: Redact AUTHOR(S) (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rules 31/32: redact the span between "PERFORMING LABORATORY:" and
// "LABORATORY PROJECT ID:" as CBI_address. For non-vertebrate studies
// (rule 31) the redaction is then immediately revoked via redactNot —
// mirroring rule 3's policy that addresses in non-vertebrate studies are not
// redacted, while still recording that a laboratory was found.
rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("PERFORMING LABORATORY:")
        )
    then
        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study");
end


rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("PERFORMING LABORATORY:"))
    then
        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// --------------------------------------- other rules -------------------------------------------------------------------
|
||||
|
||||
// Rule 33: annotate (hint only, no redaction) purity percentage statements,
// e.g. "purity: 98.5 %" or "purity of X (...) 99 %". Capture group 1 is the
// "purity ..." prefix; presumably the hint anchors there — TODO confirm
// against Section.addHintAnnotationByRegEx.
rule "33: Purity Hint"
    when
        Section(searchText.toLowerCase().contains("purity"))
    then
        section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
end
|
||||
|
||||
|
||||
// Rule 34: dossier_redaction entries only apply to files whose
// "Confidentiality" attribute is "confidential"; otherwise drop them.
rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction"));
    then
        section.ignore("dossier_redaction");
end
|
||||
|
||||
|
||||
// Rules 35/36: redact images classified as signatures; article citation again
// follows the "Vertebrate Study" file attribute.
rule "35: Redact signatures (Non vertebrate study)"
    when
        Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
    then
        section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end


rule "36: Redact signatures (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature"))
    then
        section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 43: redact logo images, but only for vertebrate studies — unlike the
// signature rules there is no non-vertebrate counterpart in this file
// (rule IDs 37-42 are also absent here), so logos in non-vertebrate studies
// are left unredacted.
rule "43: Redact Logos (Vertebrate study)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo"))
    then
        section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user