diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/TenantRules.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/TenantRules.java new file mode 100644 index 00000000..eff0a617 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/TenantRules.java @@ -0,0 +1,15 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.util.HashMap; +import java.util.Map; + +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +public class TenantRules { + + private Map rulesVersionPerDossierTemplateId = new HashMap<>(); + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index 6905fb63..eaaa9b63 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -1,11 +1,13 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import com.iqser.red.service.redaction.v1.server.client.RulesClient; -import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; -import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; -import io.micrometer.core.annotation.Timed; -import lombok.RequiredArgsConstructor; +import javax.annotation.PostConstruct; import org.apache.commons.lang3.StringUtils; import org.kie.api.KieServices; @@ -16,11 +18,19 @@ import org.kie.api.runtime.KieContainer; import org.kie.api.runtime.KieSession; import org.springframework.stereotype.Service; -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; +import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; +import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import com.iqser.red.service.redaction.v1.server.redaction.model.TenantRules; +import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; + +import io.micrometer.core.annotation.Timed; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; @Service @RequiredArgsConstructor @@ -30,7 +40,24 @@ public class DroolsExecutionService { private final Map kieContainers = new HashMap<>(); - private final Map rulesVersionPerDossierTemplateId = new HashMap<>(); + private final RedactionServiceSettings settings; + + private LoadingCache tenantRulesCache; + + + @PostConstruct + protected void createCache() { + + tenantRulesCache = CacheBuilder.newBuilder() + .maximumSize(settings.getDictionaryCacheMaximumSize()) + .expireAfterAccess(settings.getDictionaryCacheExpireAfterAccessDays(), TimeUnit.DAYS) + .build(new CacheLoader<>() { + public TenantRules load(String key) { + + return new TenantRules(); + } + }); + } public KieContainer getKieContainer(String dossierTemplateId) { @@ -61,13 +88,13 @@ public class DroolsExecutionService { public KieContainer updateRules(String dossierTemplateId) { long version = rulesClient.getVersion(dossierTemplateId); - Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId); + Long rulesVersion = getVersionForDossierTemplate(dossierTemplateId); if (rulesVersion == null) { rulesVersion = -1L; } if (version > rulesVersion) { - rulesVersionPerDossierTemplateId.put(dossierTemplateId, version); + setRulesVersionForDossierTemplate(dossierTemplateId, version); return createOrUpdateKieContainer(dossierTemplateId); } return getKieContainer(dossierTemplateId); @@ -126,11 +153,25 @@ public class DroolsExecutionService { public long getRulesVersion(String dossierTemplateId) { - Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId); + Long rulesVersion = getVersionForDossierTemplate(dossierTemplateId); if (rulesVersion == null) { return -1; } return rulesVersion; } + + @SneakyThrows + private Long getVersionForDossierTemplate(String dossierTemplateId) { + + return tenantRulesCache.get(TenantContext.getTenantId()).getRulesVersionPerDossierTemplateId().get(dossierTemplateId); + } + + + @SneakyThrows + private void setRulesVersionForDossierTemplate(String dossierTemplateId, long version) { + + tenantRulesCache.get(TenantContext.getTenantId()).getRulesVersionPerDossierTemplateId().put(dossierTemplateId, version); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/entityredaction/EntityFinder.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/entityredaction/EntityFinder.java index 0cf2d378..86c390f0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/entityredaction/EntityFinder.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/entityredaction/EntityFinder.java @@ -66,7 +66,7 @@ class EntityFinder { !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, - local ? EntityType.RECOMMENDATION : EntityType.ENTITY)); + local ? EntityType.RECOMMENDATION : EntityType.ENTITY)).stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet()); EntitySearchUtils.addOrAddEngine(found, entities); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 8bd7f515..20e8e9f0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -273,14 +273,14 @@ public final class EntitySearchUtils { existing.setLegalBasis(found.getLegalBasis()); existing.setMatchedRule(found.getMatchedRule()); existing.setRedactionReason(found.getRedactionReason()); - if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY) || existing.getEntityType() - .equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) { + if (isOneARecommendationAndTheOtherEntity(found, existing)) { existing.setEntityType(EntityType.ENTITY); if (found.isRedaction()) { existing.setRedaction(true); } } - } else if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) { + } else if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType()) && + !(isOneARecommendationAndTheOtherEntity(found, existing) && existing.isRedaction() && found.isRedaction()) ) { entities.remove(found); entities.add(found); } @@ -289,6 +289,13 @@ public final class EntitySearchUtils { } } + private boolean isOneARecommendationAndTheOtherEntity(Entity entityOne, Entity entityTwo) { + + var entityTypeOne = entityOne.getEntityType(); + var entityTypeTwo = entityTwo.getEntityType(); + return entityTypeTwo.equals(EntityType.RECOMMENDATION) && entityTypeOne.equals(EntityType.ENTITY) + || entityTypeTwo.equals(EntityType.ENTITY) && entityTypeOne.equals(EntityType.RECOMMENDATION); + } public void addEntitiesIgnoreRank(Set entities, Set found) { // HashSet keeps old value but we want the new. diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java new file mode 100644 index 00000000..4cdf09c0 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java @@ -0,0 +1,462 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.mockito.Mockito.when; + +import java.io.File; +import java.io.InputStream; +import java.net.URL; +import java.nio.file.Files; +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.AfterEach; +import org.mockito.stubbing.Answer; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.core.io.ClassPathResource; + +import com.amazonaws.services.s3.AmazonS3; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.controller.RedactionController; +import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; +import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; +import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService; +import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +public abstract class AbstractRedactionIntegrationTest { + + protected static final String VERTEBRATE_INDICATOR = "vertebrate"; + protected static final String DICTIONARY_ADDRESS = "CBI_address"; + protected static final String DICTIONARY_AUTHOR = "CBI_author"; + protected static final String DICTIONARY_SPONSOR = "CBI_sponsor"; + protected static final String DICTIONARY_PII = "PII"; + protected static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; + protected static final String REDACTION_INDICATOR = "redaction_indicator"; + protected static final String HINT_ONLY_INDICATOR = "hint_only"; + protected static final String MUST_REDACT_INDICATOR = "must_redact"; + protected static final String PUBLISHED_INFORMATION_INDICATOR = "published_information"; + protected static final String TEST_METHOD_INDICATOR = "test_method"; + protected static final String PURITY_INDICATOR = "purity"; + protected static final String IMAGE_INDICATOR = "image"; + protected static final String LOGO_INDICATOR = "logo"; + protected static final String SIGNATURE_INDICATOR = "signature"; + protected static final String FORMULA_INDICATOR = "formula"; + protected static final String OCR_INDICATOR = "ocr"; + protected static final String DOSSIER_REDACTIONS_INDICATOR = "dossier_redactions"; + protected static final String IMPORTED_REDACTION_INDICATOR = "imported_redaction"; + protected static final String ROTATE_SIMPLE_INDICATOR = "RotateSimple"; + + protected final static String TEST_DOSSIER_TEMPLATE_ID = "123"; + public static final String IMPORTED_REDACTION_TYPE_ID = IMPORTED_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String DOSSIER_REDACTIONS_TYPE_ID = DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String ROTATE_SIMPLE_TYPE_ID = ROTATE_SIMPLE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String FORMULA_TYPE_ID = FORMULA_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String SIGNATURE_TYPE_ID = SIGNATURE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String LOGO_TYPE_ID = LOGO_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String OCR_TYPE_ID = OCR_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String IMAGE_TYPE_ID = IMAGE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String PURITY_TYPE_ID = PURITY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String PII_TYPE_ID = DICTIONARY_PII + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String TEST_METHOD_TYPE_ID = TEST_METHOD_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String PUBLISHED_INFORMATION_TYPE_ID = PUBLISHED_INFORMATION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String MUST_REDACT_TYPE_ID = MUST_REDACT_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String HINT_ONLY_TYPE_ID = HINT_ONLY_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String REDACTION_TYPE_ID = REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String NO_REDACTION_TYPE_ID = NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String SPONSOR_TYPE_ID = DICTIONARY_SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String AUTHOR_TYPE_ID = DICTIONARY_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String ADDRESS_TYPE_ID = DICTIONARY_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID; + public static final String VERTEBRATE_TYPE_ID = VERTEBRATE_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID; + + @Autowired + protected RedactionController redactionController; + + @Autowired + protected AnnotationService annotationService; + + @Autowired + protected AnalyzeService analyzeService; + + @Autowired + protected ObjectMapper objectMapper; + + @Autowired + protected RedactionStorageService redactionStorageService; + + @Autowired + protected StorageService storageService; + + @Autowired + protected ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; + + @MockBean + protected AmazonS3 amazonS3; + + @MockBean + protected RabbitTemplate rabbitTemplate; + + @MockBean + protected LegalBasisClient legalBasisClient; + + protected final Map> dictionary = new HashMap<>(); + protected final Map> dossierDictionary = new HashMap<>(); + protected final Map> falsePositive = new HashMap<>(); + protected final Map> falseRecommendation = new HashMap<>(); + protected final Map typeColorMap = new HashMap<>(); + protected final Map hintTypeMap = new HashMap<>(); + protected final Map caseInSensitiveMap = new HashMap<>(); + protected final Map recommendationTypeMap = new HashMap<>(); + protected final Map rankTypeMap = new HashMap<>(); + protected final Colors colors = new Colors(); + protected final Map reanlysisVersions = new HashMap<>(); + protected final Set deleted = new HashSet<>(); + + protected final static String TEST_DOSSIER_ID = "123"; + protected final static String TEST_FILE_ID = "123"; + + @MockBean + protected RulesClient rulesClient; + + @MockBean + protected DictionaryClient dictionaryClient; + + + @AfterEach + public void cleanupStorage() { + + if (this.storageService instanceof FileSystemBackedStorageService) { + ((FileSystemBackedStorageService) this.storageService).clearStorage(); + } + } + + + protected void mockDictionaryCalls(Long version) { + + when(dictionaryClient.getDictionaryForType(VERTEBRATE_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(VERTEBRATE_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(ADDRESS_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(DICTIONARY_ADDRESS, false)); + when(dictionaryClient.getDictionaryForType(AUTHOR_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(DICTIONARY_AUTHOR, false)); + when(dictionaryClient.getDictionaryForType(SPONSOR_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(DICTIONARY_SPONSOR, false)); + when(dictionaryClient.getDictionaryForType(NO_REDACTION_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(NO_REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(REDACTION_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(REDACTION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(HINT_ONLY_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(HINT_ONLY_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(MUST_REDACT_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(MUST_REDACT_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PUBLISHED_INFORMATION_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(TEST_METHOD_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(TEST_METHOD_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(PII_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(DICTIONARY_PII, false)); + when(dictionaryClient.getDictionaryForType(PURITY_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PURITY_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(IMAGE_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(IMAGE_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(OCR_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(OCR_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(LOGO_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(LOGO_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(SIGNATURE_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SIGNATURE_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(FORMULA_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(FORMULA_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ROTATE_SIMPLE_INDICATOR, false)); + when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(DOSSIER_REDACTIONS_INDICATOR,true)); + when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION_TYPE_ID, version)).then((Answer) invocation -> getDictionaryResponse(IMPORTED_REDACTION_INDICATOR,true)); + + } + + + protected void loadDictionaryForTest() { + + dictionary.computeIfAbsent(DICTIONARY_AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(DICTIONARY_SPONSOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATE_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(DICTIONARY_ADDRESS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(HINT_ONLY_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(MUST_REDACT_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PUBLISHED_INFORMATION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(TEST_METHOD_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(PURITY_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(IMAGE_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(OCR_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(LOGO_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(SIGNATURE_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(FORMULA_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dossierDictionary.put(IMPORTED_REDACTION_INDICATOR, new ArrayList<>()); + + falsePositive.computeIfAbsent(DICTIONARY_PII, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + + } + + + protected void loadOnlyDictionaryForSimpleFile() { + + dictionary.clear(); + dictionary.computeIfAbsent(ROTATE_SIMPLE_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + } + + + @SneakyThrows + protected static String loadFromClassPath(String path) { + + URL resource = ResourceLoader.class.getClassLoader().getResource(path); + if (resource == null) { + throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl"); + } + List stringList = Files.readAllLines(new File(resource.getPath()).toPath()); + return String.join("\n", stringList); + } + + + protected void loadTypeForTest() { + + typeColorMap.put(VERTEBRATE_INDICATOR, "#ff85f7"); + typeColorMap.put(DICTIONARY_ADDRESS, "#ffe187"); + typeColorMap.put(DICTIONARY_AUTHOR, "#ffe187"); + typeColorMap.put(DICTIONARY_SPONSOR, "#85ebff"); + typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); + typeColorMap.put(REDACTION_INDICATOR, "#caff85"); + typeColorMap.put(HINT_ONLY_INDICATOR, "#abc0c4"); + typeColorMap.put(MUST_REDACT_INDICATOR, "#fab4c0"); + typeColorMap.put(PUBLISHED_INFORMATION_INDICATOR, "#85ebff"); + typeColorMap.put(TEST_METHOD_INDICATOR, "#91fae8"); + typeColorMap.put(DICTIONARY_PII, "#66ccff"); + typeColorMap.put(PURITY_INDICATOR, "#ffe187"); + typeColorMap.put(IMAGE_INDICATOR, "#fcc5fb"); + typeColorMap.put(OCR_INDICATOR, "#fcc5fb"); + typeColorMap.put(LOGO_INDICATOR, "#ffe187"); + typeColorMap.put(FORMULA_INDICATOR, "#ffe187"); + typeColorMap.put(SIGNATURE_INDICATOR, "#ffe187"); + typeColorMap.put(IMPORTED_REDACTION_INDICATOR, "#fcfbe6"); + typeColorMap.put(ROTATE_SIMPLE_INDICATOR, "#66ccff"); + + hintTypeMap.put(VERTEBRATE_INDICATOR, true); + hintTypeMap.put(DICTIONARY_ADDRESS, false); + hintTypeMap.put(DICTIONARY_AUTHOR, false); + hintTypeMap.put(DICTIONARY_SPONSOR, false); + hintTypeMap.put(NO_REDACTION_INDICATOR, true); + hintTypeMap.put(REDACTION_INDICATOR, true); + hintTypeMap.put(HINT_ONLY_INDICATOR, true); + hintTypeMap.put(MUST_REDACT_INDICATOR, true); + hintTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, true); + hintTypeMap.put(TEST_METHOD_INDICATOR, true); + hintTypeMap.put(DICTIONARY_PII, false); + hintTypeMap.put(PURITY_INDICATOR, false); + hintTypeMap.put(IMAGE_INDICATOR, true); + hintTypeMap.put(OCR_INDICATOR, true); + hintTypeMap.put(FORMULA_INDICATOR, false); + hintTypeMap.put(LOGO_INDICATOR, false); + hintTypeMap.put(SIGNATURE_INDICATOR, false); + hintTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, false); + hintTypeMap.put(IMPORTED_REDACTION_INDICATOR, false); + hintTypeMap.put(ROTATE_SIMPLE_INDICATOR, false); + + caseInSensitiveMap.put(VERTEBRATE_INDICATOR, true); + caseInSensitiveMap.put(DICTIONARY_ADDRESS, false); + caseInSensitiveMap.put(DICTIONARY_AUTHOR, false); + caseInSensitiveMap.put(DICTIONARY_SPONSOR, false); + caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); + caseInSensitiveMap.put(REDACTION_INDICATOR, true); + caseInSensitiveMap.put(HINT_ONLY_INDICATOR, true); + caseInSensitiveMap.put(MUST_REDACT_INDICATOR, true); + caseInSensitiveMap.put(PUBLISHED_INFORMATION_INDICATOR, true); + caseInSensitiveMap.put(TEST_METHOD_INDICATOR, false); + caseInSensitiveMap.put(DICTIONARY_PII, false); + caseInSensitiveMap.put(PURITY_INDICATOR, false); + caseInSensitiveMap.put(IMAGE_INDICATOR, true); + caseInSensitiveMap.put(OCR_INDICATOR, true); + caseInSensitiveMap.put(SIGNATURE_INDICATOR, true); + caseInSensitiveMap.put(LOGO_INDICATOR, true); + caseInSensitiveMap.put(FORMULA_INDICATOR, true); + caseInSensitiveMap.put(DOSSIER_REDACTIONS_INDICATOR, false); + caseInSensitiveMap.put(IMPORTED_REDACTION_INDICATOR, false); + caseInSensitiveMap.put(ROTATE_SIMPLE_INDICATOR, true); + + recommendationTypeMap.put(VERTEBRATE_INDICATOR, false); + recommendationTypeMap.put(DICTIONARY_ADDRESS, false); + recommendationTypeMap.put(DICTIONARY_AUTHOR, false); + recommendationTypeMap.put(DICTIONARY_SPONSOR, false); + recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); + recommendationTypeMap.put(REDACTION_INDICATOR, false); + recommendationTypeMap.put(HINT_ONLY_INDICATOR, false); + recommendationTypeMap.put(MUST_REDACT_INDICATOR, false); + recommendationTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, false); + recommendationTypeMap.put(TEST_METHOD_INDICATOR, false); + recommendationTypeMap.put(DICTIONARY_PII, false); + recommendationTypeMap.put(PURITY_INDICATOR, false); + recommendationTypeMap.put(IMAGE_INDICATOR, false); + recommendationTypeMap.put(OCR_INDICATOR, false); + recommendationTypeMap.put(FORMULA_INDICATOR, false); + recommendationTypeMap.put(SIGNATURE_INDICATOR, false); + recommendationTypeMap.put(LOGO_INDICATOR, false); + recommendationTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, false); + recommendationTypeMap.put(IMPORTED_REDACTION_INDICATOR, false); + recommendationTypeMap.put(ROTATE_SIMPLE_INDICATOR, false); + + rankTypeMap.put(PURITY_INDICATOR, 155); + rankTypeMap.put(DICTIONARY_PII, 150); + rankTypeMap.put(DICTIONARY_ADDRESS, 140); + rankTypeMap.put(DICTIONARY_AUTHOR, 130); + rankTypeMap.put(DICTIONARY_SPONSOR, 120); + rankTypeMap.put(VERTEBRATE_INDICATOR, 110); + rankTypeMap.put(MUST_REDACT_INDICATOR, 100); + rankTypeMap.put(REDACTION_INDICATOR, 90); + rankTypeMap.put(NO_REDACTION_INDICATOR, 80); + rankTypeMap.put(PUBLISHED_INFORMATION_INDICATOR, 70); + rankTypeMap.put(TEST_METHOD_INDICATOR, 60); + rankTypeMap.put(HINT_ONLY_INDICATOR, 50); + rankTypeMap.put(IMAGE_INDICATOR, 30); + rankTypeMap.put(OCR_INDICATOR, 29); + rankTypeMap.put(LOGO_INDICATOR, 28); + rankTypeMap.put(SIGNATURE_INDICATOR, 27); + rankTypeMap.put(FORMULA_INDICATOR, 26); + rankTypeMap.put(DOSSIER_REDACTIONS_INDICATOR, 200); + rankTypeMap.put(IMPORTED_REDACTION_INDICATOR, 200); + rankTypeMap.put(ROTATE_SIMPLE_INDICATOR, 150); + + colors.setSkippedColor("#cccccc"); + colors.setRequestAddColor("#04b093"); + colors.setRequestRemoveColor("#04b093"); + } + + + @SneakyThrows + protected void loadNerForTest() { + + ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), + responseJson.getInputStream()); + } + + + protected List getTypeResponse() { + + return typeColorMap.entrySet() + .stream() + .map(typeColor -> Type.builder() + .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(typeColor.getKey()) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColor.getValue()) + .isHint(hintTypeMap.get(typeColor.getKey())) + .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) + .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) + .rank(rankTypeMap.get(typeColor.getKey())) + .build()) + + .collect(Collectors.toList()); + } + + + protected Type getDictionaryResponse(String type, boolean isDossierDictionary) { + + return Type.builder() + .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) + .hexColor(typeColorMap.get(type)) + .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) + .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) + .falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) + .isHint(hintTypeMap.get(type)) + .isCaseInsensitive(caseInSensitiveMap.get(type)) + .isRecommendation(recommendationTypeMap.get(type)) + .rank(rankTypeMap.get(type)) + .build(); + } + + + private String cleanDictionaryEntry(String entry) { + + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); + } + + + private List toDictionaryEntry(List entries) { + + if (entries == null) { + entries = Collections.emptyList(); + } + return entries.stream().map(this::toDictionaryEntry).collect(Collectors.toList()); + + } + + + private DictionaryEntry toDictionaryEntry(String entry) { + + return DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build(); + } + + + @SneakyThrows + protected AnalyzeRequest uploadFileToStorage(String file) { + + return prepareStorage(file, "files/cv_service_empty_response.json"); + } + + + @SneakyThrows + protected AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) { + + ClassPathResource pdfFileResource = new ClassPathResource(file); + ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile); + + return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream()); + } + + + @SneakyThrows + protected AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) { + + AnalyzeRequest request = AnalyzeRequest.builder() + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .lastProcessed(OffsetDateTime.now()) + .build(); + + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), + cvServiceResponseFileStream); + storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream); + + return request; + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 4a5edc2b..94457223 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -3,28 +3,26 @@ package com.iqser.red.service.redaction.v1.server; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.when; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; -import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; +import java.net.URI; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; -import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -33,17 +31,10 @@ import org.kie.api.KieServices; import org.kie.api.builder.KieBuilder; import org.kie.api.builder.KieFileSystem; import org.kie.api.builder.KieModule; -import org.kie.api.builder.KieRepository; -import org.kie.api.builder.ReleaseId; import org.kie.api.runtime.KieContainer; -import org.kie.internal.io.ResourceFactory; -import org.mockito.stubbing.Answer; -import org.springframework.amqp.rabbit.core.RabbitTemplate; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; @@ -53,9 +44,7 @@ import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit.jupiter.SpringExtension; -import com.amazonaws.services.s3.AmazonS3; import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; @@ -72,24 +61,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.configuration.Colors; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.DictionaryEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; -import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; -import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; -import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient; -import com.iqser.red.service.redaction.v1.server.client.RulesClient; -import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; -import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; -import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; -import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; -import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.service.StorageService; @@ -99,85 +78,12 @@ import lombok.SneakyThrows; @ExtendWith(SpringExtension.class) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) @Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class) -public class RedactionIntegrationTest { +public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest { private static final String RULES = loadFromClassPath("drools/rules.drl"); - private static final String VERTEBRATE = "vertebrate"; - private static final String ADDRESS = "CBI_address"; - private static final String AUTHOR = "CBI_author"; - private static final String SPONSOR = "CBI_sponsor"; - private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; - private static final String REDACTION_INDICATOR = "redaction_indicator"; - private static final String HINT_ONLY = "hint_only"; - private static final String MUST_REDACT = "must_redact"; - private static final String PUBLISHED_INFORMATION = "published_information"; - private static final String TEST_METHOD = "test_method"; - private static final String PURITY = "purity"; - private static final String IMAGE = "image"; - private static final String LOGO = "logo"; - private static final String SIGNATURE = "signature"; - private static final String FORMULA = "formula"; - private static final String OCR = "ocr"; - private static final String DOSSIER_REDACTIONS = "dossier_redactions"; - private static final String IMPORTED_REDACTION = "imported_redaction"; - private static final String PII = "PII"; - private static final String ROTATE_SIMPLE = "RotateSimple"; - - @Autowired - private RedactionController redactionController; - - @Autowired - private AnnotationService annotationService; - - @Autowired - private AnalyzeService analyzeService; - - @Autowired - private ObjectMapper objectMapper; - - @MockBean - private RulesClient rulesClient; - - @MockBean - private DictionaryClient dictionaryClient; - - @Autowired - private RedactionStorageService redactionStorageService; - - @Autowired - private StorageService storageService; - - @Autowired - private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; - - @MockBean - private AmazonS3 amazonS3; - - @MockBean - private RabbitTemplate rabbitTemplate; - - @MockBean - private LegalBasisClient legalBasisClient; - - private final Map> dictionary = new HashMap<>(); - private final Map> dossierDictionary = new HashMap<>(); - private final Map> falsePositive = new HashMap<>(); - private final Map> falseRecommendation = new HashMap<>(); - private final Map typeColorMap = new HashMap<>(); - private final Map hintTypeMap = new HashMap<>(); - private final Map caseInSensitiveMap = new HashMap<>(); - private final Map recommendationTypeMap = new HashMap<>(); - private final Map rankTypeMap = new HashMap<>(); - private final Colors colors = new Colors(); - private final Map reanlysisVersions = new HashMap<>(); - private final Set deleted = new HashSet<>(); - - private final static String TEST_DOSSIER_TEMPLATE_ID = "123"; - private final static String TEST_DOSSIER_ID = "123"; - private final static String TEST_FILE_ID = "123"; @Configuration - @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) + @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class}) @ComponentScan(excludeFilters={@ComponentScan.Filter(type= FilterType.ASSIGNABLE_TYPE, value=StorageAutoConfiguration.class)}) public static class RedactionIntegrationTestConfiguration { @@ -207,15 +113,6 @@ public class RedactionIntegrationTest { } - @AfterEach - public void cleanupStorage() { - - if (this.storageService instanceof FileSystemBackedStorageService) { - ((FileSystemBackedStorageService) this.storageService).clearStorage(); - } - } - - @BeforeEach public void stubClients() { @@ -232,67 +129,26 @@ public class RedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() - .id(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(DOSSIER_REDACTIONS) + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) .dossierTemplateId(TEST_DOSSIER_ID) .hexColor("#ffe187") - .isHint(hintTypeMap.get(DOSSIER_REDACTIONS)) - .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS)) - .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS)) - .rank(rankTypeMap.get(DOSSIER_REDACTIONS)) + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) .build())); mockDictionaryCalls(null); - mockDictionaryCalls(0L); when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); } - private void mockDictionaryCalls(Long version) { - - when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(VERTEBRATE, - false)); - when(dictionaryClient.getDictionaryForType(ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ADDRESS, false)); - when(dictionaryClient.getDictionaryForType(AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(AUTHOR, false)); - when(dictionaryClient.getDictionaryForType(SPONSOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SPONSOR, false)); - when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - NO_REDACTION_INDICATOR, - false)); - when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - REDACTION_INDICATOR, - false)); - when(dictionaryClient.getDictionaryForType(HINT_ONLY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(HINT_ONLY, false)); - when(dictionaryClient.getDictionaryForType(MUST_REDACT + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(MUST_REDACT, - false)); - when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - PUBLISHED_INFORMATION, - false)); - when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(TEST_METHOD, - false)); - when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PII, false)); - when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(PURITY, false)); - when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(IMAGE, false)); - when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(OCR, false)); - when(dictionaryClient.getDictionaryForType(LOGO + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(LOGO, false)); - when(dictionaryClient.getDictionaryForType(SIGNATURE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(SIGNATURE, false)); - when(dictionaryClient.getDictionaryForType(FORMULA + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(FORMULA, false)); - when(dictionaryClient.getDictionaryForType(ROTATE_SIMPLE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse(ROTATE_SIMPLE, - false)); - when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - DOSSIER_REDACTIONS, - true)); - when(dictionaryClient.getDictionaryForType(IMPORTED_REDACTION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).then((Answer) invocation -> getDictionaryResponse( - IMPORTED_REDACTION, - true)); - - } - - @Test public void test270Rotated() { - AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/270Rotated.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); assertThat(result).isNotNull(); @@ -303,7 +159,7 @@ public class RedactionIntegrationTest { @Disabled public void testLargeScannedFileOOM() { - AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf"); + AnalyzeRequest request = uploadFileToStorage("scanned/VV-377031.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); assertThat(result).isNotNull(); @@ -315,7 +171,7 @@ public class RedactionIntegrationTest { long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/Minimal Examples/merge_images.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/merge_images.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -329,7 +185,7 @@ public class RedactionIntegrationTest { duplicates.forEach((key, value) -> assertThat(value.size()).isEqualTo(1)); - dictionary.get(AUTHOR).add("Drinking water"); + dictionary.get(DICTIONARY_AUTHOR).add("Drinking water"); when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L); AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); @@ -355,7 +211,7 @@ public class RedactionIntegrationTest { // F. Lastname, J. Doe, M. Mustermann // Lastname M., Doe J., Mustermann M. - AnalyzeRequest request = prepareStorage("files/Minimal Examples/ExpansionTest.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/ExpansionTest.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -371,7 +227,7 @@ public class RedactionIntegrationTest { @Test public void titleExtraction() throws IOException { - AnalyzeRequest request = prepareStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -394,7 +250,7 @@ public class RedactionIntegrationTest { @Test public void testAddFileAttribute() { - AnalyzeRequest request = prepareStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf"); + AnalyzeRequest request = uploadFileToStorage("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -411,7 +267,7 @@ public class RedactionIntegrationTest { System.out.println("testIgnoreHint"); - AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/test-ignore-hint.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); analyzeService.analyze(request); @@ -449,15 +305,11 @@ public class RedactionIntegrationTest { System.out.println("noExceptionShouldBeThrownForAnyFiles"); ClassLoader loader = getClass().getClassLoader(); URL url = loader.getResource("files"); - File[] files = new File(url.getPath()).listFiles(); - List input = new ArrayList<>(); - for (File file : files) { - input.addAll(getPathsRecursively(file)); - } - for (File path : input) { + Path path = Paths.get(URI.create(url.toString())); - AnalyzeRequest request = prepareStorage(path.getPath()); - System.out.println("Redacting file : " + path.getName()); + Files.walk(path).forEach(currentPath -> { + AnalyzeRequest request = uploadFileToStorage(currentPath.toString()); + System.out.println("Redacting file : " + currentPath.getFileName()); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); long fstart = System.currentTimeMillis(); @@ -476,7 +328,7 @@ public class RedactionIntegrationTest { assertThat(entry.getValue().size()).isEqualTo(1); }); - dictionary.get(AUTHOR).add("Drinking water"); + dictionary.get(DICTIONARY_AUTHOR).add("Drinking water"); when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L); long rstart = System.currentTimeMillis(); @@ -484,8 +336,7 @@ public class RedactionIntegrationTest { long rend = System.currentTimeMillis(); System.out.println("reanalysis analysis duration: " + (rend - rstart)); - - } + }); long end = System.currentTimeMillis(); @@ -501,7 +352,7 @@ public class RedactionIntegrationTest { String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf"; long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage(fileName); + AnalyzeRequest request = uploadFileToStorage(fileName); request.setExcludedPages(Set.of(1)); request.setFileAttributes(List.of(FileAttribute.builder() @@ -545,10 +396,10 @@ public class RedactionIntegrationTest { } assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size()); - dictionary.get(AUTHOR).add("properties"); + dictionary.get(DICTIONARY_AUTHOR).add("properties"); reanlysisVersions.put("properties", 1L); - dictionary.get(AUTHOR).add("physical"); + dictionary.get(DICTIONARY_AUTHOR).add("physical"); reanlysisVersions.put("physical", 2L); deleted.add("David Chubb"); @@ -558,7 +409,7 @@ public class RedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L); - when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false)); start = System.currentTimeMillis(); @@ -591,7 +442,7 @@ public class RedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L); - when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false)); analyzeService.reanalyze(request); @@ -607,19 +458,19 @@ public class RedactionIntegrationTest { String fileName = "files/new/test1S1T1.pdf"; String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; - AnalyzeRequest request = prepareStorage(fileName); + AnalyzeRequest request = uploadFileToStorage(fileName); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); analyzeService.analyze(request); - dictionary.get(AUTHOR).add("report"); + dictionary.get(DICTIONARY_AUTHOR).add("report"); reanlysisVersions.put("report", 2L); when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(2L); mockDictionaryCalls(0L); analyzeService.reanalyze(request); - dictionary.get(AUTHOR).add("assessment report"); + dictionary.get(DICTIONARY_AUTHOR).add("assessment report"); reanlysisVersions.put("assessment report", 3L); when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L); mockDictionaryCalls(2L); @@ -650,10 +501,12 @@ public class RedactionIntegrationTest { String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; ClassPathResource responseJson = new ClassPathResource("files/crafted_document.NER_ENTITIES.json"); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream()); + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), + responseJson.getInputStream()); long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage(fileName); + AnalyzeRequest request = uploadFileToStorage(fileName); request.setExcludedPages(Set.of(1)); request.setFileAttributes(List.of(FileAttribute.builder() @@ -697,11 +550,11 @@ public class RedactionIntegrationTest { } assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size()); - dictionary.get(AUTHOR).add("properties"); + dictionary.get(DICTIONARY_AUTHOR).add("properties"); reanlysisVersions.put("properties", 1L); mockDictionaryCalls(0L); - dictionary.get(AUTHOR).add("physical"); + dictionary.get(DICTIONARY_AUTHOR).add("physical"); reanlysisVersions.put("physical", 2L); deleted.add("David Chubb"); @@ -713,7 +566,7 @@ public class RedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L); - when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false)); start = System.currentTimeMillis(); @@ -747,7 +600,7 @@ public class RedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L); - when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); + when(dictionaryClient.getDictionaryForType(VERTEBRATE_INDICATOR, null)).thenReturn(getDictionaryResponse(VERTEBRATE_INDICATOR, false)); analyzeService.reanalyze(request); @@ -770,7 +623,7 @@ public class RedactionIntegrationTest { }; List types = objectMapper.readValue(typeResource.getInputStream(), typeRefForTypes); - AnalyzeRequest request = prepareStorage("files/new/PublishedInformationTest.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/PublishedInformationTest.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); ManualRedactions manualRedactions = new ManualRedactions(); manualRedactions.getIdsToRemove() @@ -828,7 +681,7 @@ public class RedactionIntegrationTest { System.out.println("testTableRedaction"); long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -897,7 +750,7 @@ public class RedactionIntegrationTest { long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/new/unicodeProblem.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/unicodeProblem.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -919,7 +772,7 @@ public class RedactionIntegrationTest { System.out.println("testTableRedaction"); long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/new/RotateTestFile.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/RotateTestFile.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -945,7 +798,7 @@ public class RedactionIntegrationTest { System.out.println("testTableRedaction"); long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/new/RotateTestFileSimple.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/RotateTestFileSimple.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -967,7 +820,7 @@ public class RedactionIntegrationTest { System.out.println("testTableHeader"); long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/Minimal Examples/NoHeaderTable.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/NoHeaderTable.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1002,7 +855,7 @@ public class RedactionIntegrationTest { long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/new/S157.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/new/S157.pdf"); ManualRedactions manualRedactions = new ManualRedactions(); @@ -1078,7 +931,7 @@ public class RedactionIntegrationTest { manualRedactionEntry.setPositions(List.of(Rectangle.builder().topLeftX(375.61096f).topLeftY(241.282f).width(7.648041f).height(43.72262f).page(1).build(), Rectangle.builder().topLeftX(384.83517f).topLeftY(241.282f).width(7.648041f).height(17.043358f).page(1).build())); - AnalyzeRequest request = prepareStorage(pdfFile); + AnalyzeRequest request = uploadFileToStorage(pdfFile); request.setManualRedactions(manualRedactions); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1108,14 +961,10 @@ public class RedactionIntegrationTest { } - - - - @Test public void phantomCellsDocumentTest() throws IOException { - AnalyzeRequest request = prepareStorage("files/Minimal Examples/Phantom Cells.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/Phantom Cells.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1135,7 +984,7 @@ public class RedactionIntegrationTest { long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/Minimal Examples/sponsor_companies.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Minimal Examples/sponsor_companies.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1197,7 +1046,7 @@ public class RedactionIntegrationTest { // manualRedactions.getEntriesToAdd().add(manualRedactionEntry); - AnalyzeRequest request = prepareStorage(pdfFile); + AnalyzeRequest request = uploadFileToStorage(pdfFile); request.setManualRedactions(manualRedactions); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1253,7 +1102,7 @@ public class RedactionIntegrationTest { System.out.println("expandByRegex"); long start = System.currentTimeMillis(); - AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1308,7 +1157,7 @@ public class RedactionIntegrationTest { manualRedactions.getEntriesToAdd().add(manualRedactionEntry2); manualRedactions.getEntriesToAdd().add(manualRedactionEntry3); - AnalyzeRequest request = prepareStorage(pdfFile); + AnalyzeRequest request = uploadFileToStorage(pdfFile); request.setManualRedactions(manualRedactions); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); AnalyzeResult result = analyzeService.analyze(request); @@ -1332,280 +1181,15 @@ public class RedactionIntegrationTest { } - private void loadDictionaryForTest() { - - dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_author.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_sponsor.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(VERTEBRATE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/vertebrate.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/CBI_address.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/no_redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(REDACTION_INDICATOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/redaction_indicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(HINT_ONLY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/hint_only.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/must_redact.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PUBLISHED_INFORMATION, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/published_information.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(TEST_METHOD, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/test_method.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PII, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/purity.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(OCR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/empty.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); - - falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - - } - - - private void loadOnlyDictionaryForSimpleFile() { - - dictionary.clear(); - dictionary.computeIfAbsent(ROTATE_SIMPLE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/RotateTestFileSimple.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - } - - - private static String loadFromClassPath(String path) { - - URL resource = ResourceLoader.class.getClassLoader().getResource(path); - if (resource == null) { - throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl"); - } - try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { - StringBuilder sb = new StringBuilder(); - String str; - while ((str = br.readLine()) != null) { - sb.append(str).append("\n"); - } - return sb.toString(); - } catch (IOException e) { - throw new IllegalArgumentException("could not load classpath resource: " + path, e); - } - } - - - private List getPathsRecursively(File path) { - - List result = new ArrayList<>(); - if (path == null || path.listFiles() == null) { - return result; - } - for (File f : path.listFiles()) { - if (f.isFile()) { - result.add(f); - } else { - result.addAll(getPathsRecursively(f)); - } - } - return result; - - } - - - private void loadTypeForTest() { - - typeColorMap.put(VERTEBRATE, "#ff85f7"); - typeColorMap.put(ADDRESS, "#ffe187"); - typeColorMap.put(AUTHOR, "#ffe187"); - typeColorMap.put(SPONSOR, "#85ebff"); - typeColorMap.put(NO_REDACTION_INDICATOR, "#be85ff"); - typeColorMap.put(REDACTION_INDICATOR, "#caff85"); - typeColorMap.put(HINT_ONLY, "#abc0c4"); - typeColorMap.put(MUST_REDACT, "#fab4c0"); - typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); - typeColorMap.put(TEST_METHOD, "#91fae8"); - typeColorMap.put(PII, "#66ccff"); - typeColorMap.put(PURITY, "#ffe187"); - typeColorMap.put(IMAGE, "#fcc5fb"); - typeColorMap.put(OCR, "#fcc5fb"); - typeColorMap.put(LOGO, "#ffe187"); - typeColorMap.put(FORMULA, "#ffe187"); - typeColorMap.put(SIGNATURE, "#ffe187"); - typeColorMap.put(IMPORTED_REDACTION, "#fcfbe6"); - typeColorMap.put(ROTATE_SIMPLE, "#66ccff"); - - hintTypeMap.put(VERTEBRATE, true); - hintTypeMap.put(ADDRESS, false); - hintTypeMap.put(AUTHOR, false); - hintTypeMap.put(SPONSOR, false); - hintTypeMap.put(NO_REDACTION_INDICATOR, true); - hintTypeMap.put(REDACTION_INDICATOR, true); - hintTypeMap.put(HINT_ONLY, true); - hintTypeMap.put(MUST_REDACT, true); - hintTypeMap.put(PUBLISHED_INFORMATION, true); - hintTypeMap.put(TEST_METHOD, true); - hintTypeMap.put(PII, false); - hintTypeMap.put(PURITY, false); - hintTypeMap.put(IMAGE, true); - hintTypeMap.put(OCR, true); - hintTypeMap.put(FORMULA, false); - hintTypeMap.put(LOGO, false); - hintTypeMap.put(SIGNATURE, false); - hintTypeMap.put(DOSSIER_REDACTIONS, false); - hintTypeMap.put(IMPORTED_REDACTION, false); - hintTypeMap.put(ROTATE_SIMPLE, false); - - caseInSensitiveMap.put(VERTEBRATE, true); - caseInSensitiveMap.put(ADDRESS, false); - caseInSensitiveMap.put(AUTHOR, false); - caseInSensitiveMap.put(SPONSOR, false); - caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true); - caseInSensitiveMap.put(REDACTION_INDICATOR, true); - caseInSensitiveMap.put(HINT_ONLY, true); - caseInSensitiveMap.put(MUST_REDACT, true); - caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); - caseInSensitiveMap.put(TEST_METHOD, false); - caseInSensitiveMap.put(PII, false); - caseInSensitiveMap.put(PURITY, false); - caseInSensitiveMap.put(IMAGE, true); - caseInSensitiveMap.put(OCR, true); - caseInSensitiveMap.put(SIGNATURE, true); - caseInSensitiveMap.put(LOGO, true); - caseInSensitiveMap.put(FORMULA, true); - caseInSensitiveMap.put(DOSSIER_REDACTIONS, false); - caseInSensitiveMap.put(IMPORTED_REDACTION, false); - caseInSensitiveMap.put(ROTATE_SIMPLE, true); - - recommendationTypeMap.put(VERTEBRATE, false); - recommendationTypeMap.put(ADDRESS, false); - recommendationTypeMap.put(AUTHOR, false); - recommendationTypeMap.put(SPONSOR, false); - recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); - recommendationTypeMap.put(REDACTION_INDICATOR, false); - recommendationTypeMap.put(HINT_ONLY, false); - recommendationTypeMap.put(MUST_REDACT, false); - recommendationTypeMap.put(PUBLISHED_INFORMATION, false); - recommendationTypeMap.put(TEST_METHOD, false); - recommendationTypeMap.put(PII, false); - recommendationTypeMap.put(PURITY, false); - recommendationTypeMap.put(IMAGE, false); - recommendationTypeMap.put(OCR, false); - recommendationTypeMap.put(FORMULA, false); - recommendationTypeMap.put(SIGNATURE, false); - recommendationTypeMap.put(LOGO, false); - recommendationTypeMap.put(DOSSIER_REDACTIONS, false); - recommendationTypeMap.put(IMPORTED_REDACTION, false); - recommendationTypeMap.put(ROTATE_SIMPLE, false); - - rankTypeMap.put(PURITY, 155); - rankTypeMap.put(PII, 150); - rankTypeMap.put(ADDRESS, 140); - rankTypeMap.put(AUTHOR, 130); - rankTypeMap.put(SPONSOR, 120); - rankTypeMap.put(VERTEBRATE, 110); - rankTypeMap.put(MUST_REDACT, 100); - rankTypeMap.put(REDACTION_INDICATOR, 90); - rankTypeMap.put(NO_REDACTION_INDICATOR, 80); - rankTypeMap.put(PUBLISHED_INFORMATION, 70); - rankTypeMap.put(TEST_METHOD, 60); - rankTypeMap.put(HINT_ONLY, 50); - rankTypeMap.put(IMAGE, 30); - rankTypeMap.put(OCR, 29); - rankTypeMap.put(LOGO, 28); - rankTypeMap.put(SIGNATURE, 27); - rankTypeMap.put(FORMULA, 26); - rankTypeMap.put(DOSSIER_REDACTIONS, 200); - rankTypeMap.put(IMPORTED_REDACTION, 200); - rankTypeMap.put(ROTATE_SIMPLE, 150); - - colors.setSkippedColor("#cccccc"); - colors.setRequestAddColor("#04b093"); - colors.setRequestRemoveColor("#04b093"); - } - - - @SneakyThrows - private void loadNerForTest() { - - ClassPathResource responseJson = new ClassPathResource("files/ner_response.json"); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream()); - } - - - private List getTypeResponse() { - - return typeColorMap.entrySet() - .stream() - .map(typeColor -> Type.builder() - .id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID) - .type(typeColor.getKey()) - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColor.getValue()) - .isHint(hintTypeMap.get(typeColor.getKey())) - .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) - .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) - .rank(rankTypeMap.get(typeColor.getKey())) - .build()) - - .collect(Collectors.toList()); - } - - - private Type getDictionaryResponse(String type, boolean isDossierDictionary) { - - return Type.builder() - .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) - .hexColor(typeColorMap.get(type)) - .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) - .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) - .falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) - .isHint(hintTypeMap.get(type)) - .isCaseInsensitive(caseInSensitiveMap.get(type)) - .isRecommendation(recommendationTypeMap.get(type)) - .rank(rankTypeMap.get(type)) - .build(); - } - - - private String cleanDictionaryEntry(String entry) { - - return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); - } - - - private List toDictionaryEntry(List entries) { - - if (entries == null) { - entries = Collections.emptyList(); - } - - List dictionaryEntries = new ArrayList<>(); - entries.forEach(entry -> { - dictionaryEntries.add(DictionaryEntry.builder().value(entry).version(reanlysisVersions.getOrDefault(entry, 0L)).deleted(deleted.contains(entry)).build()); - }); - return dictionaryEntries; - } - - @Test public void testImportedRedactions() throws IOException { String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json"); - AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf"); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), + AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf"); + storageService.storeObject(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactions.getInputStream()); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); @@ -1636,17 +1220,17 @@ public class RedactionIntegrationTest { @Test public void testExpandByPrefixRegEx() throws IOException { - assertThat(dictionary.get(AUTHOR)).contains("Robinson"); - assertThat(dictionary.get(AUTHOR)).doesNotContain("Mrs. Robinson"); - assertThat(dictionary.get(AUTHOR)).contains("Bojangles"); - assertThat(dictionary.get(AUTHOR)).doesNotContain("Mr. Bojangles"); - assertThat(dictionary.get(AUTHOR)).contains("Tambourine Man"); - assertThat(dictionary.get(AUTHOR)).doesNotContain("Mr. Tambourine Man"); + assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Robinson"); + assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mrs. Robinson"); + assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Bojangles"); + assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mr. Bojangles"); + assertThat(dictionary.get(DICTIONARY_AUTHOR)).contains("Tambourine Man"); + assertThat(dictionary.get(DICTIONARY_AUTHOR)).doesNotContain("Mr. Tambourine Man"); String fileName = "files/mr-mrs.pdf"; String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; - AnalyzeRequest request = prepareStorage(fileName); + AnalyzeRequest request = uploadFileToStorage(fileName); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); analyzeService.analyze(request); @@ -1665,39 +1249,4 @@ public class RedactionIntegrationTest { assertThat(values).contains("Mr. Tambourine Man"); } - - @SneakyThrows - private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) { - - AnalyzeRequest request = AnalyzeRequest.builder() - .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) - .dossierId(TEST_DOSSIER_ID) - .fileId(TEST_FILE_ID) - .lastProcessed(OffsetDateTime.now()) - .build(); - - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream); - storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream); - - return request; - - } - - - @SneakyThrows - private AnalyzeRequest prepareStorage(String file) { - - return prepareStorage(file, "files/cv_service_empty_response.json"); - } - - - @SneakyThrows - private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) { - - ClassPathResource pdfFileResource = new ClassPathResource(file); - ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile); - - return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream()); - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTestV2.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTestV2.java new file mode 100644 index 00000000..b8f4d592 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTestV2.java @@ -0,0 +1,151 @@ +package com.iqser.red.service.redaction.v1.server; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.kie.api.KieServices; +import org.kie.api.builder.KieBuilder; +import org.kie.api.builder.KieFileSystem; +import org.kie.api.builder.KieModule; +import org.kie.api.runtime.KieContainer; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; +import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@Import(RedactionIntegrationTestV2.RedactionIntegrationTestConfiguration.class) +public class RedactionIntegrationTestV2 extends AbstractRedactionIntegrationTest { + + private static final String RULES = loadFromClassPath("drools/rules_v2.drl"); + + @Configuration + @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class}) + static class RedactionIntegrationTestConfiguration { + + @Bean + public KieContainer kieContainer() { + + KieServices kieServices = KieServices.Factory.get(); + + KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); + InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8)); + kieFileSystem.write("src/test/resources/drools/rules_v2", kieServices.getResources().newInputStreamResource(input)); + KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); + kieBuilder.buildAll(); + KieModule kieModule = kieBuilder.getKieModule(); + + return kieServices.newKieContainer(kieModule.getReleaseId()); + } + + + @Bean + @Primary + public StorageService inmemoryStorage() { + + return new FileSystemBackedStorageService(); + } + + } + + + @BeforeEach + public void stubClients() { + + TenantContext.setTenantId("redaction"); + + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES)); + + loadDictionaryForTest(); + loadTypeForTest(); + loadNerForTest(); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse()); + + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); + + mockDictionaryCalls(null); + + when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); + } + + + /** + * The case in this test: One term, 'Dr. Alan Miller', is found by PII-Rule and is in the PII-dictionary + * as well as in the PII-false-positive-list - and in the CBI-author dictionary. + * It gets redacted, as the PII-finding is false positive and so the CBI-author entry is effective + * independent of the entity-rank + */ + @Test + @SneakyThrows + public void testTermIsInTwoDictionariesAndInOneFalsePositive() { + + AnalyzeRequest request = uploadFileToStorage("files/new/simplified2.pdf"); + + dictionary.clear(); + dictionary.put(DICTIONARY_PII, Arrays.asList("Dr. Alan Miller")); + dictionary.put(DICTIONARY_AUTHOR, Arrays.asList("Dr. Alan Miller")); + + falsePositive.clear(); + falsePositive.put(DICTIONARY_PII, Arrays.asList("Dr. Alan Miller COMPLETION DATE:")); + + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + analyzeService.analyze(request); + + var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(1); + + RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0); + + assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR); + assertThat(redactionLogEntry.getValue()).isEqualTo("Dr. Alan Miller"); + assertThat(redactionLogEntry.isRedacted()).isEqualTo(true); + assertThat(redactionLogEntry.isRecommendation()).isEqualTo(false); + assertThat(redactionLogEntry.isFalsePositive()).isEqualTo(false); + assertThat(redactionLogEntry.isExcluded()).isEqualTo(false); + assertThat(redactionLogEntry.isDictionaryEntry()).isEqualTo(true); + + assertThat(redactionLogEntry.getEngines().size()).isEqualTo(1); + assertThat(redactionLogEntry.getEngines().contains(Engine.DICTIONARY)).isEqualTo(true); + + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl new file mode 100644 index 00000000..44e24b98 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules_v2.drl @@ -0,0 +1,341 @@ +package drools + +import com.iqser.red.service.redaction.v1.server.redaction.model.Section + +global Section section + + +// --------------------------------------- AI rules ------------------------------------------------------------------- + +rule "0: Add CBI_author from ai" + when + Section(aiMatchesType("CBI_author")) + then + section.addAiEntities("CBI_author", "CBI_author"); + end + +rule "0: Combine address parts from ai to CBI_address (org is mandatory)" + when + Section(aiMatchesType("ORG")) + then + section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); + end + +rule "0: Combine address parts from ai to CBI_address (street is mandatory)" + when + Section(aiMatchesType("STREET")) + then + section.combineAiTypes("STREET", "ORG,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); + end + +rule "0: Combine address parts from ai to CBI_address (city is mandatory)" + when + Section(aiMatchesType("CITY")) + then + section.combineAiTypes("CITY", "ORG,STREET,POSTAL,COUNTRY,CARDINAL,STATE", 20, "CBI_address", 3, false); + end + +/* Syngenta specific laboratory recommendation */ +rule "0: Recommend CTL/BL laboratory that start with BL or CTL" + when + Section(searchText.contains("CT") || searchText.contains("BL")) + then + /* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */ + section.addRecommendationByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address"); + end + + +// --------------------------------------- CBI rules ------------------------------------------------------------------- + +rule "1: Redact CBI Authors (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author")) + then + section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "2: Redact CBI Authors (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author")) + then + section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "3: Redact not CBI Address (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address")) + then + section.redactNot("CBI_address", 3, "Address found for non vertebrate study"); + section.ignoreRecommendations("CBI_address"); + end + +rule "4: Redact CBI Address (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address")) + then + section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "5: Do not redact genitive CBI_author" + when + Section(matchesType("CBI_author")) + then + section.expandToFalsePositiveByRegEx("CBI_author", "['’’'ʼˈ´`‘′ʻ’']s", false, 0); + end + + +rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "9: Redact Author cells in Tables with Author header (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N")) + then + section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No"))) + then + section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No"))) + then + section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + +rule "14: Redact and add recommendation for et al. author (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al")) + then + section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "15: Redact and add recommendation for et al. author (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al")) + then + section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "16: Add recommendation for Addresses in Test Organism sections" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:")) + then + section.recommendLineAfter("Source:", "CBI_address"); + end + +rule "17: Add recommendation for Addresses in Test Animals sections" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source")) + then + section.recommendLineAfter("Source", "CBI_address"); + end + + +rule "18: Do not redact Names and Addresses if Published Information found" + when + Section(matchesType("published_information")) + then + section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found"); + section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found"); + end + + +// --------------------------------------- PII rules ------------------------------------------------------------------- + + +rule "19: Redacted PII Personal Identification Information (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII")) + then + section.redact("PII", 19, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "20: Redacted PII Personal Identification Information (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("PII")) + then + section.redact("PII", 20, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "21: Redact Emails by RegEx (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@")) + then + section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "22: Redact Emails by RegEx (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("@")) + then + section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "25: Redact Phone and Fax by RegEx (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && ( + text.contains("Contact") + || text.contains("Telephone") + || text.contains("Phone") + || text.contains("Fax") + || text.contains("Tel") + || text.contains("Ter") + || text.contains("Mobile") + || text.contains("Fel") + || text.contains("Fer") + )) + then + section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 25, "Personal information found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "26: Redact Phone and Fax by RegEx (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && ( + text.contains("Contact") + || text.contains("Telephone") + || text.contains("Phone") + || text.contains("Fax") + || text.contains("Tel") + || text.contains("Ter") + || text.contains("Mobile") + || text.contains("Fel") + || text.contains("Fer") + )) + then + section.redactByRegEx("\\b(contact|telephone|phone|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", true, 2, "PII", 26, "Personal information found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "27: Redact AUTHOR(S) (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("COMPLETION DATE:") + && !searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 27, true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "28: Redact AUTHOR(S) (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("COMPLETION DATE:") + && !searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 28, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "29: Redact AUTHOR(S) (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 29, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "30: Redact AUTHOR(S) (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("AUTHOR(S):") + && searchText.contains("STUDY COMPLETION DATE:") + ) + then + section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 30, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("PERFORMING LABORATORY:") + ) + then + section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 31, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + section.redactNot("CBI_address", 31, "Performing laboratory found for non vertebrate study"); + end + +rule "32: Redact PERFORMING LABORATORY (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") + && searchText.contains("PERFORMING LABORATORY:")) + then + section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 32, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +// --------------------------------------- other rules ------------------------------------------------------------------- + +rule "33: Purity Hint" + when + Section(searchText.toLowerCase().contains("purity")) + then + section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only"); + end + + +rule "34: Ignore dossier_redaction entries if confidentiality is not 'confidential'" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential") && matchesType("dossier_redaction")); + then + section.ignore("dossier_redaction"); + end + + +rule "35: Redact signatures (Non vertebrate study)" + when + Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature")) + then + section.redactImage("signature", 35, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); + end + +rule "36: Redact signatures (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("signature")) + then + section.redactImage("signature", 36, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end + + +rule "43: Redact Logos (Vertebrate study)" + when + Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesImageType("logo")) + then + section.redactImage("logo", 43, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002"); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/simplified2.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/simplified2.pdf new file mode 100644 index 00000000..32eac69f Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/simplified2.pdf differ