RuleSetId integration and drools update
This commit is contained in:
parent
c2fd749fec
commit
5aba4b69ba
@ -32,7 +32,7 @@
|
||||
<dependency>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-commons-dependency</artifactId>
|
||||
<version>1.1.0</version>
|
||||
<version>1.1.7</version>
|
||||
<scope>import</scope>
|
||||
<type>pom</type>
|
||||
</dependency>
|
||||
|
||||
@ -12,6 +12,7 @@ import lombok.NoArgsConstructor;
|
||||
public class RedactionRequest {
|
||||
|
||||
private byte[] document;
|
||||
private String ruleSetId;
|
||||
private boolean flatRedaction;
|
||||
private ManualRedactions manualRedactions;
|
||||
}
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>configuration-service-api-v1</artifactId>
|
||||
<version>1.3.7</version>
|
||||
<version>2.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.drools</groupId>
|
||||
|
||||
@ -1,16 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.metrics.web.servlet.WebMvcMetricsAutoConfiguration;
|
||||
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
||||
@ -18,54 +10,17 @@ import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
@Import({DefaultWebMvcConfiguration.class})
|
||||
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
||||
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
||||
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class, WebMvcMetricsAutoConfiguration.class})
|
||||
public class Application {
|
||||
|
||||
@Autowired
|
||||
private RulesClient rulesClient;
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
try {
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
RulesResponse rules = rulesClient.getRules();
|
||||
if (StringUtils.isEmpty(rules.getRules())) {
|
||||
throw new RuntimeException("Rules cannot be empty.");
|
||||
}
|
||||
InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
} catch (Exception e) {
|
||||
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,14 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.controller;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
@ -25,9 +16,16 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.AnnotationHighlightService;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@RestController
|
||||
@ -51,9 +49,9 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
log.info("Document structure analysis successful, starting redaction analysis...");
|
||||
|
||||
entityRedactionService.processDocument(classifiedDoc, redactionRequest.getManualRedactions());
|
||||
entityRedactionService.processDocument(classifiedDoc, redactionRequest.getRuleSetId(), redactionRequest.getManualRedactions());
|
||||
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest
|
||||
.getManualRedactions());
|
||||
.getManualRedactions(), redactionRequest.getRuleSetId());
|
||||
|
||||
log.info("Redaction analysis successful...");
|
||||
|
||||
@ -152,7 +150,7 @@ public class RedactionController implements RedactionResource {
|
||||
return RedactionResult.builder()
|
||||
.document(byteArrayOutputStream.toByteArray())
|
||||
.numberOfPages(numberOfPages)
|
||||
.redactionLog(new RedactionLog(redactionLogEntities,dictionaryVersion, rulesVersion))
|
||||
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryVersion, rulesVersion))
|
||||
.sectionGrid(sectionGrid)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
public class DictionaryRepresentation {
|
||||
|
||||
private String ruleSetId;
|
||||
private long dictionaryVersion = -1;
|
||||
private List<DictionaryModel> dictionary = new ArrayList<>();
|
||||
private float[] defaultColor;
|
||||
private float[] requestAddColor;
|
||||
private float[] requestRemoveColor;
|
||||
private float[] notRedactedColor;
|
||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||
|
||||
|
||||
|
||||
}
|
||||
@ -1,5 +1,19 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
||||
import feign.FeignException;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
@ -10,22 +24,6 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
|
||||
import feign.FeignException;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -34,60 +32,51 @@ public class DictionaryService {
|
||||
|
||||
private final DictionaryClient dictionaryClient;
|
||||
|
||||
@Getter
|
||||
private long dictionaryVersion = -1;
|
||||
|
||||
@Getter
|
||||
private List<DictionaryModel> dictionary = new ArrayList<>();
|
||||
|
||||
@Getter
|
||||
private float[] defaultColor;
|
||||
|
||||
@Getter
|
||||
private float[] requestAddColor;
|
||||
|
||||
@Getter
|
||||
private float[] requestRemoveColor;
|
||||
|
||||
@Getter
|
||||
private float[] notRedactedColor;
|
||||
private Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
|
||||
|
||||
|
||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||
public void updateDictionary(String ruleSetId) {
|
||||
|
||||
long version = dictionaryClient.getVersion(ruleSetId);
|
||||
|
||||
var foundDictionary = dictionariesByRuleSets.get(ruleSetId);
|
||||
|
||||
public void updateDictionary() {
|
||||
|
||||
long version = dictionaryClient.getVersion();
|
||||
if (version > dictionaryVersion) {
|
||||
dictionaryVersion = version;
|
||||
updateDictionaryEntry();
|
||||
if (foundDictionary == null || version > foundDictionary.getDictionaryVersion()) {
|
||||
updateDictionaryEntry(ruleSetId, version);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void updateDictionaryEntry() {
|
||||
private void updateDictionaryEntry(String ruleSetId, long version) {
|
||||
|
||||
try {
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes();
|
||||
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
|
||||
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId);
|
||||
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
|
||||
|
||||
dictionary = typeResponse.getTypes()
|
||||
List<DictionaryModel> dictionary = typeResponse.getTypes()
|
||||
.stream()
|
||||
.map(t -> new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
|
||||
.isHint(), t.isRecommendation(), convertEntries(t), new HashSet<>()))
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
localAccessMap.clear();
|
||||
dictionary.forEach(dm -> localAccessMap.put(dm.getType(), dm));
|
||||
|
||||
Colors colors = dictionaryClient.getColors();
|
||||
defaultColor = convertColor(colors.getDefaultColor());
|
||||
requestAddColor = convertColor(colors.getRequestAdd());
|
||||
requestRemoveColor = convertColor(colors.getRequestRemove());
|
||||
notRedactedColor = convertColor(colors.getNotRedacted());
|
||||
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
|
||||
|
||||
Colors colors = dictionaryClient.getColors(ruleSetId);
|
||||
|
||||
dictionaryRepresentation.setDefaultColor(convertColor(colors.getDefaultColor()));
|
||||
dictionaryRepresentation.setRequestAddColor(convertColor(colors.getRequestAdd()));
|
||||
dictionaryRepresentation.setRequestRemoveColor(convertColor(colors.getRequestRemove()));
|
||||
dictionaryRepresentation.setNotRedactedColor(convertColor(colors.getNotRedacted()));
|
||||
dictionaryRepresentation.setRuleSetId(ruleSetId);
|
||||
dictionaryRepresentation.setDictionaryVersion(version);
|
||||
dictionaryRepresentation.setDictionary(dictionary);
|
||||
|
||||
dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation);
|
||||
}
|
||||
} catch (FeignException e) {
|
||||
log.warn("Got some unknown feignException", e);
|
||||
@ -96,12 +85,12 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public void updateExternalDictionary(Dictionary dictionary){
|
||||
public void updateExternalDictionary(Dictionary dictionary, String ruleSetId) {
|
||||
dictionary.getDictionaryModels().forEach(dm -> {
|
||||
if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){
|
||||
dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false);
|
||||
long externalVersion = dictionaryClient.getVersion();
|
||||
if(externalVersion == dictionary.getVersion() + 1){
|
||||
if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) {
|
||||
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false);
|
||||
long externalVersion = dictionaryClient.getVersion(ruleSetId);
|
||||
if (externalVersion == dictionary.getVersion() + 1) {
|
||||
dictionary.setVersion(externalVersion);
|
||||
}
|
||||
}
|
||||
@ -112,13 +101,13 @@ public class DictionaryService {
|
||||
private Set<String> convertEntries(TypeResult t) {
|
||||
|
||||
if (t.isCaseInsensitive()) {
|
||||
return dictionaryClient.getDictionaryForType(t.getType())
|
||||
return dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId())
|
||||
.getEntries()
|
||||
.stream()
|
||||
.map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
} else {
|
||||
return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType()).getEntries());
|
||||
return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId()).getEntries());
|
||||
}
|
||||
}
|
||||
|
||||
@ -130,9 +119,9 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public boolean isCaseInsensitiveDictionary(String type) {
|
||||
public boolean isCaseInsensitiveDictionary(String type, String ruleSetId) {
|
||||
|
||||
DictionaryModel dictionaryModel = localAccessMap.get(type);
|
||||
DictionaryModel dictionaryModel = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
if (dictionaryModel != null) {
|
||||
return dictionaryModel.isCaseInsensitive();
|
||||
}
|
||||
@ -140,28 +129,28 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public float[] getColor(String type) {
|
||||
public float[] getColor(String type, String ruleSetId) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.getColor();
|
||||
}
|
||||
return defaultColor;
|
||||
return dictionariesByRuleSets.get(ruleSetId).getDefaultColor();
|
||||
}
|
||||
|
||||
|
||||
public boolean isHint(String type) {
|
||||
public boolean isHint(String type, String ruleSetId) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.isHint();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isRecommendation(String type) {
|
||||
public boolean isRecommendation(String type, String ruleSetId) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.isRecommendation();
|
||||
}
|
||||
@ -169,14 +158,27 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public Dictionary getDeepCopyDictionary(){
|
||||
public Dictionary getDeepCopyDictionary(String ruleSetId) {
|
||||
List<DictionaryModel> copy = new ArrayList<>();
|
||||
|
||||
var representation = dictionariesByRuleSets.get(ruleSetId);
|
||||
var dictionary = dictionariesByRuleSets.get(ruleSetId).getDictionary();
|
||||
dictionary.forEach(dm -> {
|
||||
copy.add(SerializationUtils.clone(dm));
|
||||
});
|
||||
|
||||
return new Dictionary(copy, dictionaryVersion);
|
||||
return new Dictionary(copy, representation.getDictionaryVersion());
|
||||
}
|
||||
|
||||
public float[] getRequestRemoveColor(String ruleSetId) {
|
||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||
}
|
||||
|
||||
public float[] getNotRedactedColor(String ruleSetId) {
|
||||
return dictionariesByRuleSets.get(ruleSetId).getNotRedactedColor();
|
||||
}
|
||||
|
||||
public float[] getRequestAddColor(String ruleSetId) {
|
||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||
}
|
||||
}
|
||||
@ -1,9 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
@ -11,15 +13,13 @@ import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -27,13 +27,22 @@ public class DroolsExecutionService {
|
||||
|
||||
private final RulesClient rulesClient;
|
||||
|
||||
@Autowired
|
||||
private KieContainer kieContainer;
|
||||
private Map<String, KieContainer> kieContainers = new HashMap<>();
|
||||
|
||||
public KieContainer getKieContainer(String ruleSetId) {
|
||||
KieContainer container = kieContainers.get(ruleSetId);
|
||||
if (container == null) {
|
||||
return createOrUpdateKieContainer(ruleSetId);
|
||||
} else {
|
||||
return container;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Getter
|
||||
private long rulesVersion = -1;
|
||||
|
||||
public Section executeRules(Section section) {
|
||||
public Section executeRules(KieContainer kieContainer, Section section) {
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("section", section);
|
||||
@ -45,34 +54,53 @@ public class DroolsExecutionService {
|
||||
|
||||
}
|
||||
|
||||
public void updateRules() {
|
||||
public KieContainer updateRules(String ruleSetId) {
|
||||
|
||||
long version = rulesClient.getVersion();
|
||||
long version = rulesClient.getVersion(ruleSetId);
|
||||
if (version > rulesVersion) {
|
||||
rulesVersion = version;
|
||||
updateRules(rulesClient.getRules().getRules());
|
||||
return createOrUpdateKieContainer(ruleSetId);
|
||||
}
|
||||
return getKieContainer(ruleSetId);
|
||||
|
||||
}
|
||||
|
||||
public void updateRules(String drlAsString) {
|
||||
private KieContainer createOrUpdateKieContainer(String ruleSetId) {
|
||||
|
||||
try {
|
||||
if (StringUtils.isEmpty(drlAsString)) {
|
||||
|
||||
RulesResponse rules = rulesClient.getRules(ruleSetId);
|
||||
if (rules == null || StringUtils.isEmpty(rules.getRules())) {
|
||||
throw new RuntimeException("Rules cannot be empty.");
|
||||
}
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
InputStream input = new ByteArrayInputStream(drlAsString.getBytes(StandardCharsets.UTF_8));
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
kieContainer.updateToVersion(kieModule.getReleaseId());
|
||||
KieModule kieModule = getKieModule(ruleSetId, rules.getRules(), kieServices);
|
||||
|
||||
var container = kieContainers.get(ruleSetId);
|
||||
if (container != null) {
|
||||
container.updateToVersion(kieModule.getReleaseId());
|
||||
return container;
|
||||
}
|
||||
|
||||
container = kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
kieContainers.put(ruleSetId, container);
|
||||
return container;
|
||||
} catch (Exception e) {
|
||||
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private KieModule getKieModule(String ruleSetId, String rules, KieServices kieServices) {
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream(rules.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules" + ruleSetId + ".drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
return kieBuilder.getKieModule();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -1,20 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
@ -32,9 +17,23 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchab
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.PositionUtil;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ -46,15 +45,15 @@ public class EntityRedactionService {
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc, ManualRedactions manualRedactions) {
|
||||
public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions) {
|
||||
|
||||
dictionaryService.updateDictionary();
|
||||
droolsExecutionService.updateRules();
|
||||
dictionaryService.updateDictionary(ruleSetId);
|
||||
KieContainer container = droolsExecutionService.updateRules(ruleSetId);
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion();
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary();
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId);
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, dictionary, false, null));
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null));
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
|
||||
@ -66,7 +65,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
});
|
||||
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionary, true, hintsPerSectionNumber);
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber);
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
documentEntities.removeAll(foundByLocal);
|
||||
documentEntities.addAll(foundByLocal);
|
||||
@ -90,14 +89,14 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
dictionaryService.updateExternalDictionary(dictionary);
|
||||
dictionaryService.updateExternalDictionary(dictionary, ruleSetId);
|
||||
|
||||
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
|
||||
classifiedDoc.setRulesVersion(rulesVersion);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, Dictionary dictionary,
|
||||
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer, ManualRedactions manualRedactions, Dictionary dictionary,
|
||||
boolean local, Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
@ -120,7 +119,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(sectionSearchableTextPair.getSection());
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer,sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
|
||||
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
|
||||
@ -1,28 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
||||
@ -44,8 +21,29 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.DictionarySer
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -55,7 +53,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
|
||||
public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction,
|
||||
ManualRedactions manualRedactions) throws IOException {
|
||||
ManualRedactions manualRedactions, String ruleSetId) throws IOException {
|
||||
|
||||
Set<Integer> manualRedactionPages = getManualRedactionPages(manualRedactions);
|
||||
|
||||
@ -66,11 +64,11 @@ public class AnnotationHighlightService {
|
||||
drawSectionFrames(document, classifiedDoc, flatRedaction, pdPage, page);
|
||||
|
||||
if (classifiedDoc.getEntities().get(page) != null) {
|
||||
addAnnotations(pdPage, classifiedDoc, flatRedaction, manualRedactions, page);
|
||||
addAnnotations(pdPage, classifiedDoc, flatRedaction, manualRedactions, page, ruleSetId);
|
||||
}
|
||||
|
||||
if (manualRedactionPages.contains(page)) {
|
||||
addManualAnnotations(pdPage, classifiedDoc, manualRedactions, page);
|
||||
addManualAnnotations(pdPage, classifiedDoc, manualRedactions, page, ruleSetId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -94,7 +92,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
|
||||
private void addAnnotations(PDPage pdPage, Document classifiedDoc, boolean flatRedaction,
|
||||
ManualRedactions manualRedactions, int page) throws IOException {
|
||||
ManualRedactions manualRedactions, int page, String ruleSetId) throws IOException {
|
||||
|
||||
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
||||
|
||||
@ -104,7 +102,7 @@ public class AnnotationHighlightService {
|
||||
entityLoop:
|
||||
for (Entity entity : classifiedDoc.getEntities().get(page)) {
|
||||
|
||||
if (flatRedaction && !isRedactionType(entity)) {
|
||||
if (flatRedaction && !isRedactionType(entity, ruleSetId)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -113,7 +111,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity);
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, ruleSetId);
|
||||
if (processedIds.contains(entityPositionSequence.getId())) {
|
||||
|
||||
// TODO refactor this outer loop jump as soon as we have the time.
|
||||
@ -161,7 +159,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
|
||||
|
||||
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, requestedToRemove), comments, !isHint(entity)));
|
||||
annotations.addAll(createAnnotation(rectanglesPerLine, entityPositionSequence.getId(), createAnnotationContent(entity), getColor(entity, ruleSetId, requestedToRemove), comments, !isHint(entity, ruleSetId)));
|
||||
}
|
||||
|
||||
redactionLogEntry.setId(entityPositionSequence.getId());
|
||||
@ -203,7 +201,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
|
||||
private void addManualAnnotations(PDPage pdPage, Document classifiedDoc, ManualRedactions manualRedactions,
|
||||
int page) throws IOException {
|
||||
int page, String ruleSetId) throws IOException {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return;
|
||||
@ -215,7 +213,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
String id = manualRedactionEntry.getId();
|
||||
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, id);
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, id, ruleSetId);
|
||||
|
||||
List<Rectangle> rectanglesOnPage = new ArrayList<>();
|
||||
for (Rectangle rectangle : manualRedactionEntry.getPositions()) {
|
||||
@ -227,7 +225,7 @@ public class AnnotationHighlightService {
|
||||
|
||||
if (!rectanglesOnPage.isEmpty() && !approvedAndShouldBeInDictionary(manualRedactionEntry)) {
|
||||
annotations.addAll(createAnnotation(rectanglesOnPage, id, createAnnotationContent(manualRedactionEntry), getColorForManualAdd(manualRedactionEntry
|
||||
.getType(), manualRedactionEntry.getStatus()), manualRedactions.getComments().get(id), true));
|
||||
.getType(), ruleSetId, manualRedactionEntry.getStatus()), manualRedactions.getComments().get(id), true));
|
||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||
}
|
||||
}
|
||||
@ -240,11 +238,11 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id) {
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id, String ruleSetId) {
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.id(id)
|
||||
.color(getColor(manualRedactionEntry.getType()))
|
||||
.color(getColor(manualRedactionEntry.getType(), ruleSetId))
|
||||
.reason(manualRedactionEntry.getReason())
|
||||
.legalBasis(manualRedactionEntry.getLegalBasis())
|
||||
.value(manualRedactionEntry.getValue())
|
||||
@ -261,17 +259,17 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(Entity entity) {
|
||||
private RedactionLogEntry createRedactionLogEntry(Entity entity, String ruleSetId) {
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.color(getColor(entity, false))
|
||||
.color(getColor(entity, ruleSetId, false))
|
||||
.reason(entity.getRedactionReason())
|
||||
.legalBasis(entity.getLegalBasis())
|
||||
.value(entity.getWord())
|
||||
.type(entity.getType())
|
||||
.redacted(entity.isRedaction())
|
||||
.isHint(isHint(entity))
|
||||
.isRecommendation(isRecommendation(entity))
|
||||
.isHint(isHint(entity, ruleSetId))
|
||||
.isRecommendation(isRecommendation(entity, ruleSetId))
|
||||
.section(entity.getHeadline())
|
||||
.sectionNumber(entity.getSectionNumber())
|
||||
.matchedRule(entity.getMatchedRule())
|
||||
@ -389,49 +387,49 @@ public class AnnotationHighlightService {
|
||||
}
|
||||
|
||||
|
||||
private boolean isRedactionType(Entity entity) {
|
||||
private boolean isRedactionType(Entity entity, String ruleSetId) {
|
||||
|
||||
if (!entity.isRedaction()) {
|
||||
return false;
|
||||
}
|
||||
return !isHint(entity);
|
||||
return !isHint(entity, ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(Entity entity, boolean requestedToRemove) {
|
||||
private float[] getColor(Entity entity, String ruleSetId, boolean requestedToRemove) {
|
||||
|
||||
if (requestedToRemove) {
|
||||
return dictionaryService.getRequestRemoveColor();
|
||||
return dictionaryService.getRequestRemoveColor(ruleSetId);
|
||||
}
|
||||
if (!entity.isRedaction() && !isHint(entity)) {
|
||||
return dictionaryService.getNotRedactedColor();
|
||||
if (!entity.isRedaction() && !isHint(entity, ruleSetId)) {
|
||||
return dictionaryService.getNotRedactedColor(ruleSetId);
|
||||
}
|
||||
return dictionaryService.getColor(entity.getType());
|
||||
return dictionaryService.getColor(entity.getType(), ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColorForManualAdd(String type, Status status) {
|
||||
private float[] getColorForManualAdd(String type, String ruleSetId, Status status) {
|
||||
|
||||
if (status.equals(Status.REQUESTED)) {
|
||||
return dictionaryService.getRequestAddColor();
|
||||
return dictionaryService.getRequestAddColor(ruleSetId);
|
||||
} else if (status.equals(Status.DECLINED)) {
|
||||
return dictionaryService.getNotRedactedColor();
|
||||
return dictionaryService.getNotRedactedColor(ruleSetId);
|
||||
}
|
||||
return getColor(type);
|
||||
return getColor(type, ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(String type) {
|
||||
return dictionaryService.getColor(type);
|
||||
private float[] getColor(String type, String ruleSetId) {
|
||||
return dictionaryService.getColor(type, ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
private boolean isHint(Entity entity) {
|
||||
return dictionaryService.isHint(entity.getType());
|
||||
private boolean isHint(Entity entity, String ruleSetId) {
|
||||
return dictionaryService.isHint(entity.getType(), ruleSetId);
|
||||
}
|
||||
|
||||
private boolean isRecommendation(Entity entity) {
|
||||
return dictionaryService.isRecommendation(entity.getType());
|
||||
private boolean isRecommendation(Entity entity, String ruleSetId) {
|
||||
return dictionaryService.isRecommendation(entity.getType(), ruleSetId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,45 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.DEFINED_PORT;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
@ -60,6 +20,45 @@ import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.DEFINED_PORT;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = DEFINED_PORT)
|
||||
@ -100,6 +99,8 @@ public class RedactionIntegrationTest {
|
||||
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
|
||||
private final Colors colors = new Colors();
|
||||
|
||||
private final static String TEST_RULESET_ID = "123";
|
||||
|
||||
@TestConfiguration
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@ -125,28 +126,28 @@ public class RedactionIntegrationTest {
|
||||
@Before
|
||||
public void stubClients() {
|
||||
|
||||
when(rulesClient.getVersion()).thenReturn(0L);
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(RULES));
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(RULES));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
when(dictionaryClient.getVersion()).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes()).thenReturn(TypeResponse.builder().types(getTypeResponse()).build());
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE)).thenReturn(getDictionaryResponse(VERTEBRATE));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS)).thenReturn(getDictionaryResponse(ADDRESS));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR)).thenReturn(getDictionaryResponse(AUTHOR));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR)).thenReturn(getDictionaryResponse(SPONSOR));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY)).thenReturn(getDictionaryResponse(HINT_ONLY));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT)).thenReturn(getDictionaryResponse(MUST_REDACT));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD)).thenReturn(getDictionaryResponse(TEST_METHOD));
|
||||
when(dictionaryClient.getDictionaryForType(PII)).thenReturn(getDictionaryResponse(PII));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS));
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE)).thenReturn(getDictionaryResponse(FALSE_POSITIVE));
|
||||
when(dictionaryClient.getColors()).thenReturn(colors);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder().types(getTypeResponse()).build());
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(ADDRESS));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SPONSOR));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(HINT_ONLY));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(MUST_REDACT));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(TEST_METHOD));
|
||||
when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PII));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS));
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FALSE_POSITIVE));
|
||||
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
@ -306,6 +307,7 @@ public class RedactionIntegrationTest {
|
||||
.stream()
|
||||
.map(typeColor -> TypeResult.builder()
|
||||
.type(typeColor.getKey())
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.hexColor(typeColor.getValue())
|
||||
.isHint(hintTypeMap.get(typeColor.getKey()))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
|
||||
@ -342,6 +344,7 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
for (File path : input) {
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
||||
.build();
|
||||
System.out.println("Redacting file : " + path.getName());
|
||||
@ -390,6 +393,7 @@ public class RedactionIntegrationTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
@ -420,6 +424,7 @@ public class RedactionIntegrationTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
@ -471,6 +476,7 @@ public class RedactionIntegrationTest {
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.manualRedactions(manualRedactions)
|
||||
.build();
|
||||
@ -565,6 +571,7 @@ public class RedactionIntegrationTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
@ -586,9 +593,10 @@ public class RedactionIntegrationTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.flatRedaction(false)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
|
||||
@ -1,25 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.PositionUtil;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.Before;
|
||||
@ -38,19 +31,25 @@ import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.PositionUtil;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@SpringBootTest
|
||||
@RunWith(SpringRunner.class)
|
||||
@ -79,6 +78,8 @@ public class EntityRedactionServiceTest {
|
||||
@Autowired
|
||||
private DroolsExecutionService droolsExecutionService;
|
||||
|
||||
private final static String TEST_RULESET_ID = "123";
|
||||
|
||||
@TestConfiguration
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@ -129,21 +130,21 @@ public class EntityRedactionServiceTest {
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
|
||||
}
|
||||
@ -162,20 +163,20 @@ public class EntityRedactionServiceTest {
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
|
||||
}
|
||||
@ -187,22 +188,22 @@ public class EntityRedactionServiceTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
|
||||
" Supplement - Identity of the active substance - Reference list.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
@ -212,7 +213,7 @@ public class EntityRedactionServiceTest {
|
||||
"the plant protection product.pdf");
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
@ -224,22 +225,22 @@ public class EntityRedactionServiceTest {
|
||||
public void testFalsePositiveInWrongCell() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 9)
|
||||
@ -288,27 +289,27 @@ public class EntityRedactionServiceTest {
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules();
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_RULESET_ID);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 6)
|
||||
@ -329,27 +330,27 @@ public class EntityRedactionServiceTest {
|
||||
"Section(searchText.toLowerCase().contains(\"batches produced at\"))\n" + " then\n" + " section" +
|
||||
".redactIfPrecededBy(\"batches produced at\", \"sponsor\", 11, \"Redacted because it represents a " +
|
||||
"sponsor company\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" + " end";
|
||||
when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules();
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_RULESET_ID);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse authorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(authorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(authorResponse);
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 11)
|
||||
@ -368,20 +369,20 @@ public class EntityRedactionServiceTest {
|
||||
.entries(Arrays.asList("Bissig R.", "Thanei P."))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
|
||||
assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
|
||||
@ -393,16 +394,16 @@ public class EntityRedactionServiceTest {
|
||||
.entries(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C."))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
|
||||
@ -419,20 +420,20 @@ public class EntityRedactionServiceTest {
|
||||
.entries(Collections.singletonList("Aldershof S."))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
|
||||
}
|
||||
@ -464,27 +465,27 @@ public class EntityRedactionServiceTest {
|
||||
" 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
TypeResponse typeResponse = TypeResponse.builder()
|
||||
.types(Arrays.asList(
|
||||
TypeResult.builder().type(AUTHOR_CODE).hexColor("#ffff00").build(),
|
||||
TypeResult.builder().type(ADDRESS_CODE).hexColor("#ff00ff").build(),
|
||||
TypeResult.builder().type(SPONSOR_CODE).hexColor("#00ffff").build()))
|
||||
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(AUTHOR_CODE).hexColor("#ffff00").build(),
|
||||
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
|
||||
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getAllTypes()).thenReturn(typeResponse);
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(typeResponse);
|
||||
|
||||
// Default empty return to prevent NPEs
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
|
||||
Colors colors = new Colors();
|
||||
colors.setDefaultColor("#acfc00");
|
||||
@ -492,7 +493,7 @@ public class EntityRedactionServiceTest {
|
||||
colors.setRequestAdd("#04b093");
|
||||
colors.setRequestRemove("#04b093");
|
||||
|
||||
when(dictionaryClient.getColors()).thenReturn(colors);
|
||||
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
@ -516,7 +517,7 @@ public class EntityRedactionServiceTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void testAuthorSplitting(){
|
||||
public void testAuthorSplitting() {
|
||||
|
||||
String word = "Porch JR, " + "Kendall TZ, " + "Krueger HO";
|
||||
|
||||
@ -530,8 +531,8 @@ public class EntityRedactionServiceTest {
|
||||
allMatches.add(matcher.group());
|
||||
}
|
||||
|
||||
for(String name: allMatches) {
|
||||
if(name.length() >= 3) {
|
||||
for (String name : allMatches) {
|
||||
if (name.length() >= 3) {
|
||||
System.out.println(name);
|
||||
// dictionaryService.addToLocalDictionary(type, name);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user