Pull request #12: Bugfix/RED-183
Merge in RED/redaction-service from bugfix/RED-183 to master * commit '7dbe03483b7a8a3c391e9d0791717b9fab5fc5db': RED-183: Fix catching validation errors Fix style.
This commit is contained in:
commit
ca439d821d
@ -4,6 +4,7 @@ import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
@ -22,6 +23,7 @@ import org.springframework.context.annotation.Import;
|
||||
import com.iqser.gin4.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
@Import({DefaultWebMvcConfiguration.class})
|
||||
@ -33,24 +35,35 @@ public class Application {
|
||||
@Autowired
|
||||
private RulesClient rulesClient;
|
||||
|
||||
|
||||
/**
 * Application entry point: passes {@code Application.class} and the command-line arguments to
 * {@code SpringApplication.run}.
 *
 * @param args command-line arguments, forwarded unchanged
 */
public static void main(String[] args) {
|
||||
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds the {@code KieContainer} bean from the rules text returned by {@code rulesClient.getRules()}.
 *
 * <p>This listing is a rendered diff without +/- markers, so the statements of the previous and the
 * changed version both appear and several are shown twice. Read as the version with the
 * {@code try} block, the method:
 * <ol>
 *   <li>obtains {@code KieServices} and a fresh {@code KieFileSystem};</li>
 *   <li>fetches the rules and throws {@code RuntimeException("Rules cannot be empty.")} when
 *       {@code StringUtils.isEmpty} reports the rules text as null or empty;</li>
 *   <li>writes the rules, encoded as UTF-8, to {@code src/main/resources/drools/rules.drl} in the
 *       KIE file system;</li>
 *   <li>runs {@code kieBuilder.buildAll()} and returns a new container for the built module's
 *       release id.</li>
 * </ol>
 * Any {@code Exception} raised along the way (including the empty-rules one) is rethrown as a
 * {@code RulesValidationException} whose message ends with the cause's message and which keeps the
 * cause attached.
 *
 * <p>NOTE(review): the visible code never inspects {@code kieBuilder.getResults()}; confirm whether
 * rule compilation errors actually surface as an exception at this point.
 *
 * @return a {@code KieContainer} for the module built from the fetched rules
 * @throws RulesValidationException if fetching, validating or building the rules fails
 */
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
try {
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
RulesResponse rules = rulesClient.getRules();
|
||||
InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
RulesResponse rules = rulesClient.getRules();
|
||||
if (StringUtils.isEmpty(rules.getRules())) {
|
||||
throw new RuntimeException("Rules cannot be empty.");
|
||||
}
|
||||
InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
} catch (Exception e) {
|
||||
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -25,13 +25,17 @@ public class Section {
|
||||
|
||||
private String headline;
|
||||
|
||||
|
||||
/**
 * Reports whether this section holds at least one entity of the given type.
 *
 * <p>The comparison is {@code entity.getType().equals(type)}, so it is an exact, case-sensitive
 * match, and an entity whose {@code getType()} returns {@code null} would raise a
 * {@code NullPointerException}.
 *
 * @param type the entity type to look for
 * @return {@code true} if any element of {@code entities} has that type, otherwise {@code false}
 */
public boolean contains(String type) {
|
||||
|
||||
return entities.stream().anyMatch(entity -> entity.getType().equals(type));
|
||||
}
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason){
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason) {
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(type)){
|
||||
if (entity.getType().equals(type)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
@ -39,9 +43,11 @@ public class Section {
|
||||
});
|
||||
}
|
||||
|
||||
public void redactNot(String type, int ruleNumber, String reason){
|
||||
|
||||
public void redactNot(String type, int ruleNumber, String reason) {
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(type)){
|
||||
if (entity.getType().equals(type)) {
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
@ -49,18 +55,19 @@ public class Section {
|
||||
});
|
||||
}
|
||||
|
||||
public void redactLineAfter(String start, String asType, int ruleNumber, String reason){
|
||||
|
||||
public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
|
||||
|
||||
String value = StringUtils.substringBetween(text, start, "\n");
|
||||
|
||||
if(value != null){
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
if (value != null) {
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(asType)){
|
||||
if (entity.getType().equals(asType)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
@ -70,19 +77,18 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason){
|
||||
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason) {
|
||||
|
||||
String value = StringUtils.substringBetween(searchText, start, stop);
|
||||
|
||||
if(value != null){
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
if (value != null) {
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(asType)){
|
||||
if (entity.getType().equals(asType)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
@ -91,25 +97,21 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private Set<Entity> findEntity(String value, String asType) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = searchText.indexOf(value, stopIndex);
|
||||
stopIndex = startIndex + value.length();
|
||||
|
||||
if (startIndex > -1 &&
|
||||
(startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText.charAt(startIndex - 1))) &&
|
||||
(stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = searchText.indexOf(value, stopIndex);
|
||||
stopIndex = startIndex + value.length();
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText
|
||||
.charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
|
||||
removeEntitiesContainedInLarger(found);
|
||||
|
||||
@ -118,14 +120,18 @@ public class Section {
|
||||
|
||||
|
||||
/**
 * Tells whether {@code c} is treated as a boundary character around a matched value.
 *
 * <p>A character qualifies when it is whitespace according to {@code Character.isWhitespace}, when
 * it matches the regex class {@code \p{Punct}}, or when it is one of the quote characters
 * {@code "}, {@code ‘} or {@code ’}. The two typographic quotes are listed explicitly because
 * {@code \p{Punct}} only covers ASCII punctuation by default.
 *
 * <p>NOTE(review): {@code Pattern.matches} compiles the expression on every call, and {@code "} is
 * already part of {@code \p{Punct}}, so that comparison looks redundant. Confirm before
 * simplifying.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} is whitespace, punctuation or one of the listed quotes
 */
private boolean isSeparator(char c) {
|
||||
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
|
||||
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
for (Entity word : entities) {
|
||||
for (Entity inner : entities) {
|
||||
if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
|
||||
if (inner.getWord().length() < word.getWord()
|
||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
|
||||
wordsToRemove.add(inner);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -55,16 +56,14 @@ public class DictionaryService {
|
||||
|
||||
try {
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes();
|
||||
if (typeResponse != null && !CollectionUtils.isEmpty(typeResponse.getTypes())) {
|
||||
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
|
||||
entryColors = typeResponse.getTypes()
|
||||
.stream()
|
||||
.collect(Collectors.toMap(TypeResult::getType, TypeResult::getColor));
|
||||
dictionary = entryColors.keySet()
|
||||
.stream()
|
||||
.collect(Collectors.toMap(type -> type, s -> dictionaryClient.getDictionaryForType(s)
|
||||
.getEntries()
|
||||
.stream()
|
||||
.collect(Collectors.toSet())));
|
||||
.collect(Collectors.toMap(type -> type, s -> new HashSet<>(dictionaryClient.getDictionaryForType(s)
|
||||
.getEntries())));
|
||||
hintTypes = typeResponse.getTypes()
|
||||
.stream()
|
||||
.filter(TypeResult::isHint)
|
||||
|
||||
@ -68,7 +68,7 @@ public class DroolsExecutionService {
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
kieContainer.updateToVersion(kieModule.getReleaseId());
|
||||
} catch (Exception e) {
|
||||
throw new RulesValidationException("Could not update rules", e);
|
||||
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -52,11 +52,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
|
||||
@SpringBootTest(webEnvironment = DEFINED_PORT)
|
||||
public class RedactionIntegrationTest {
|
||||
|
||||
public static final String VERTEBRATES_CODE = "vertebrate";
|
||||
public static final String ADDRESS_CODE = "address";
|
||||
public static final String NAME_CODE = "name";
|
||||
public static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
|
||||
public static final String DEFAULT = "default";
|
||||
private static final String RULES = loadFromClassPath("drools/rules.drl");
|
||||
private static final String VERTEBRATES_CODE = "vertebrate";
|
||||
private static final String ADDRESS_CODE = "address";
|
||||
private static final String NAME_CODE = "name";
|
||||
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
|
||||
private static final String DEFAULT = "default";
|
||||
|
||||
@Autowired
|
||||
private RedactionController redactionController;
|
||||
@ -67,8 +68,8 @@ public class RedactionIntegrationTest {
|
||||
@MockBean
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
private Map<String, List<String>> dictionary = new HashMap<>();
|
||||
private Map<String, float[]> typeColorMap = new HashMap<>();
|
||||
private final Map<String, List<String>> dictionary = new HashMap<>();
|
||||
private final Map<String, float[]> typeColorMap = new HashMap<>();
|
||||
|
||||
@TestConfiguration
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
@ -79,21 +80,23 @@ public class RedactionIntegrationTest {
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
|
||||
InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/test/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Before
|
||||
public void stubRulesClient() {
|
||||
|
||||
when(rulesClient.getVersion()).thenReturn(0L);
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(loadFromClassPath("drools/rules.drl")));
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(RULES));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
@ -106,19 +109,41 @@ public class RedactionIntegrationTest {
|
||||
when(dictionaryClient.getDictionaryForType(DEFAULT)).thenReturn(getDictionaryResponse(DEFAULT));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Fills the in-memory {@code dictionary} map used by the mocked dictionary client.
 *
 * <p>For each of {@code NAME_CODE}, {@code VERTEBRATES_CODE}, {@code ADDRESS_CODE} and
 * {@code NO_REDACTION_INDICATOR} the matching resource file is read with
 * {@code ResourceLoader.load}, every entry is passed through {@link #cleanDictionaryEntry}, the
 * results are collected into a {@code Set} (which drops duplicates) and then appended to the list
 * stored under that key, creating the list first when it is absent. {@code DEFAULT} is finally
 * mapped to a new empty list.
 *
 * <p>This listing is a rendered diff without +/- markers: the four load statements appear first as
 * single lines and then again reflowed over several lines. Both forms perform the same calls.
 */
private void loadDictionaryForTest() {
|
||||
dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
|
||||
|
||||
dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/names.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrates.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/addresses.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.put(DEFAULT, new ArrayList<>());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Normalizes one raw dictionary entry before it is stored.
 *
 * <p>The entry is first passed to {@code TextNormalizationUtilities.removeHyphenLineBreaks}
 * (presumably re-joining words hyphenated across a line break; verify against that helper), and
 * every line feed left in the result is then replaced by a single space.
 *
 * @param entry the dictionary entry as read from the resource file
 * @return the entry with no line feeds remaining
 */
private String cleanDictionaryEntry(String entry) {
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
|
||||
}
|
||||
|
||||
|
||||
private void loadTypeForTest() {
|
||||
|
||||
typeColorMap.put(VERTEBRATES_CODE, new float[]{0, 1, 0});
|
||||
typeColorMap.put(ADDRESS_CODE, new float[]{0, 1, 1});
|
||||
typeColorMap.put(NAME_CODE, new float[]{1, 1, 0});
|
||||
@ -126,21 +151,31 @@ public class RedactionIntegrationTest {
|
||||
typeColorMap.put(DEFAULT, new float[]{1, 0.502f, 0});
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts {@code typeColorMap} into the list of {@code TypeResult} objects served by the mocked
 * dictionary client: one result per map entry, with the key as type and the value as color.
 *
 * <p>This listing is a rendered diff without +/- markers, so the return statement appears twice:
 * once as a single line and once reflowed over several lines. Both build the same list.
 *
 * @return one {@code TypeResult} per entry of {@code typeColorMap}
 */
private List<TypeResult> getTypeResponse() {
|
||||
return typeColorMap.entrySet().stream().map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build()).collect(Collectors.toList());
|
||||
|
||||
return typeColorMap.entrySet()
|
||||
.stream()
|
||||
.map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds the {@code DictionaryResponse} returned by the mocked dictionary client for one type.
 *
 * <p>The color comes from {@code typeColorMap} and the entries from {@code dictionary}. Both are
 * plain map lookups, so a type missing from either map puts {@code null} into the corresponding
 * builder field.
 *
 * @param type the dictionary type code to build the response for
 * @return a response carrying the color and entry list registered for {@code type}
 */
private DictionaryResponse getDictionaryResponse(String type) {
|
||||
|
||||
return DictionaryResponse.builder().color(typeColorMap.get(type)).entries(dictionary.get(type)).build();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
@ -154,12 +189,15 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.classify(request);
|
||||
|
||||
@ -168,12 +206,15 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void sectionsTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.sections(request);
|
||||
|
||||
@ -182,12 +223,15 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void htmlTablesTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.htmlTables(request);
|
||||
|
||||
@ -196,12 +240,15 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void htmlTableRotationTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.htmlTables(request);
|
||||
|
||||
@ -210,10 +257,10 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
}
|
||||
|
||||
private String loadFromClassPath(String path) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader()
|
||||
.getResource(path);
|
||||
private static String loadFromClassPath(String path) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
|
||||
}
|
||||
@ -221,12 +268,12 @@ public class RedactionIntegrationTest {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String str;
|
||||
while ((str = br.readLine()) != null) {
|
||||
sb.append(str)
|
||||
.append("\n");
|
||||
sb.append(str).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user