Pull request #12: Bugfix/RED-183

Merge in RED/redaction-service from bugfix/RED-183 to master

* commit '7dbe03483b7a8a3c391e9d0791717b9fab5fc5db':
  RED-183: Fix catching validation errors
  Fix style.
This commit is contained in:
Thierry Goeckel 2020-07-23 14:48:11 +02:00
commit ca439d821d
5 changed files with 134 additions and 69 deletions

View File

@ -4,6 +4,7 @@ import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
@ -22,6 +23,7 @@ import org.springframework.context.annotation.Import;
import com.iqser.gin4.commons.spring.DefaultWebMvcConfiguration;
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
@Import({DefaultWebMvcConfiguration.class})
@ -33,24 +35,35 @@ public class Application {
@Autowired
private RulesClient rulesClient;
/**
 * Application entry point; delegates start-up to {@code SpringApplication.run}
 * with {@code Application} as the source class.
 *
 * @param args command-line arguments, forwarded unchanged to Spring Boot
 */
public static void main(String[] args) {
SpringApplication.run(Application.class, args);
}
@Bean
public KieContainer kieContainer() {
KieServices kieServices = KieServices.Factory.get();
try {
KieServices kieServices = KieServices.Factory.get();
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
RulesResponse rules = rulesClient.getRules();
InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
KieModule kieModule = kieBuilder.getKieModule();
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
RulesResponse rules = rulesClient.getRules();
if (StringUtils.isEmpty(rules.getRules())) {
throw new RuntimeException("Rules cannot be empty.");
}
InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources()
.newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
KieModule kieModule = kieBuilder.getKieModule();
return kieServices.newKieContainer(kieModule.getReleaseId());
return kieServices.newKieContainer(kieModule.getReleaseId());
} catch (Exception e) {
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
}
}

View File

@ -25,13 +25,17 @@ public class Section {
private String headline;
/**
 * Reports whether at least one entity of this section has the given type.
 *
 * @param type the type to look for
 * @return {@code true} as soon as an entity whose type equals {@code type} is found,
 *         {@code false} if none matches
 */
public boolean contains(String type) {
    for (Entity candidate : entities) {
        if (candidate.getType().equals(type)) {
            return true;
        }
    }
    return false;
}
public void redact(String type, int ruleNumber, String reason){
public void redact(String type, int ruleNumber, String reason) {
entities.forEach(entity -> {
if(entity.getType().equals(type)){
if (entity.getType().equals(type)) {
entity.setRedaction(true);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -39,9 +43,11 @@ public class Section {
});
}
public void redactNot(String type, int ruleNumber, String reason){
public void redactNot(String type, int ruleNumber, String reason) {
entities.forEach(entity -> {
if(entity.getType().equals(type)){
if (entity.getType().equals(type)) {
entity.setRedaction(false);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -49,18 +55,19 @@ public class Section {
});
}
public void redactLineAfter(String start, String asType, int ruleNumber, String reason){
public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
String value = StringUtils.substringBetween(text, start, "\n");
if(value != null){
Set<Entity> found = findEntity(value.trim(), asType);
if (value != null) {
Set<Entity> found = findEntity(value.trim(), asType);
entities.addAll(found);
}
// TODO No need to iterate
entities.forEach(entity -> {
if(entity.getType().equals(asType)){
if (entity.getType().equals(asType)) {
entity.setRedaction(true);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -70,19 +77,18 @@ public class Section {
}
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason){
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason) {
String value = StringUtils.substringBetween(searchText, start, stop);
if(value != null){
Set<Entity> found = findEntity(value.trim(), asType);
if (value != null) {
Set<Entity> found = findEntity(value.trim(), asType);
entities.addAll(found);
}
// TODO No need to iterate
entities.forEach(entity -> {
if(entity.getType().equals(asType)){
if (entity.getType().equals(asType)) {
entity.setRedaction(true);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -91,25 +97,21 @@ public class Section {
}
private Set<Entity> findEntity(String value, String asType) {
Set<Entity> found = new HashSet<>();
int startIndex;
int stopIndex = 0;
do {
startIndex = searchText.indexOf(value, stopIndex);
stopIndex = startIndex + value.length();
if (startIndex > -1 &&
(startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText.charAt(startIndex - 1))) &&
(stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline));
}
} while (startIndex > -1);
int startIndex;
int stopIndex = 0;
do {
startIndex = searchText.indexOf(value, stopIndex);
stopIndex = startIndex + value.length();
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText
.charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline));
}
} while (startIndex > -1);
removeEntitiesContainedInLarger(found);
@ -118,14 +120,18 @@ public class Section {
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
}
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
List<Entity> wordsToRemove = new ArrayList<>();
for (Entity word : entities) {
for (Entity inner : entities) {
if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
wordsToRemove.add(inner);
}
}

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -55,16 +56,14 @@ public class DictionaryService {
try {
TypeResponse typeResponse = dictionaryClient.getAllTypes();
if (typeResponse != null && !CollectionUtils.isEmpty(typeResponse.getTypes())) {
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
entryColors = typeResponse.getTypes()
.stream()
.collect(Collectors.toMap(TypeResult::getType, TypeResult::getColor));
dictionary = entryColors.keySet()
.stream()
.collect(Collectors.toMap(type -> type, s -> dictionaryClient.getDictionaryForType(s)
.getEntries()
.stream()
.collect(Collectors.toSet())));
.collect(Collectors.toMap(type -> type, s -> new HashSet<>(dictionaryClient.getDictionaryForType(s)
.getEntries())));
hintTypes = typeResponse.getTypes()
.stream()
.filter(TypeResult::isHint)

View File

@ -68,7 +68,7 @@ public class DroolsExecutionService {
KieModule kieModule = kieBuilder.getKieModule();
kieContainer.updateToVersion(kieModule.getReleaseId());
} catch (Exception e) {
throw new RulesValidationException("Could not update rules", e);
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
}
}

View File

@ -52,11 +52,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
@SpringBootTest(webEnvironment = DEFINED_PORT)
public class RedactionIntegrationTest {
public static final String VERTEBRATES_CODE = "vertebrate";
public static final String ADDRESS_CODE = "address";
public static final String NAME_CODE = "name";
public static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
public static final String DEFAULT = "default";
private static final String RULES = loadFromClassPath("drools/rules.drl");
private static final String VERTEBRATES_CODE = "vertebrate";
private static final String ADDRESS_CODE = "address";
private static final String NAME_CODE = "name";
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String DEFAULT = "default";
@Autowired
private RedactionController redactionController;
@ -67,8 +68,8 @@ public class RedactionIntegrationTest {
@MockBean
private DictionaryClient dictionaryClient;
private Map<String, List<String>> dictionary = new HashMap<>();
private Map<String, float[]> typeColorMap = new HashMap<>();
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, float[]> typeColorMap = new HashMap<>();
@TestConfiguration
public static class RedactionIntegrationTestConfiguration {
@ -79,21 +80,23 @@ public class RedactionIntegrationTest {
KieServices kieServices = KieServices.Factory.get();
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
InputStream input = new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/test/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
KieModule kieModule = kieBuilder.getKieModule();
return kieServices.newKieContainer(kieModule.getReleaseId());
}
}
@Before
public void stubRulesClient() {
when(rulesClient.getVersion()).thenReturn(0L);
when(rulesClient.getRules()).thenReturn(new RulesResponse(loadFromClassPath("drools/rules.drl")));
when(rulesClient.getRules()).thenReturn(new RulesResponse(RULES));
loadDictionaryForTest();
loadTypeForTest();
@ -106,19 +109,41 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(DEFAULT)).thenReturn(getDictionaryResponse(DEFAULT));
}
private void loadDictionaryForTest() {
dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet()));
dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/names.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrates.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/addresses.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.put(DEFAULT, new ArrayList<>());
}
/**
 * Prepares a raw dictionary entry for use in the test dictionary.
 *
 * The entry is first passed through {@code TextNormalizationUtilities.removeHyphenLineBreaks};
 * every newline left in the result is then replaced by a single space.
 *
 * @param entry the raw dictionary entry
 * @return the cleaned entry
 */
private String cleanDictionaryEntry(String entry) {
    String withoutHyphenBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(entry);
    return withoutHyphenBreaks.replaceAll("\\n", " ");
}
private void loadTypeForTest() {
typeColorMap.put(VERTEBRATES_CODE, new float[]{0, 1, 0});
typeColorMap.put(ADDRESS_CODE, new float[]{0, 1, 1});
typeColorMap.put(NAME_CODE, new float[]{1, 1, 0});
@ -126,21 +151,31 @@ public class RedactionIntegrationTest {
typeColorMap.put(DEFAULT, new float[]{1, 0.502f, 0});
}
private List<TypeResult> getTypeResponse() {
return typeColorMap.entrySet().stream().map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build()).collect(Collectors.toList());
return typeColorMap.entrySet()
.stream()
.map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build())
.collect(Collectors.toList());
}
/**
 * Builds the stubbed dictionary response for one type from the test fixtures.
 *
 * @param type the type code used as key into {@code typeColorMap} and {@code dictionary}
 * @return a response carrying the color and entries stored for {@code type}
 *         (either is {@code null} when the corresponding map has no mapping for the key)
 */
private DictionaryResponse getDictionaryResponse(String type) {
    float[] color = typeColorMap.get(type);
    List<String> entries = dictionary.get(type);
    return DictionaryResponse.builder()
            .color(color)
            .entries(entries)
            .build();
}
@Test
public void redactionTest() throws IOException {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
request.setFlatRedaction(false);
RedactionResult result = redactionController.redact(request);
@ -154,12 +189,15 @@ public class RedactionIntegrationTest {
System.out.println("numberOfPages: " + result.getNumberOfPages());
}
@Test
public void classificationTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
RedactionResult result = redactionController.classify(request);
@ -168,12 +206,15 @@ public class RedactionIntegrationTest {
}
}
@Test
public void sectionsTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
RedactionResult result = redactionController.sections(request);
@ -182,12 +223,15 @@ public class RedactionIntegrationTest {
}
}
@Test
public void htmlTablesTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
RedactionResult result = redactionController.htmlTables(request);
@ -196,12 +240,15 @@ public class RedactionIntegrationTest {
}
}
@Test
public void htmlTableRotationTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
RedactionResult result = redactionController.htmlTables(request);
@ -210,10 +257,10 @@ public class RedactionIntegrationTest {
}
}
private String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader()
.getResource(path);
private static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
if (resource == null) {
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
}
@ -221,12 +268,12 @@ public class RedactionIntegrationTest {
StringBuilder sb = new StringBuilder();
String str;
while ((str = br.readLine()) != null) {
sb.append(str)
.append("\n");
sb.append(str).append("\n");
}
return sb.toString();
} catch (IOException e) {
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
}
}
}
}