From 33ab09d7fc9b0fa3ec9187e297439496ad91894f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 23 Jul 2020 13:35:58 +0200 Subject: [PATCH 1/2] Fix style. --- .../v1/server/redaction/model/Section.java | 62 ++++++++++--------- .../redaction/service/DictionaryService.java | 9 ++- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 049349cc..8bb99977 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -25,13 +25,17 @@ public class Section { private String headline; + public boolean contains(String type) { + return entities.stream().anyMatch(entity -> entity.getType().equals(type)); } - public void redact(String type, int ruleNumber, String reason){ + + public void redact(String type, int ruleNumber, String reason) { + entities.forEach(entity -> { - if(entity.getType().equals(type)){ + if (entity.getType().equals(type)) { entity.setRedaction(true); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -39,9 +43,11 @@ public class Section { }); } - public void redactNot(String type, int ruleNumber, String reason){ + + public void redactNot(String type, int ruleNumber, String reason) { + entities.forEach(entity -> { - if(entity.getType().equals(type)){ + if (entity.getType().equals(type)) { entity.setRedaction(false); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -49,18 +55,19 @@ public class Section { }); } - public void redactLineAfter(String start, String asType, int ruleNumber, String reason){ + + public void redactLineAfter(String start, String asType, int ruleNumber, String reason) { String value = StringUtils.substringBetween(text, start, "\n"); - if(value != null){ - Set found = findEntity(value.trim(), asType); + if (value != null) { + Set found = findEntity(value.trim(), asType); entities.addAll(found); } // TODO No need to iterate entities.forEach(entity -> { - if(entity.getType().equals(asType)){ + if (entity.getType().equals(asType)) { entity.setRedaction(true); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -70,19 +77,18 @@ public class Section { } - - public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason){ + public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason) { String value = StringUtils.substringBetween(searchText, start, stop); - if(value != null){ - Set found = findEntity(value.trim(), asType); + if (value != null) { + Set found = findEntity(value.trim(), asType); entities.addAll(found); } // TODO No need to iterate entities.forEach(entity -> { - if(entity.getType().equals(asType)){ + if (entity.getType().equals(asType)) { entity.setRedaction(true); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -91,25 +97,21 @@ public class Section { } - - private Set findEntity(String value, String asType) { Set found = new HashSet<>(); - int startIndex; - int stopIndex = 0; - do { - startIndex = searchText.indexOf(value, stopIndex); - stopIndex = startIndex + value.length(); - - if (startIndex > -1 && - (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText.charAt(startIndex - 1))) && - (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) { - found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline)); - } - } while (startIndex > -1); + int startIndex; + int stopIndex = 0; + do { + startIndex = searchText.indexOf(value, stopIndex); + stopIndex = startIndex + value.length(); + if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText + .charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) { + found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline)); + } + } while (startIndex > -1); removeEntitiesContainedInLarger(found); @@ -118,14 +120,18 @@ public class Section { private boolean isSeparator(char c) { + return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’'; } + public void removeEntitiesContainedInLarger(Set entities) { + List wordsToRemove = new ArrayList<>(); for (Entity word : entities) { for (Entity inner : entities) { - if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) { + if (inner.getWord().length() < word.getWord() + .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) { wordsToRemove.add(inner); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index 4a02ad89..d245c5d0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -55,16 +56,14 @@ public class DictionaryService { try { TypeResponse typeResponse = dictionaryClient.getAllTypes(); - if (typeResponse != null && !CollectionUtils.isEmpty(typeResponse.getTypes())) { + if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) { entryColors = typeResponse.getTypes() .stream() .collect(Collectors.toMap(TypeResult::getType, TypeResult::getColor)); dictionary = entryColors.keySet() .stream() - .collect(Collectors.toMap(type -> type, s -> dictionaryClient.getDictionaryForType(s) - .getEntries() - .stream() - .collect(Collectors.toSet()))); + .collect(Collectors.toMap(type -> type, s -> new HashSet<>(dictionaryClient.getDictionaryForType(s) + .getEntries()))); hintTypes = typeResponse.getTypes() .stream() .filter(TypeResult::isHint) From 7dbe03483b7a8a3c391e9d0791717b9fab5fc5db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Thu, 23 Jul 2020 13:36:02 +0200 Subject: [PATCH 2/2] RED-183: Fix catching validation errors --- .../redaction/v1/server/Application.java | 31 ++++-- .../service/DroolsExecutionService.java | 2 +- .../v1/server/RedactionIntegrationTest.java | 99 ++++++++++++++----- 3 files changed, 96 insertions(+), 36 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java index 81b718cb..42dd263e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java @@ -4,6 +4,7 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import org.apache.commons.lang3.StringUtils; import org.kie.api.KieServices; import org.kie.api.builder.KieBuilder; import org.kie.api.builder.KieFileSystem; @@ -22,6 +23,7 @@ import org.springframework.context.annotation.Import; import com.iqser.gin4.commons.spring.DefaultWebMvcConfiguration; import com.iqser.red.service.configuration.v1.api.model.RulesResponse; import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; @Import({DefaultWebMvcConfiguration.class}) @@ -33,24 +35,35 @@ public class Application { @Autowired private RulesClient rulesClient; + public static void main(String[] args) { + SpringApplication.run(Application.class, args); } + @Bean public KieContainer kieContainer() { - KieServices kieServices = KieServices.Factory.get(); + try { + KieServices kieServices = KieServices.Factory.get(); - KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); - RulesResponse rules = rulesClient.getRules(); - InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8)); - kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input)); - KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); - kieBuilder.buildAll(); - KieModule kieModule = kieBuilder.getKieModule(); + KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); + RulesResponse rules = rulesClient.getRules(); + if (StringUtils.isEmpty(rules.getRules())) { + throw new RuntimeException("Rules cannot be empty."); + } + InputStream input = new ByteArrayInputStream(rules.getRules().getBytes(StandardCharsets.UTF_8)); + kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources() + .newInputStreamResource(input)); + KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); + kieBuilder.buildAll(); + KieModule kieModule = kieBuilder.getKieModule(); - return kieServices.newKieContainer(kieModule.getReleaseId()); + return kieServices.newKieContainer(kieModule.getReleaseId()); + } catch (Exception e) { + throw new RulesValidationException("Could not update rules: " + e.getMessage(), e); + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index 98243c60..9e05f0e0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -68,7 +68,7 @@ public class DroolsExecutionService { KieModule kieModule = kieBuilder.getKieModule(); kieContainer.updateToVersion(kieModule.getReleaseId()); } catch (Exception e) { - throw new RulesValidationException("Could not update rules", e); + throw new RulesValidationException("Could not update rules: " + e.getMessage(), e); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 949c5b22..01dd7485 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -52,11 +52,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati @SpringBootTest(webEnvironment = DEFINED_PORT) public class RedactionIntegrationTest { - public static final String VERTEBRATES_CODE = "vertebrate"; - public static final String ADDRESS_CODE = "address"; - public static final String NAME_CODE = "name"; - public static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; - public static final String DEFAULT = "default"; + private static final String RULES = loadFromClassPath("drools/rules.drl"); + private static final String VERTEBRATES_CODE = "vertebrate"; + private static final String ADDRESS_CODE = "address"; + private static final String NAME_CODE = "name"; + private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator"; + private static final String DEFAULT = "default"; @Autowired private RedactionController redactionController; @@ -67,8 +68,8 @@ public class RedactionIntegrationTest { @MockBean private DictionaryClient dictionaryClient; - private Map> dictionary = new HashMap<>(); - private Map typeColorMap = new HashMap<>(); + private final Map> dictionary = new HashMap<>(); + private final Map typeColorMap = new HashMap<>(); @TestConfiguration public static class RedactionIntegrationTestConfiguration { @@ -79,21 +80,23 @@ public class RedactionIntegrationTest { KieServices kieServices = KieServices.Factory.get(); KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); - InputStream input = new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)); - kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input)); + InputStream input = new ByteArrayInputStream(RULES.getBytes(StandardCharsets.UTF_8)); + kieFileSystem.write("src/test/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input)); KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); kieBuilder.buildAll(); KieModule kieModule = kieBuilder.getKieModule(); return kieServices.newKieContainer(kieModule.getReleaseId()); } + } + @Before public void stubRulesClient() { when(rulesClient.getVersion()).thenReturn(0L); - when(rulesClient.getRules()).thenReturn(new RulesResponse(loadFromClassPath("drools/rules.drl"))); + when(rulesClient.getRules()).thenReturn(new RulesResponse(RULES)); loadDictionaryForTest(); loadTypeForTest(); @@ -106,19 +109,41 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(DEFAULT)).thenReturn(getDictionaryResponse(DEFAULT)); } + private void loadDictionaryForTest() { - dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); - dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + + dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/names.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/vertebrates.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/addresses.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); dictionary.put(DEFAULT, new ArrayList<>()); } + private String cleanDictionaryEntry(String entry) { + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); } + private void loadTypeForTest() { + typeColorMap.put(VERTEBRATES_CODE, new float[]{0, 1, 0}); typeColorMap.put(ADDRESS_CODE, new float[]{0, 1, 1}); typeColorMap.put(NAME_CODE, new float[]{1, 1, 0}); @@ -126,21 +151,31 @@ public class RedactionIntegrationTest { typeColorMap.put(DEFAULT, new float[]{1, 0.502f, 0}); } + private List getTypeResponse() { - return typeColorMap.entrySet().stream().map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build()).collect(Collectors.toList()); + + return typeColorMap.entrySet() + .stream() + .map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build()) + .collect(Collectors.toList()); } + private DictionaryResponse getDictionaryResponse(String type) { + return DictionaryResponse.builder().color(typeColorMap.get(type)).entries(dictionary.get(type)).build(); } + @Test public void redactionTest() throws IOException { long start = System.currentTimeMillis(); ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); - RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build(); + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + .build(); request.setFlatRedaction(false); RedactionResult result = redactionController.redact(request); @@ -154,12 +189,15 @@ public class RedactionIntegrationTest { System.out.println("numberOfPages: " + result.getNumberOfPages()); } + @Test public void classificationTest() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); - RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build(); + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + .build(); RedactionResult result = redactionController.classify(request); @@ -168,12 +206,15 @@ public class RedactionIntegrationTest { } } + @Test public void sectionsTest() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); - RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build(); + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + .build(); RedactionResult result = redactionController.sections(request); @@ -182,12 +223,15 @@ public class RedactionIntegrationTest { } } + @Test public void htmlTablesTest() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); - RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build(); + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + .build(); RedactionResult result = redactionController.htmlTables(request); @@ -196,12 +240,15 @@ public class RedactionIntegrationTest { } } + @Test public void htmlTableRotationTest() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); - RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build(); + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + .build(); RedactionResult result = redactionController.htmlTables(request); @@ -210,10 +257,10 @@ public class RedactionIntegrationTest { } } - private String loadFromClassPath(String path) { - URL resource = ResourceLoader.class.getClassLoader() - .getResource(path); + private static String loadFromClassPath(String path) { + + URL resource = ResourceLoader.class.getClassLoader().getResource(path); if (resource == null) { throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl"); } @@ -221,12 +268,12 @@ public class RedactionIntegrationTest { StringBuilder sb = new StringBuilder(); String str; while ((str = br.readLine()) != null) { - sb.append(str) - .append("\n"); + sb.append(str).append("\n"); } return sb.toString(); } catch (IOException e) { throw new IllegalArgumentException("could not load classpath resource: " + path, e); } } -} + +} \ No newline at end of file