diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index d18dabdb..6b1e04e4 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -39,7 +39,7 @@ com.iqser.red.service configuration-service-api-v1 - 1.0.0 + 1.0.1 org.drools diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/DictionaryClient.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/DictionaryClient.java new file mode 100644 index 00000000..19b553e9 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/DictionaryClient.java @@ -0,0 +1,10 @@ +package com.iqser.red.service.redaction.v1.server.client; + +import org.springframework.cloud.openfeign.FeignClient; + +import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource; +import com.iqser.red.service.configuration.v1.api.resource.RulesResource; + +@FeignClient(name = RulesResource.SERVICE_NAME, url = "http://" + RulesResource.SERVICE_NAME + ":8080") +public interface DictionaryClient extends DictionaryResource { +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index 3f7782b1..4b6d5bf8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -1,58 +1,56 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import javax.annotation.PostConstruct; - +import org.apache.commons.collections4.CollectionUtils; import org.springframework.stereotype.Service; -import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; -import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import com.iqser.red.service.configuration.v1.api.model.TypeResponse; +import com.iqser.red.service.configuration.v1.api.model.TypeResult; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import feign.FeignException; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -@Slf4j @Service @RequiredArgsConstructor +@Slf4j public class DictionaryService { - public static final String VERTEBRATES_CODE = "VERTEBRATE"; - public static final String ADDRESS_CODE = "ADDRESS"; - public static final String NAME_CODE = "NAME"; - public static final String NO_REDACTION_INDICATOR = "NO_REDACTION_INDICATOR"; + private final DictionaryClient dictionaryClient; + + private long dictionaryVersion = -1; @Getter private Map> dictionary = new HashMap<>(); @Getter - private long generation; - - @PostConstruct - public void init() { - loadFromResourceFiles(); - } - + private Map entryColors = new HashMap<>(); public void updateDictionary() { - //TODO + + long version = dictionaryClient.getVersion(); + if (version > dictionaryVersion) { + dictionaryVersion = version; + updateDictionaryEntry(); + } } - - public void loadFromResourceFiles() { - dictionary.computeIfAbsent(NAME_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList())); - dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList())); - dictionary.computeIfAbsent(ADDRESS_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList())); - dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList())); + private void updateDictionaryEntry() { + try { + TypeResponse typeResponse = dictionaryClient.getAllTypes(); + if (typeResponse != null && !CollectionUtils.isEmpty(typeResponse.getTypes())) { + entryColors = typeResponse.getTypes().stream().collect(Collectors.toMap(TypeResult::getType, TypeResult::getColor)); + dictionary = entryColors.keySet().stream().collect(Collectors.toMap(type -> type, s -> dictionaryClient.getDictionaryForType(s).getEntries().stream().collect(Collectors.toSet()))); + } + } catch (FeignException e) { + log.warn("Got some unknown feignException", e); + throw e; + } } - - - private String cleanDictionaryEntry(String entry) { - return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); - } -} +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index aaf928ad..98243c60 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -62,7 +62,7 @@ public class DroolsExecutionService { KieServices kieServices = KieServices.Factory.get(); InputStream input = new ByteArrayInputStream(drlAsString.getBytes(StandardCharsets.UTF_8)); KieFileSystem kieFileSystem = kieServices.newKieFileSystem(); - kieFileSystem.write(kieServices.getResources().newInputStreamResource(input)); + kieFileSystem.write("src/main/resources/drools/rules.drl", kieServices.getResources().newInputStreamResource(input)); KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem); kieBuilder.buildAll(); KieModule kieModule = kieBuilder.getKieModule(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index bbb6d016..a5f3a802 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -27,7 +27,6 @@ public class EntityRedactionService { private final DictionaryService dictionaryService; private final DroolsExecutionService droolsExecutionService; - public void processDocument(Document classifiedDoc) { dictionaryService.updateDictionary(); @@ -98,7 +97,6 @@ public class EntityRedactionService { }); } - private Set findEntities(SearchableText searchableText, String headline) { String normalizedInputString = searchableText.toString(); @@ -130,7 +128,6 @@ public class EntityRedactionService { return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’'; } - public void removeEntitiesContainedInLarger(Set entities) { List wordsToRemove = new ArrayList<>(); for (Entity word : entities) { @@ -142,6 +139,4 @@ public class EntityRedactionService { } entities.removeAll(wordsToRemove); } - - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java index 27a08355..9f522602 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java @@ -1,10 +1,5 @@ package com.iqser.red.service.redaction.v1.server.visualization.service; -import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.ADDRESS_CODE; -import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.NAME_CODE; -import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.NO_REDACTION_INDICATOR; -import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.VERTEBRATES_CODE; - import java.awt.Color; import java.io.IOException; import java.util.List; @@ -29,6 +24,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; @@ -40,7 +36,7 @@ import lombok.extern.slf4j.Slf4j; @Service @RequiredArgsConstructor public class AnnotationHighlightService { - + private final DictionaryService dictionaryService; public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction) throws IOException { @@ -177,36 +173,20 @@ public class AnnotationHighlightService { if (!entity.isRedaction()) { return false; } - if (entity.getType().equals(ADDRESS_CODE)) { - return true; + if(entity.getType().equalsIgnoreCase("VERTEBRATE") || entity.getType().equalsIgnoreCase("NO_REDACTION_INDICATOR") ){ + // TODO in RED-161. + return false; } - if (entity.getType().equals(NAME_CODE)) { - return true; - } - return false; + return dictionaryService.getDictionary().keySet().contains(entity.getType()); } - private float[] getColor(Entity entity) { if (!entity.isRedaction()) { return new float[]{0.627f, 0.627f, 0.627f}; } - if (entity.getType().equals(VERTEBRATES_CODE)) { - return new float[]{0, 1, 0}; - } - if (entity.getType().equals(ADDRESS_CODE)) { - return new float[]{0, 1, 1}; - } - if (entity.getType().equals(NAME_CODE)) { - return new float[]{1, 1, 0}; - } - if (entity.getType().equals(NO_REDACTION_INDICATOR)) { - return new float[]{1, 0.502f, 0}; - } - return null; + return dictionaryService.getEntryColors().get(entity.getType()); } - private void visualizeTextBlock(TextBlock textBlock, PDPageContentStream contentStream) throws IOException { contentStream.setStrokingColor(Color.LIGHT_GRAY); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index d1e7511c..549be749 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -11,6 +11,11 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.junit.Before; @@ -28,26 +33,44 @@ import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; import org.springframework.core.io.ClassPathResource; -import org.springframework.core.io.ResourceLoader; + +import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; + import org.springframework.test.context.junit4.SpringRunner; +import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse; import com.iqser.red.service.configuration.v1.api.model.RulesResponse; +import com.iqser.red.service.configuration.v1.api.model.TypeResponse; +import com.iqser.red.service.configuration.v1.api.model.TypeResult; import com.iqser.red.service.redaction.v1.model.RedactionRequest; import com.iqser.red.service.redaction.v1.model.RedactionResult; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.controller.RedactionController; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; @Ignore @RunWith(SpringRunner.class) @SpringBootTest(webEnvironment = DEFINED_PORT) public class RedactionIntegrationTest { + public static final String VERTEBRATES_CODE = "VERTEBRATE"; + public static final String ADDRESS_CODE = "ADDRESS"; + public static final String NAME_CODE = "NAME"; + public static final String NO_REDACTION_INDICATOR = "NO_REDACTION_INDICATOR"; + @Autowired private RedactionController redactionController; @MockBean private RulesClient rulesClient; + @MockBean + private DictionaryClient dictionaryClient; + + private Map> dictionary = new HashMap<>(); + private Map typeColorMap = new HashMap<>(); + @TestConfiguration public static class RedactionIntegrationTestConfiguration { @@ -64,9 +87,7 @@ public class RedactionIntegrationTest { KieModule kieModule = kieBuilder.getKieModule(); return kieServices.newKieContainer(kieModule.getReleaseId()); - } - } @Before @@ -75,6 +96,40 @@ public class RedactionIntegrationTest { when(rulesClient.getVersion()).thenReturn(0L); when(rulesClient.getRules()).thenReturn(new RulesResponse(loadFromClassPath("drools/rules.drl"))); + loadDictionaryForTest(); + loadTypeForTest(); + when(dictionaryClient.getVersion()).thenReturn(0L); + when(dictionaryClient.getAllTypes()).thenReturn(TypeResponse.builder().types(getTypeResponse()).build()); + when(dictionaryClient.getDictionaryForType(VERTEBRATES_CODE)).thenReturn(getDictionaryResponse(VERTEBRATES_CODE)); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(getDictionaryResponse(ADDRESS_CODE)); + when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(getDictionaryResponse(NAME_CODE)); + when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR)); + } + + private void loadDictionaryForTest() { + dictionary.computeIfAbsent(NAME_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(ADDRESS_CODE, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new ArrayList<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toSet())); + } + + private String cleanDictionaryEntry(String entry) { + return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " "); + } + + private void loadTypeForTest() { + typeColorMap.put("VERTEBRATE", new float[]{0, 1, 0}); + typeColorMap.put("ADDRESS", new float[]{0, 1, 1}); + typeColorMap.put("NAME", new float[]{1, 1, 0}); + typeColorMap.put("NO_REDACTION_INDICATOR", new float[]{1, 0.502f, 0}); + } + + private List getTypeResponse() { + return typeColorMap.entrySet().stream().map(typeColor -> TypeResult.builder().type(typeColor.getKey()).color(typeColor.getValue()).build()).collect(Collectors.toList()); + } + + private DictionaryResponse getDictionaryResponse(String type) { + return DictionaryResponse.builder().color(typeColorMap.get(type)).entries(dictionary.get(type)).build(); } @Test @@ -171,7 +226,5 @@ public class RedactionIntegrationTest { } catch (IOException e) { throw new IllegalArgumentException("could not load classpath resource: " + path, e); } - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java new file mode 100644 index 00000000..2ea1f81e --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java @@ -0,0 +1,64 @@ +package com.iqser.red.service.redaction.v1.server.redaction.utils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class ResourceLoader { + + public Map loadDictionaryFiles() { + + String name = "dictionaries/"; + + List files; + try { + files = IOUtils.readLines(ResourceLoader.class.getClassLoader().getResourceAsStream(name), "UTF-8"); + } catch (IOException e) { + throw new IllegalArgumentException("could not load classpath resource: " + name, e); + } + return files.stream().collect(Collectors.toMap(ResourceLoader::getFileName, s -> name + s)); + } + + private String getFileName(String filePath) { + return filePath.substring(0, filePath.indexOf(".txt")); + } + + public Set load(String classpathPath) { + + URL resource = ResourceLoader.class.getClassLoader().getResource(classpathPath); + if (resource == null) { + throw new IllegalArgumentException("could not load classpath resource: " + classpathPath); + } + try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { + return br.lines().collect(Collectors.toSet()); + } catch (IOException e) { + throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e); + } + } + + public String loadToString(String classpathPath) { + + URL resource = ResourceLoader.class.getClassLoader().getResource(classpathPath); + if (resource == null) { + throw new IllegalArgumentException("could not load classpath resource: " + classpathPath); + } + try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) { + return br.lines().collect(Collectors.joining("\n")); + } catch (IOException e) { + throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e); + } + + } + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java new file mode 100644 index 00000000..5405c047 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java @@ -0,0 +1,17 @@ +package com.iqser.red.service.redaction.v1.server.redaction.utils; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class TextNormalizationUtilities { + + /** + * Revert hyphenation due to line breaks. + * @param text Text to be processed. + * @return Text without line-break hyphenation. + */ + public static String removeHyphenLineBreaks(String text) { + return text.replaceAll("\\s(\\S+)[\\-\\u00AD]\\R|\n\r(.+ )", "\n$1$2"); + } + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/NoRedactionIndicator.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/NoRedactionIndicator.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/NoRedactionIndicator.txt rename to redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/NoRedactionIndicator.txt diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/addresses.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/addresses.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/addresses.txt rename to redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/addresses.txt diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/names.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/names.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/names.txt rename to redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/names.txt diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/vertebrates.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/vertebrates.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/vertebrates.txt rename to redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/vertebrates.txt