diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/DictionaryClient.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/DictionaryClient.java
new file mode 100644
index 00000000..19b553e9
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/DictionaryClient.java
@@ -0,0 +1,10 @@
+package com.iqser.red.service.redaction.v1.server.client;
+
+import org.springframework.cloud.openfeign.FeignClient;
+
+import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
+import com.iqser.red.service.configuration.v1.api.resource.RulesResource;
+
+@FeignClient(name = RulesResource.SERVICE_NAME, url = "http://" + RulesResource.SERVICE_NAME + ":8080")
+public interface DictionaryClient extends DictionaryResource {
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java
index 3f7782b1..c8e0d2f4 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java
@@ -1,58 +1,72 @@
 package com.iqser.red.service.redaction.v1.server.redaction.service;
 
+import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import javax.annotation.PostConstruct;
-
+import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.stereotype.Service;
 
-import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
-import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
+import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
 
+import feign.FeignException;
 import lombok.Getter;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
-@Slf4j
+/**
+ * Caches the dictionary entries of every dictionary type, loaded through {@link DictionaryClient}.
+ */
 @Service
 @RequiredArgsConstructor
+@Slf4j
 public class DictionaryService {
 
-    public static final String VERTEBRATES_CODE = "VERTEBRATE";
-    public static final String ADDRESS_CODE = "ADDRESS";
-    public static final String NAME_CODE = "NAME";
-    public static final String NO_REDACTION_INDICATOR = "NO_REDACTION_INDICATOR";
+    private final DictionaryClient dictionaryClient;
+
+    private long dictionaryVersion = -1;
 
     @Getter
-    private Map<String, Set<String>> dictionary = new HashMap<>();
-
-    @Getter
-    private long generation;
-
-    @PostConstruct
-    public void init() {
-        loadFromResourceFiles();
-    }
-
+    private Map<String, Set<String>> dictionaryEntry = new HashMap<>();
 
+    /**
+     * Reloads the cached dictionaries when the available version is newer than the cached one.
+     *
+     * @throws FeignException if the dictionaries cannot be retrieved; the cache and its version stay unchanged
+     */
     public void updateDictionary() {
-        //TODO
+
+        long version = 1; // TODO = dictionaryClient.getVersion();
+        if (version > dictionaryVersion) {
+            // Fetch before recording the version: if retrieval throws, the next call retries the reload.
+            dictionaryEntry = retrieveDictionaryEntry();
+            dictionaryVersion = version;
+        }
     }
 
-
-    public void loadFromResourceFiles() {
-        dictionary.computeIfAbsent(NAME_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
-        dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
-        dictionary.computeIfAbsent(ADDRESS_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
-        dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
+    /**
+     * Fetches all dictionary types and, for each type, its entries from the dictionary client.
+     *
+     * @return map from dictionary type to its entries; empty when the client reports no types
+     * @throws FeignException if a client call fails (logged, then rethrown)
+     */
+    private Map<String, Set<String>> retrieveDictionaryEntry() {
+        try {
+            TypeResponse typeResponse = dictionaryClient.getAllTypes();
+            if (typeResponse == null || CollectionUtils.isEmpty(typeResponse.getTypes())) {
+                return Collections.emptyMap();
+            } else {
+                List<String> types = typeResponse.getTypes().stream().map(typeResult -> typeResult.getType()).collect(Collectors.toList());
+                return types.stream().collect(Collectors.toMap(type -> type, s -> dictionaryClient.getDictionaryForType(s).getEntries().stream().collect(Collectors.toSet())));
+            }
+        } catch (FeignException e) {
+            log.warn("Got some unknown feignException", e);
+            throw e;
+        }
     }
 
-
-    private String cleanDictionaryEntry(String entry) {
-        return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
-    }
-}
+}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index bbb6d016..80b3a320 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -19,15 +19,16 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; @Service @RequiredArgsConstructor +@Slf4j public class
EntityRedactionService { private final DictionaryService dictionaryService; private final DroolsExecutionService droolsExecutionService; - public void processDocument(Document classifiedDoc) { dictionaryService.updateDictionary(); @@ -98,13 +99,12 @@ public class EntityRedactionService { }); } - private Set findEntities(SearchableText searchableText, String headline) { String normalizedInputString = searchableText.toString(); Set found = new HashSet<>(); - for (Map.Entry> entry : dictionaryService.getDictionary().entrySet()) { + for (Map.Entry> entry : dictionaryService.getDictionaryEntry().entrySet()) { for (String value : entry.getValue()) { int startIndex; int stopIndex = 0; @@ -130,7 +130,6 @@ public class EntityRedactionService { return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’'; } - public void removeEntitiesContainedInLarger(Set entities) { List wordsToRemove = new ArrayList<>(); for (Entity word : entities) { @@ -142,6 +141,4 @@ public class EntityRedactionService { } entities.removeAll(wordsToRemove); } - - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/addresses.txt b/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/address.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/addresses.txt rename to redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/address.txt diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/names.txt b/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/name.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/names.txt rename to redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/name.txt diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/vertebrates.txt b/redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/vertebrate.txt similarity index 100% rename from redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/vertebrates.txt rename to redaction-service-v1/redaction-service-server-v1/src/main/resources/dictionaries/vertebrate.txt