RED-106: replace the local dictionary preload with the remote dictionary service.
This commit is contained in:
parent
fe5c20e1a0
commit
8ed548f1a8
@ -0,0 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client;
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
|
||||
import com.iqser.red.service.configuration.v1.api.resource.RulesResource;
|
||||
|
||||
@FeignClient(name = RulesResource.SERVICE_NAME, url = "http://" + RulesResource.SERVICE_NAME + ":8080")
|
||||
public interface DictionaryClient extends DictionaryResource {
|
||||
}
|
||||
@ -1,58 +1,57 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
|
||||
import feign.FeignException;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class DictionaryService {
|
||||
|
||||
public static final String VERTEBRATES_CODE = "VERTEBRATE";
|
||||
public static final String ADDRESS_CODE = "ADDRESS";
|
||||
public static final String NAME_CODE = "NAME";
|
||||
public static final String NO_REDACTION_INDICATOR = "NO_REDACTION_INDICATOR";
|
||||
private final DictionaryClient dictionaryClient;
|
||||
|
||||
private long dictionaryVersion = -1;
|
||||
|
||||
@Getter
|
||||
private Map<String, Set<String>> dictionary = new HashMap<>();
|
||||
|
||||
@Getter
|
||||
private long generation;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
loadFromResourceFiles();
|
||||
}
|
||||
|
||||
private Map<String, Set<String>> dictionaryEntry = new HashMap<>();
|
||||
|
||||
public void updateDictionary() {
|
||||
//TODO
|
||||
|
||||
long version = 1; // TODO = dictionaryClient.getVersion();
|
||||
if (version > dictionaryVersion) {
|
||||
dictionaryVersion = version;
|
||||
dictionaryEntry = retrieveDictionaryEntry();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Loads the four bundled dictionary files via ResourceLoader, cleans every entry with
// cleanDictionaryEntry and adds the results to the dictionary map under the matching key
// (NAME_CODE, VERTEBRATES_CODE, ADDRESS_CODE, NO_REDACTION_INDICATOR). Existing sets are
// extended rather than replaced because computeIfAbsent only creates a set when the key is missing.
public void loadFromResourceFiles() {
|
||||
dictionary.computeIfAbsent(NAME_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
dictionary.computeIfAbsent(ADDRESS_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
private Map<String, Set<String>> retrieveDictionaryEntry() {
|
||||
try {
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes();
|
||||
if (typeResponse == null || CollectionUtils.isEmpty(typeResponse.getTypes())) {
|
||||
return Collections.emptyMap();
|
||||
} else {
|
||||
List<String> types = typeResponse.getTypes().stream().map(typeResult -> typeResult.getType()).collect(Collectors.toList());
|
||||
return types.stream().collect(Collectors.toMap(type -> type, s -> dictionaryClient.getDictionaryForType(s).getEntries().stream().collect(Collectors.toSet())));
|
||||
}
|
||||
} catch (FeignException e) {
|
||||
log.warn("Got some unknown feignException", e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private String cleanDictionaryEntry(String entry) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -19,15 +19,16 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class EntityRedactionService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc) {
|
||||
|
||||
dictionaryService.updateDictionary();
|
||||
@ -98,13 +99,12 @@ public class EntityRedactionService {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline) {
|
||||
|
||||
String normalizedInputString = searchableText.toString();
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
|
||||
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionaryEntry().entrySet()) {
|
||||
for (String value : entry.getValue()) {
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
@ -130,7 +130,6 @@ public class EntityRedactionService {
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
for (Entity word : entities) {
|
||||
@ -142,6 +141,4 @@ public class EntityRedactionService {
|
||||
}
|
||||
entities.removeAll(wordsToRemove);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user