Added rank of dictionary to processing entities in redaction service, simplified code
This commit is contained in:
parent
f458a1f930
commit
536d4689f3
@ -20,7 +20,7 @@
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>configuration-service-api-v1</artifactId>
|
||||
<version>1.2.0</version>
|
||||
<version>1.3.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.drools</groupId>
|
||||
|
||||
@ -0,0 +1,25 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class DictionaryModel {
|
||||
|
||||
private String type;
|
||||
private int rank;
|
||||
private float[] color;
|
||||
private boolean caseInsensitive;
|
||||
private boolean hint;
|
||||
private Set<String> entries;
|
||||
private Set<String> localEntries;
|
||||
|
||||
public Set<String> getValues(boolean local){
|
||||
return local ? localEntries : entries;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,6 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.awt.Color;
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import feign.FeignException;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
@ -8,22 +20,8 @@ import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
|
||||
import feign.FeignException;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -35,19 +33,7 @@ public class DictionaryService {
|
||||
private long dictionaryVersion = -1;
|
||||
|
||||
@Getter
|
||||
private Map<String, Set<String>> dictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important.
|
||||
|
||||
@Getter
|
||||
private Map<String, Set<String>> localDictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important.
|
||||
|
||||
@Getter
|
||||
private Map<String, float[]> entryColors = new HashMap<>();
|
||||
|
||||
@Getter
|
||||
private List<String> hintTypes = new ArrayList<>();
|
||||
|
||||
@Getter
|
||||
private List<String> caseInsensitiveTypes = new ArrayList<>();
|
||||
private List<DictionaryModel> dictionary = new ArrayList<>();
|
||||
|
||||
@Getter
|
||||
private float[] defaultColor;
|
||||
@ -61,16 +47,18 @@ public class DictionaryService {
|
||||
@Getter
|
||||
private float[] notRedactedColor;
|
||||
|
||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||
|
||||
public void addToLocalDictionary(String type, String value) {
|
||||
|
||||
localDictionary.computeIfAbsent(type, (x) -> new HashSet<>()).add(value);
|
||||
public boolean hasLocalEntries(){
|
||||
return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
|
||||
}
|
||||
|
||||
public void addToLocalDictionary(String type, String value) {
|
||||
localAccessMap.get(type).getLocalEntries().add(value);
|
||||
}
|
||||
|
||||
public void clearLocalDictionary() {
|
||||
|
||||
localDictionary = new TreeMap<>(Comparator.reverseOrder());
|
||||
public void clearLocalEntries() {
|
||||
this.dictionary.forEach(dm -> dm.getLocalEntries().clear());
|
||||
}
|
||||
|
||||
|
||||
@ -89,24 +77,16 @@ public class DictionaryService {
|
||||
try {
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes();
|
||||
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
|
||||
entryColors = typeResponse.getTypes()
|
||||
|
||||
dictionary = typeResponse.getTypes()
|
||||
.stream()
|
||||
.collect(Collectors.toMap(TypeResult::getType, t -> convertColor(t.getHexColor())));
|
||||
hintTypes = typeResponse.getTypes()
|
||||
.stream()
|
||||
.filter(TypeResult::isHint)
|
||||
.map(TypeResult::getType)
|
||||
.collect(Collectors.toList());
|
||||
caseInsensitiveTypes = typeResponse.getTypes()
|
||||
.stream()
|
||||
.filter(TypeResult::isCaseInsensitive)
|
||||
.map(TypeResult::getType)
|
||||
.map(t ->
|
||||
new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t.isHint(), convertEntries(t), new HashSet<>()))
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
dictionary = new TreeMap<>(Comparator.reverseOrder());
|
||||
entryColors.keySet().forEach(type -> {
|
||||
dictionary.put(type, convertEntries(type));
|
||||
});
|
||||
localAccessMap.clear();
|
||||
dictionary.forEach(dm -> localAccessMap.put(dm.getType(), dm));
|
||||
|
||||
Colors colors = dictionaryClient.getColors();
|
||||
defaultColor = convertColor(colors.getDefaultColor());
|
||||
@ -121,16 +101,17 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
private Set<String> convertEntries(String s) {
|
||||
private Set<String> convertEntries(TypeResult t) {
|
||||
|
||||
if (caseInsensitiveTypes.contains(s)) {
|
||||
return dictionaryClient.getDictionaryForType(s)
|
||||
if (t.isCaseInsensitive()) {
|
||||
return dictionaryClient.getDictionaryForType(t.getType())
|
||||
.getEntries()
|
||||
.stream()
|
||||
.map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
} else {
|
||||
return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType()).getEntries());
|
||||
}
|
||||
return new HashSet<>(dictionaryClient.getDictionaryForType(s).getEntries());
|
||||
}
|
||||
|
||||
|
||||
@ -140,4 +121,7 @@ public class DictionaryService {
|
||||
return new float[]{color.getRed() / 255f, color.getGreen() / 255f, color.getBlue() / 255f};
|
||||
}
|
||||
|
||||
public boolean isCaseInsensitiveDictionary(String type) {
|
||||
return localAccessMap.get(type).isCaseInsensitive();
|
||||
}
|
||||
}
|
||||
@ -1,5 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -8,26 +27,6 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class EntityRedactionService {
|
||||
@ -35,18 +34,16 @@ public class EntityRedactionService {
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc, ManualRedactions manualRedactions) {
|
||||
|
||||
dictionaryService.updateDictionary();
|
||||
droolsExecutionService.updateRules();
|
||||
dictionaryService.clearLocalDictionary();
|
||||
dictionaryService.clearLocalEntries();
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
documentEntities.addAll(findEntities(classifiedDoc, manualRedactions, dictionaryService.getDictionary()));
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false));
|
||||
|
||||
if(!dictionaryService.getLocalDictionary().isEmpty()){
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionaryService.getLocalDictionary());
|
||||
if (dictionaryService.hasLocalEntries()) {
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, true);
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
documentEntities.removeAll(foundByLocal);
|
||||
documentEntities.addAll(foundByLocal);
|
||||
@ -70,7 +67,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, Map<String, Set<String>> dictionary){
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries) {
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
int sectionNumber = 1;
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
@ -105,7 +102,7 @@ public class EntityRedactionService {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
}
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary);
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries);
|
||||
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.dictionaryService(dictionaryService)
|
||||
@ -124,7 +121,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary);
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries);
|
||||
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.dictionaryService(dictionaryService)
|
||||
.entities(entities)
|
||||
@ -146,18 +143,14 @@ public class EntityRedactionService {
|
||||
removeEntitiesContainedInLarger(entities);
|
||||
|
||||
for (Entity entity : entities) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), true, entity.getTargetSequences()));
|
||||
} else {
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), false, entity.getTargetSequences()));
|
||||
}
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity.getType()), entity.getTargetSequences()));
|
||||
}
|
||||
|
||||
return entities;
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, Map<String, Set<String>> dictionary) {
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.toString();
|
||||
@ -166,16 +159,14 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (Map.Entry<String, Set<String>> entry : dictionary.entrySet()) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
|
||||
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline, sectionNumber));
|
||||
for (DictionaryModel model : dictionaryService.getDictionary()) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
|
||||
} else {
|
||||
found.addAll(find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber));
|
||||
found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber));
|
||||
}
|
||||
}
|
||||
|
||||
removeEntitiesContainedInLarger(found);
|
||||
|
||||
return found;
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user