Added rank of dictionary to processing entities in redaction service, simplified code

This commit is contained in:
Timo 2020-11-26 18:52:44 +02:00
parent f458a1f930
commit 536d4689f3
4 changed files with 95 additions and 95 deletions

View File

@ -20,7 +20,7 @@
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>configuration-service-api-v1</artifactId>
<version>1.2.0</version>
<version>1.3.5</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import java.util.Set;
/**
 * Holds one dictionary type together with its matching settings and entries.
 *
 * <p>Getters, setters and the all-arguments constructor are generated by Lombok;
 * the constructor takes the fields in declaration order.
 */
@Data
@AllArgsConstructor
public class DictionaryModel {

    /** Name of the dictionary type. */
    private String type;

    /** Rank of the dictionary type. */
    private int rank;

    /** Colour assigned to this type, stored as an array of float components. */
    private float[] color;

    /** Whether this dictionary is flagged as case-insensitive. */
    private boolean caseInsensitive;

    /** Whether this type is flagged as a hint. */
    private boolean hint;

    /** Regular entries of the dictionary. */
    private Set<String> entries;

    /** Local entries of the dictionary, kept separately from the regular entries. */
    private Set<String> localEntries;

    /**
     * Selects one of the two entry sets.
     *
     * @param local {@code true} to obtain the local entries, {@code false} for the regular entries
     * @return the selected set itself (not a copy)
     */
    public Set<String> getValues(boolean local) {
        if (local) {
            return localEntries;
        }
        return entries;
    }
}

View File

@ -1,6 +1,18 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.awt.Color;
import com.iqser.red.service.configuration.v1.api.model.Colors;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import feign.FeignException;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import java.awt.*;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
@ -8,22 +20,8 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.configuration.v1.api.model.Colors;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import feign.FeignException;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@ -35,19 +33,7 @@ public class DictionaryService {
private long dictionaryVersion = -1;
@Getter
private Map<String, Set<String>> dictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important.
@Getter
private Map<String, Set<String>> localDictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important.
@Getter
private Map<String, float[]> entryColors = new HashMap<>();
@Getter
private List<String> hintTypes = new ArrayList<>();
@Getter
private List<String> caseInsensitiveTypes = new ArrayList<>();
private List<DictionaryModel> dictionary = new ArrayList<>();
@Getter
private float[] defaultColor;
@ -61,16 +47,18 @@ public class DictionaryService {
@Getter
private float[] notRedactedColor;
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
public void addToLocalDictionary(String type, String value) {
localDictionary.computeIfAbsent(type, (x) -> new HashSet<>()).add(value);
/**
 * Reports whether any dictionary model currently holds at least one local entry.
 *
 * @return {@code true} if the local entry set of some model is non-empty, {@code false} otherwise
 */
public boolean hasLocalEntries() {
    for (DictionaryModel model : this.dictionary) {
        if (!model.getLocalEntries().isEmpty()) {
            return true;
        }
    }
    return false;
}
/**
 * Adds a value to the local entries of the dictionary registered under the given type.
 *
 * @param type  dictionary type used as key into {@code localAccessMap}
 * @param value entry to add to that dictionary's local entries
 * @throws IllegalArgumentException if no dictionary is registered for {@code type}
 */
public void addToLocalDictionary(String type, String value) {
    DictionaryModel model = localAccessMap.get(type);
    if (model == null) {
        // Map.get yields null for an unregistered type; fail with a message that names
        // the type instead of an anonymous NullPointerException on the chained call.
        throw new IllegalArgumentException("No dictionary registered for type: " + type);
    }
    model.getLocalEntries().add(value);
}
public void clearLocalDictionary() {
localDictionary = new TreeMap<>(Comparator.reverseOrder());
/**
 * Empties the local entry set of every dictionary model. The models and their
 * regular entries are left untouched.
 */
public void clearLocalEntries() {
    for (DictionaryModel model : this.dictionary) {
        model.getLocalEntries().clear();
    }
}
@ -89,24 +77,16 @@ public class DictionaryService {
try {
TypeResponse typeResponse = dictionaryClient.getAllTypes();
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
entryColors = typeResponse.getTypes()
dictionary = typeResponse.getTypes()
.stream()
.collect(Collectors.toMap(TypeResult::getType, t -> convertColor(t.getHexColor())));
hintTypes = typeResponse.getTypes()
.stream()
.filter(TypeResult::isHint)
.map(TypeResult::getType)
.collect(Collectors.toList());
caseInsensitiveTypes = typeResponse.getTypes()
.stream()
.filter(TypeResult::isCaseInsensitive)
.map(TypeResult::getType)
.map(t ->
new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t.isHint(), convertEntries(t), new HashSet<>()))
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
dictionary = new TreeMap<>(Comparator.reverseOrder());
entryColors.keySet().forEach(type -> {
dictionary.put(type, convertEntries(type));
});
localAccessMap.clear();
dictionary.forEach(dm -> localAccessMap.put(dm.getType(), dm));
Colors colors = dictionaryClient.getColors();
defaultColor = convertColor(colors.getDefaultColor());
@ -121,16 +101,17 @@ public class DictionaryService {
}
// NOTE(review): this hunk shows the removed (String s) and the added (TypeResult t) variant of
// convertEntries interleaved without +/- markers; it is not compilable as displayed.
// Added variant: fetches the entries for t.getType() through dictionaryClient. When
// t.isCaseInsensitive() is set, every entry is lower-cased with String::toLowerCase before being
// collected into a Set; otherwise the entries are copied unchanged into a new HashSet.
private Set<String> convertEntries(String s) {
private Set<String> convertEntries(TypeResult t) {
if (caseInsensitiveTypes.contains(s)) {
return dictionaryClient.getDictionaryForType(s)
if (t.isCaseInsensitive()) {
return dictionaryClient.getDictionaryForType(t.getType())
.getEntries()
.stream()
.map(String::toLowerCase)
.collect(Collectors.toSet());
} else {
return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType()).getEntries());
}
return new HashSet<>(dictionaryClient.getDictionaryForType(s).getEntries());
}
@ -140,4 +121,7 @@ public class DictionaryService {
return new float[]{color.getRed() / 255f, color.getGreen() / 255f, color.getBlue() / 255f};
}
/**
 * Tells whether the dictionary registered under the given type is case-insensitive.
 *
 * @param type dictionary type used as key into {@code localAccessMap}
 * @return {@code true} only if a dictionary is registered for {@code type} and it is flagged
 *         case-insensitive; {@code false} for case-sensitive or unregistered types
 */
public boolean isCaseInsensitiveDictionary(String type) {
    DictionaryModel model = localAccessMap.get(type);
    // Types without a registered dictionary are treated as case-sensitive instead of
    // triggering a NullPointerException on the missing map entry.
    return model != null && model.isCaseInsensitive();
}
}

View File

@ -1,5 +1,24 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@ -8,26 +27,6 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
public class EntityRedactionService {
@ -35,18 +34,16 @@ public class EntityRedactionService {
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
public void processDocument(Document classifiedDoc, ManualRedactions manualRedactions) {
dictionaryService.updateDictionary();
droolsExecutionService.updateRules();
dictionaryService.clearLocalDictionary();
dictionaryService.clearLocalEntries();
Set<Entity> documentEntities = new HashSet<>();
documentEntities.addAll(findEntities(classifiedDoc, manualRedactions, dictionaryService.getDictionary()));
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false));
if(!dictionaryService.getLocalDictionary().isEmpty()){
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionaryService.getLocalDictionary());
if (dictionaryService.hasLocalEntries()) {
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, true);
// HashSet keeps the older value, but we want the new only.
documentEntities.removeAll(foundByLocal);
documentEntities.addAll(foundByLocal);
@ -70,7 +67,7 @@ public class EntityRedactionService {
}
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, Map<String, Set<String>> dictionary){
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries) {
Set<Entity> documentEntities = new HashSet<>();
int sectionNumber = 1;
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
@ -105,7 +102,7 @@ public class EntityRedactionService {
searchableRow.addAll(textBlock.getSequences());
}
}
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary);
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries);
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
.dictionaryService(dictionaryService)
@ -124,7 +121,7 @@ public class EntityRedactionService {
}
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary);
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries);
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
.dictionaryService(dictionaryService)
.entities(entities)
@ -146,18 +143,14 @@ public class EntityRedactionService {
removeEntitiesContainedInLarger(entities);
for (Entity entity : entities) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
entity.setPositionSequences(text.getSequences(entity.getWord(), true, entity.getTargetSequences()));
} else {
entity.setPositionSequences(text.getSequences(entity.getWord(), false, entity.getTargetSequences()));
}
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity.getType()), entity.getTargetSequences()));
}
return entities;
}
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, Map<String, Set<String>> dictionary) {
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString();
@ -166,16 +159,14 @@ public class EntityRedactionService {
}
String lowercaseInputString = searchableString.toLowerCase();
for (Map.Entry<String, Set<String>> entry : dictionary.entrySet()) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline, sectionNumber));
for (DictionaryModel model : dictionaryService.getDictionary()) {
if (model.isCaseInsensitive()) {
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
} else {
found.addAll(find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber));
found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber));
}
}
removeEntitiesContainedInLarger(found);
return found;
}