From 536d4689f38606678bb06dd98dcc1761b375f232 Mon Sep 17 00:00:00 2001 From: Timo Date: Thu, 26 Nov 2020 18:52:44 +0200 Subject: [PATCH] Added rank of dictionary to processing entities in redaction service, simplified code --- .../redaction-service-server-v1/pom.xml | 2 +- .../redaction/model/DictionaryModel.java | 25 ++++++ .../redaction/service/DictionaryService.java | 90 ++++++++----------- .../service/EntityRedactionService.java | 73 +++++++-------- 4 files changed, 95 insertions(+), 95 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index 67a80e3f..df5aa5e9 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -20,7 +20,7 @@ com.iqser.red.service configuration-service-api-v1 - 1.2.0 + 1.3.5 org.drools diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java new file mode 100644 index 00000000..0a5e1032 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java @@ -0,0 +1,25 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.util.Set; + +@Data +@AllArgsConstructor +public class DictionaryModel { + + private String type; + private int rank; + private float[] color; + private boolean caseInsensitive; + private boolean hint; + private Set entries; + private Set localEntries; + + public Set getValues(boolean local){ + return local ? localEntries : entries; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index 62c539a0..870ca9ba 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -1,6 +1,18 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.awt.Color; +import com.iqser.red.service.configuration.v1.api.model.Colors; +import com.iqser.red.service.configuration.v1.api.model.TypeResponse; +import com.iqser.red.service.configuration.v1.api.model.TypeResult; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; +import feign.FeignException; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.stereotype.Service; + +import java.awt.*; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; @@ -8,22 +20,8 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeMap; import java.util.stream.Collectors; -import org.apache.commons.collections4.CollectionUtils; -import org.springframework.stereotype.Service; - -import com.iqser.red.service.configuration.v1.api.model.Colors; -import com.iqser.red.service.configuration.v1.api.model.TypeResponse; -import com.iqser.red.service.configuration.v1.api.model.TypeResult; -import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; - -import feign.FeignException; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; - @Slf4j @Service @RequiredArgsConstructor @@ -35,19 +33,7 @@ public class DictionaryService { private long dictionaryVersion = -1; @Getter - private Map> dictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important. - - @Getter - private Map> localDictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important. - - @Getter - private Map entryColors = new HashMap<>(); - - @Getter - private List hintTypes = new ArrayList<>(); - - @Getter - private List caseInsensitiveTypes = new ArrayList<>(); + private List dictionary = new ArrayList<>(); @Getter private float[] defaultColor; @@ -61,16 +47,18 @@ public class DictionaryService { @Getter private float[] notRedactedColor; + private Map localAccessMap = new HashMap<>(); - public void addToLocalDictionary(String type, String value) { - - localDictionary.computeIfAbsent(type, (x) -> new HashSet<>()).add(value); + public boolean hasLocalEntries(){ + return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty()); } + public void addToLocalDictionary(String type, String value) { + localAccessMap.get(type).getLocalEntries().add(value); + } - public void clearLocalDictionary() { - - localDictionary = new TreeMap<>(Comparator.reverseOrder()); + public void clearLocalEntries() { + this.dictionary.forEach(dm -> dm.getLocalEntries().clear()); } @@ -89,24 +77,16 @@ public class DictionaryService { try { TypeResponse typeResponse = dictionaryClient.getAllTypes(); if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) { - entryColors = typeResponse.getTypes() + + dictionary = typeResponse.getTypes() .stream() - .collect(Collectors.toMap(TypeResult::getType, t -> convertColor(t.getHexColor()))); - hintTypes = typeResponse.getTypes() - .stream() - .filter(TypeResult::isHint) - .map(TypeResult::getType) - .collect(Collectors.toList()); - caseInsensitiveTypes = typeResponse.getTypes() - .stream() - .filter(TypeResult::isCaseInsensitive) - .map(TypeResult::getType) + .map(t -> + new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t.isHint(), convertEntries(t), new HashSet<>())) + .sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()) .collect(Collectors.toList()); - dictionary = new TreeMap<>(Comparator.reverseOrder()); - entryColors.keySet().forEach(type -> { - dictionary.put(type, convertEntries(type)); - }); + localAccessMap.clear(); + dictionary.forEach(dm -> localAccessMap.put(dm.getType(), dm)); Colors colors = dictionaryClient.getColors(); defaultColor = convertColor(colors.getDefaultColor()); @@ -121,16 +101,17 @@ public class DictionaryService { } - private Set convertEntries(String s) { + private Set convertEntries(TypeResult t) { - if (caseInsensitiveTypes.contains(s)) { - return dictionaryClient.getDictionaryForType(s) + if (t.isCaseInsensitive()) { + return dictionaryClient.getDictionaryForType(t.getType()) .getEntries() .stream() .map(String::toLowerCase) .collect(Collectors.toSet()); + } else { + return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType()).getEntries()); } - return new HashSet<>(dictionaryClient.getDictionaryForType(s).getEntries()); } @@ -140,4 +121,7 @@ public class DictionaryService { return new float[]{color.getRed() / 255f, color.getGreen() / 255f, color.getBlue() / 255f}; } + public boolean isCaseInsensitiveDictionary(String type) { + return localAccessMap.get(type).isCaseInsensitive(); + } } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 6fca0281..1af41b03 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,5 +1,24 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; +import com.iqser.red.service.redaction.v1.model.ManualRedactions; +import com.iqser.red.service.redaction.v1.model.Rectangle; +import com.iqser.red.service.redaction.v1.server.classification.model.Document; +import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; +import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; +import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; +import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; +import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import lombok.RequiredArgsConstructor; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.stereotype.Service; + import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -8,26 +27,6 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Service; - -import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; -import com.iqser.red.service.redaction.v1.model.ManualRedactions; -import com.iqser.red.service.redaction.v1.model.Rectangle; -import com.iqser.red.service.redaction.v1.server.classification.model.Document; -import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; -import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; -import com.iqser.red.service.redaction.v1.server.redaction.model.Section; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; - -import lombok.RequiredArgsConstructor; - @Service @RequiredArgsConstructor public class EntityRedactionService { @@ -35,18 +34,16 @@ public class EntityRedactionService { private final DictionaryService dictionaryService; private final DroolsExecutionService droolsExecutionService; - public void processDocument(Document classifiedDoc, ManualRedactions manualRedactions) { dictionaryService.updateDictionary(); droolsExecutionService.updateRules(); - dictionaryService.clearLocalDictionary(); + dictionaryService.clearLocalEntries(); - Set documentEntities = new HashSet<>(); - documentEntities.addAll(findEntities(classifiedDoc, manualRedactions, dictionaryService.getDictionary())); + Set documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false)); - if(!dictionaryService.getLocalDictionary().isEmpty()){ - Set foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionaryService.getLocalDictionary()); + if (dictionaryService.hasLocalEntries()) { + Set foundByLocal = findEntities(classifiedDoc, manualRedactions, true); // HashSet keeps the older value, but we want the new only. documentEntities.removeAll(foundByLocal); documentEntities.addAll(foundByLocal); @@ -70,7 +67,7 @@ public class EntityRedactionService { } - private Set findEntities(Document classifiedDoc, ManualRedactions manualRedactions, Map> dictionary){ + private Set findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries) { Set documentEntities = new HashSet<>(); int sectionNumber = 1; for (Paragraph paragraph : classifiedDoc.getParagraphs()) { @@ -105,7 +102,7 @@ public class EntityRedactionService { searchableRow.addAll(textBlock.getSequences()); } } - Set rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary); + Set rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries); Section analysedRowSection = droolsExecutionService.executeRules(Section.builder() .dictionaryService(dictionaryService) @@ -124,7 +121,7 @@ public class EntityRedactionService { } addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber); - Set entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary); + Set entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries); Section analysedSection = droolsExecutionService.executeRules(Section.builder() .dictionaryService(dictionaryService) .entities(entities) @@ -146,18 +143,14 @@ public class EntityRedactionService { removeEntitiesContainedInLarger(entities); for (Entity entity : entities) { - if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) { - entity.setPositionSequences(text.getSequences(entity.getWord(), true, entity.getTargetSequences())); - } else { - entity.setPositionSequences(text.getSequences(entity.getWord(), false, entity.getTargetSequences())); - } + entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity.getType()), entity.getTargetSequences())); } return entities; } - private Set findEntities(SearchableText searchableText, String headline, int sectionNumber, Map> dictionary) { + private Set findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) { Set found = new HashSet<>(); String searchableString = searchableText.toString(); @@ -166,16 +159,14 @@ public class EntityRedactionService { } String lowercaseInputString = searchableString.toLowerCase(); - for (Map.Entry> entry : dictionary.entrySet()) { - if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) { - found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline, sectionNumber)); + for (DictionaryModel model : dictionaryService.getDictionary()) { + if (model.isCaseInsensitive()) { + found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber)); } else { - found.addAll(find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber)); + found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber)); } } - removeEntitiesContainedInLarger(found); - return found; }