From d89a41caca623eedbeee7bc9b058b605db8fc359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Wed, 8 Sep 2021 13:20:38 +0200 Subject: [PATCH] RED-2082: Added engines to redactionLog, to identify where a entry comes from --- .../service/redaction/v1/model/Engine.java | 5 ++++ .../redaction/v1/model/RedactionLogEntry.java | 8 ++++-- .../v1/server/redaction/model/Entity.java | 18 ++++++++++--- .../v1/server/redaction/model/Section.java | 5 ++-- .../service/EntityRedactionService.java | 25 ++++++++++--------- .../service/RedactionLogCreatorService.java | 1 + .../redaction/utils/EntitySearchUtils.java | 24 +++++++++++++++--- .../utils/EntitySearchUtilsTest.java | 5 ++-- 8 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Engine.java diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Engine.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Engine.java new file mode 100644 index 00000000..8dfa1184 --- /dev/null +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Engine.java @@ -0,0 +1,5 @@ +package com.iqser.red.service.redaction.v1.model; + +public enum Engine { + DICTIONARY, NER, RULE +} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index dc14eebc..e9431f21 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -7,9 +7,9 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; - - +import java.util.Set; @Data @Builder @@ -62,4 +62,8 @@ public class RedactionLogEntry { @Builder.Default private List changes = new ArrayList<>(); + private Set engines= new HashSet<>(); + + + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java index 2ae553db..ad426337 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java @@ -1,17 +1,20 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; + import lombok.Data; import lombok.EqualsAndHashCode; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true) public class Entity implements ReasonHolder { - private final String word; private final String type; private boolean redaction; @@ -39,8 +42,13 @@ public class Entity implements ReasonHolder { private boolean isDossierDictionaryEntry; + private Set engines = new HashSet<>(); - public Entity(String word, String type, boolean redaction, String redactionReason, List positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, Integer end, boolean isDossierDictionaryEntry) { + + public Entity(String word, String type, boolean redaction, String redactionReason, + List positionSequences, String headline, int matchedRule, int sectionNumber, + String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, + Integer end, boolean isDossierDictionaryEntry, Set engines) { this.word = word; this.type = type; @@ -57,10 +65,12 @@ public class Entity implements ReasonHolder { this.start = start; this.end = end; this.isDossierDictionaryEntry = isDossierDictionaryEntry; + this.engines = engines; } - public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionaryEntry) { + public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, + boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) { this.word = word; this.type = type; @@ -70,6 +80,8 @@ public class Entity implements ReasonHolder { this.sectionNumber = sectionNumber; this.isDictionaryEntry = isDictionaryEntry; this.isDossierDictionaryEntry = isDossierDictionaryEntry; + this.engines.add(engine); } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 929e4110..6a1594f8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; import com.iqser.red.service.redaction.v1.model.ArgumentType; +import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.model.FileAttribute; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; @@ -524,7 +525,7 @@ public class Section { String text = caseInsensitive ? searchText.toLowerCase() : searchText; String searchValue = caseInsensitive ? value.toLowerCase() : value; - Set found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true, false); + Set found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, Engine.RULE); found.forEach(entity -> { if (redacted) { @@ -550,7 +551,7 @@ public class Section { } else { String word = value.toString(); - Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false); + Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE); entity.setRedaction(redact); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 752034da..7b7e5547 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -15,11 +15,10 @@ import org.kie.api.runtime.KieContainer; import org.springframework.stereotype.Service; import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; +import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization; import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; -import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity; import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; @@ -42,7 +41,6 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class EntityRedactionService { - private final EntityRecognitionClient entityRecognitionClient; private final RedactionServiceSettings redactionServiceSettings; private final DroolsExecutionService droolsExecutionService; private final SurroundingWordsService surroundingWordsService; @@ -151,7 +149,7 @@ public class EntityRedactionService { .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity .getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity - .getStart(), entity.getEnd(), entity.isDossierDictionaryEntry())); + .getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines())); } } return entitiesPerPage; @@ -210,18 +208,19 @@ public class EntityRedactionService { String lowercaseInputString = searchableString.toLowerCase(); for (DictionaryModel model : dictionary.getDictionaryModels()) { if (model.isCaseInsensitive()) { - found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model - .isDossierDictionary())); + EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model + .getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY)); } else { - found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model - .isDossierDictionary())); + EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model + .getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY)); } } if (!local) { Map> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts); nerValuesPerType.entrySet().forEach(entry -> { - found.addAll(EntitySearchUtils.find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber, false, false)); + EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, entry.getValue(), entry + .getKey(), headline, sectionNumber, false, false, Engine.NER)); }); } @@ -230,7 +229,7 @@ public class EntityRedactionService { private Map> getNerValues(int sectionNumber, NerEntities nerEntities, - List cellstarts) { + List cellstarts) { Map> nerValuesPerType = new HashMap<>(); @@ -238,7 +237,8 @@ public class EntityRedactionService { .containsKey(sectionNumber)) { nerEntities.getResult().get(sectionNumber).forEach(res -> { if (cellstarts == null || cellstarts.isEmpty()) { - nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes()))); + nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()) + .add(new String(Base64.decodeBase64(res.getValue().getBytes()))); } else { boolean intersectsCellStart = false; for (Integer cellStart : cellstarts) { @@ -247,7 +247,8 @@ public class EntityRedactionService { } } if (!intersectsCellStart) { - nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes()))); + nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()) + .add(new String(Base64.decodeBase64(res.getValue().getBytes()))); } } }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 0eaea701..7fefdbc1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -177,6 +177,7 @@ public class RedactionLogCreatorService { .startOffset(entity.getStart()) .endOffset(entity.getEnd()) .isDossierDictionaryEntry(entity.isDossierDictionaryEntry()) + .engines(entity.getEngines()) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 14bee48b..1cd9d6d0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; +import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; @@ -47,7 +48,7 @@ public class EntitySearchUtils { public Set find(String inputString, Set values, String type, String headline, int sectionNumber, - boolean isDictionaryEntry, boolean isDossierDictionary) { + boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine) { Set found = new HashSet<>(); @@ -67,7 +68,7 @@ public class EntitySearchUtils { if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString .charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { - found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary)); + found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine)); } } while (startIndex > -1); } @@ -142,9 +143,13 @@ public class EntitySearchUtils { Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get(); if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) { entities.remove(found); + entities.add(found); + } else { + existing.getEngines().addAll(found.getEngines()); } + } else { + entities.add(found); } - entities.add(found); } @@ -154,4 +159,17 @@ public class EntitySearchUtils { entities.addAll(found); } + + public void addOrAddEngine(Set existing, Set toBeAdded){ + + for(Entity toAdd: toBeAdded){ + if (existing.contains(toAdd)) { + Entity existingEntity = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst().get(); + existingEntity.getEngines().addAll(toAdd.getEngines()); + } else { + existing.add(toAdd); + } + } + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java index 544b334e..ae2fb019 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java @@ -7,6 +7,7 @@ import java.util.Set; import org.junit.Test; +import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; public class EntitySearchUtilsTest { @@ -15,8 +16,8 @@ public class EntitySearchUtilsTest { public void testNestedEntitiesRemoval() { Set entities = new HashSet<>(); - Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false); - Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false); + Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE); + Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE); entities.add(nested); entities.add(nesting); EntitySearchUtils.removeEntitiesContainedInLarger(entities);