RED-2082: Added engines to redactionLog to identify where an entry comes from

This commit is contained in:
Dominique Eifländer 2021-09-08 13:20:38 +02:00
parent bcc069a826
commit d89a41caca
8 changed files with 67 additions and 24 deletions

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
/**
 * Names the detection mechanism that produced an entity, so that a redaction log entry can
 * report where it came from. An entity may carry more than one engine when several
 * mechanisms find the same match.
 */
public enum Engine {

    /** The match was found by searching the text for dictionary values. */
    DICTIONARY,

    /** The match was found by searching the text for values reported by entity recognition (NER). */
    NER,

    /** The match was created while evaluating a redaction rule. */
    RULE
}

View File

@ -7,9 +7,9 @@ import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
@Data @Data
@Builder @Builder
@ -62,4 +62,8 @@ public class RedactionLogEntry {
@Builder.Default @Builder.Default
private List<Change> changes = new ArrayList<>(); private List<Change> changes = new ArrayList<>();
private Set<Engine> engines= new HashSet<>();
} }

View File

@ -1,17 +1,20 @@
package com.iqser.red.service.redaction.v1.server.redaction.model; package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.Data; import lombok.Data;
import lombok.EqualsAndHashCode; import lombok.EqualsAndHashCode;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
@Data @Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true) @EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Entity implements ReasonHolder { public class Entity implements ReasonHolder {
private final String word; private final String word;
private final String type; private final String type;
private boolean redaction; private boolean redaction;
@ -39,8 +42,13 @@ public class Entity implements ReasonHolder {
private boolean isDossierDictionaryEntry; private boolean isDossierDictionaryEntry;
private Set<Engine> engines = new HashSet<>();
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, Integer end, boolean isDossierDictionaryEntry) {
public Entity(String word, String type, boolean redaction, String redactionReason,
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines) {
this.word = word; this.word = word;
this.type = type; this.type = type;
@ -57,10 +65,12 @@ public class Entity implements ReasonHolder {
this.start = start; this.start = start;
this.end = end; this.end = end;
this.isDossierDictionaryEntry = isDossierDictionaryEntry; this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines = engines;
} }
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionaryEntry) { public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) {
this.word = word; this.word = word;
this.type = type; this.type = type;
@ -70,6 +80,8 @@ public class Entity implements ReasonHolder {
this.sectionNumber = sectionNumber; this.sectionNumber = sectionNumber;
this.isDictionaryEntry = isDictionaryEntry; this.isDictionaryEntry = isDictionaryEntry;
this.isDossierDictionaryEntry = isDossierDictionaryEntry; this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines.add(engine);
} }
} }

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model; package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.ArgumentType; import com.iqser.red.service.redaction.v1.model.ArgumentType;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.FileAttribute; import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
@ -524,7 +525,7 @@ public class Section {
String text = caseInsensitive ? searchText.toLowerCase() : searchText; String text = caseInsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseInsensitive ? value.toLowerCase() : value; String searchValue = caseInsensitive ? value.toLowerCase() : value;
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true, false); Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, Engine.RULE);
found.forEach(entity -> { found.forEach(entity -> {
if (redacted) { if (redacted) {
@ -550,7 +551,7 @@ public class Section {
} else { } else {
String word = value.toString(); String word = value.toString();
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false); Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE);
entity.setRedaction(redact); entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber); entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason); entity.setRedactionReason(reason);

View File

@ -15,11 +15,10 @@ import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization; import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.model.Status;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
@ -42,7 +41,6 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor @RequiredArgsConstructor
public class EntityRedactionService { public class EntityRedactionService {
private final EntityRecognitionClient entityRecognitionClient;
private final RedactionServiceSettings redactionServiceSettings; private final RedactionServiceSettings redactionServiceSettings;
private final DroolsExecutionService droolsExecutionService; private final DroolsExecutionService droolsExecutionService;
private final SurroundingWordsService surroundingWordsService; private final SurroundingWordsService surroundingWordsService;
@ -151,7 +149,7 @@ public class EntityRedactionService {
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity .getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry())); .getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines()));
} }
} }
return entitiesPerPage; return entitiesPerPage;
@ -210,18 +208,19 @@ public class EntityRedactionService {
String lowercaseInputString = searchableString.toLowerCase(); String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionary.getDictionaryModels()) { for (DictionaryModel model : dictionary.getDictionaryModels()) {
if (model.isCaseInsensitive()) { if (model.isCaseInsensitive()) {
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model
.isDossierDictionary())); .getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
} else { } else {
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model
.isDossierDictionary())); .getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
} }
} }
if (!local) { if (!local) {
Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts); Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts);
nerValuesPerType.entrySet().forEach(entry -> { nerValuesPerType.entrySet().forEach(entry -> {
found.addAll(EntitySearchUtils.find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber, false, false)); EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, entry.getValue(), entry
.getKey(), headline, sectionNumber, false, false, Engine.NER));
}); });
} }
@ -238,7 +237,8 @@ public class EntityRedactionService {
.containsKey(sectionNumber)) { .containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> { nerEntities.getResult().get(sectionNumber).forEach(res -> {
if (cellstarts == null || cellstarts.isEmpty()) { if (cellstarts == null || cellstarts.isEmpty()) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes()))); nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
} else { } else {
boolean intersectsCellStart = false; boolean intersectsCellStart = false;
for (Integer cellStart : cellstarts) { for (Integer cellStart : cellstarts) {
@ -247,7 +247,8 @@ public class EntityRedactionService {
} }
} }
if (!intersectsCellStart) { if (!intersectsCellStart) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes()))); nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
} }
} }
}); });

View File

@ -177,6 +177,7 @@ public class RedactionLogCreatorService {
.startOffset(entity.getStart()) .startOffset(entity.getStart())
.endOffset(entity.getEnd()) .endOffset(entity.getEnd())
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry()) .isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
.engines(entity.getEngines())
.build(); .build();
} }

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils; package com.iqser.red.service.redaction.v1.server.redaction.utils;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
@ -47,7 +48,7 @@ public class EntitySearchUtils {
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionary) { boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine) {
Set<Entity> found = new HashSet<>(); Set<Entity> found = new HashSet<>();
@ -67,7 +68,7 @@ public class EntitySearchUtils {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { .charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary)); found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine));
} }
} while (startIndex > -1); } while (startIndex > -1);
} }
@ -142,9 +143,13 @@ public class EntitySearchUtils {
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get(); Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) { if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
entities.remove(found); entities.remove(found);
}
}
entities.add(found); entities.add(found);
} else {
existing.getEngines().addAll(found.getEngines());
}
} else {
entities.add(found);
}
} }
@ -154,4 +159,17 @@ public class EntitySearchUtils {
entities.addAll(found); entities.addAll(found);
} }
/**
 * Merges {@code toBeAdded} into {@code existing} without losing engine information.
 * <p>
 * An entity that is not yet contained in {@code existing} is added as-is. If an equal entity
 * (as defined by {@code Entity.equals}) is already present, that entity is kept and the
 * engines of the new entity are added to its engine set instead.
 *
 * @param existing  the set that is modified in place
 * @param toBeAdded the newly found entities to merge into {@code existing}
 */
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded) {
    for (Entity candidate : toBeAdded) {
        if (!existing.contains(candidate)) {
            existing.add(candidate);
            continue;
        }

        // An equal entity is already stored: fetch that instance and record the extra engines on it.
        Entity alreadyPresent = existing.stream()
                .filter(entity -> entity.equals(candidate))
                .findFirst()
                .get();
        alreadyPresent.getEngines().addAll(candidate.getEngines());
    }
}
} }

View File

@ -7,6 +7,7 @@ import java.util.Set;
import org.junit.Test; import org.junit.Test;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
public class EntitySearchUtilsTest { public class EntitySearchUtilsTest {
@ -15,8 +16,8 @@ public class EntitySearchUtilsTest {
public void testNestedEntitiesRemoval() { public void testNestedEntitiesRemoval() {
Set<Entity> entities = new HashSet<>(); Set<Entity> entities = new HashSet<>();
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false); Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false); Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE);
entities.add(nested); entities.add(nested);
entities.add(nesting); entities.add(nesting);
EntitySearchUtils.removeEntitiesContainedInLarger(entities); EntitySearchUtils.removeEntitiesContainedInLarger(entities);