Pull request #223: RED-2082: Added engines to redactionLog, to identify where an entry comes from

Merge in RED/redaction-service from RED-2082 to master

* commit 'd89a41caca623eedbeee7bc9b058b605db8fc359':
  RED-2082: Added engines to redactionLog, to identify where an entry comes from
This commit is contained in:
Dominique Eiflaender 2021-09-08 14:10:22 +02:00
commit c4b99378a7
8 changed files with 67 additions and 24 deletions

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
/**
 * Origin marker for a redaction entry: names the engine that detected it.
 * A single entry may carry several engines when more than one of them
 * found the same entity.
 */
public enum Engine {

    /** The entity was matched against the values of a dictionary model. */
    DICTIONARY,

    /** The entity was matched against values returned by entity recognition (NER). */
    NER,

    /** The entity was created while a rule was applied to a section. */
    RULE
}

View File

@ -7,9 +7,9 @@ import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Data
@Builder
@ -62,4 +62,8 @@ public class RedactionLogEntry {
@Builder.Default
private List<Change> changes = new ArrayList<>();
// Engines that detected this entry. The class is built via @Builder, and Lombok's
// builder ignores plain field initializers: without @Builder.Default this set would
// be null whenever the builder is used without an explicit engines(...) call.
@Builder.Default
private Set<Engine> engines = new HashSet<>();
}

View File

@ -1,17 +1,20 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Entity implements ReasonHolder {
private final String word;
private final String type;
private boolean redaction;
@ -39,8 +42,13 @@ public class Entity implements ReasonHolder {
private boolean isDossierDictionaryEntry;
private Set<Engine> engines = new HashSet<>();
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, Integer end, boolean isDossierDictionaryEntry) {
public Entity(String word, String type, boolean redaction, String redactionReason,
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines) {
this.word = word;
this.type = type;
@ -57,10 +65,12 @@ public class Entity implements ReasonHolder {
this.start = start;
this.end = end;
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines = engines;
}
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionaryEntry) {
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) {
this.word = word;
this.type = type;
@ -70,6 +80,8 @@ public class Entity implements ReasonHolder {
this.sectionNumber = sectionNumber;
this.isDictionaryEntry = isDictionaryEntry;
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines.add(engine);
}
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.ArgumentType;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
@ -524,7 +525,7 @@ public class Section {
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseInsensitive ? value.toLowerCase() : value;
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true, false);
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, Engine.RULE);
found.forEach(entity -> {
if (redacted) {
@ -550,7 +551,7 @@ public class Section {
} else {
String word = value.toString();
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false);
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);

View File

@ -15,11 +15,10 @@ import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.Status;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
@ -42,7 +41,6 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class EntityRedactionService {
private final EntityRecognitionClient entityRecognitionClient;
private final RedactionServiceSettings redactionServiceSettings;
private final DroolsExecutionService droolsExecutionService;
private final SurroundingWordsService surroundingWordsService;
@ -151,7 +149,7 @@ public class EntityRedactionService {
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines()));
}
}
return entitiesPerPage;
@ -210,18 +208,19 @@ public class EntityRedactionService {
String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionary.getDictionaryModels()) {
if (model.isCaseInsensitive()) {
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model
.isDossierDictionary()));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
} else {
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model
.isDossierDictionary()));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
}
}
if (!local) {
Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts);
nerValuesPerType.entrySet().forEach(entry -> {
found.addAll(EntitySearchUtils.find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber, false, false));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, entry.getValue(), entry
.getKey(), headline, sectionNumber, false, false, Engine.NER));
});
}
@ -230,7 +229,7 @@ public class EntityRedactionService {
private Map<String, Set<String>> getNerValues(int sectionNumber, NerEntities nerEntities,
List<Integer> cellstarts) {
List<Integer> cellstarts) {
Map<String, Set<String>> nerValuesPerType = new HashMap<>();
@ -238,7 +237,8 @@ public class EntityRedactionService {
.containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> {
if (cellstarts == null || cellstarts.isEmpty()) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes())));
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
} else {
boolean intersectsCellStart = false;
for (Integer cellStart : cellstarts) {
@ -247,7 +247,8 @@ public class EntityRedactionService {
}
}
if (!intersectsCellStart) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes())));
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
}
}
});

View File

@ -177,6 +177,7 @@ public class RedactionLogCreatorService {
.startOffset(entity.getStart())
.endOffset(entity.getEnd())
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
.engines(entity.getEngines())
.build();
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
@ -47,7 +48,7 @@ public class EntitySearchUtils {
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionary) {
boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine) {
Set<Entity> found = new HashSet<>();
@ -67,7 +68,7 @@ public class EntitySearchUtils {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary));
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine));
}
} while (startIndex > -1);
}
@ -142,9 +143,13 @@ public class EntitySearchUtils {
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
entities.remove(found);
entities.add(found);
} else {
existing.getEngines().addAll(found.getEngines());
}
} else {
entities.add(found);
}
entities.add(found);
}
@ -154,4 +159,17 @@ public class EntitySearchUtils {
entities.addAll(found);
}
/**
 * Merges {@code toBeAdded} into {@code existing}.
 * <p>
 * An entity that has no equal counterpart in {@code existing} is inserted as is.
 * When an equal entity is already present, it is kept and only the engines of the
 * incoming entity are added to its engine set.
 *
 * @param existing  the set that is modified in place
 * @param toBeAdded the newly found entities to merge into {@code existing}
 */
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded) {
    toBeAdded.forEach(candidate -> {
        // Nothing equal in the target yet: take the candidate over unchanged.
        if (!existing.contains(candidate)) {
            existing.add(candidate);
            return;
        }

        // An equal entity is already stored; look up that instance so its engines can be extended.
        Entity alreadyPresent = existing.stream()
                .filter(entity -> entity.equals(candidate))
                .findFirst()
                .get();
        alreadyPresent.getEngines().addAll(candidate.getEngines());
    });
}
}

View File

@ -7,6 +7,7 @@ import java.util.Set;
import org.junit.Test;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
public class EntitySearchUtilsTest {
@ -15,8 +16,8 @@ public class EntitySearchUtilsTest {
public void testNestedEntitiesRemoval() {
Set<Entity> entities = new HashSet<>();
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false);
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE);
entities.add(nested);
entities.add(nesting);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);