Pull request #223: RED-2082: Added engines to redactionLog, to identify where an entry comes from
Merge in RED/redaction-service from RED-2082 to master * commit 'd89a41caca623eedbeee7bc9b058b605db8fc359': RED-2082: Added engines to redactionLog, to identify where an entry comes from
This commit is contained in:
commit
c4b99378a7
@ -0,0 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
/**
 * Names the detection mechanism that produced an entity / redaction log entry.
 *
 * <p>Values of this enum are collected in a {@code Set<Engine>} on both the
 * server-side {@code Entity} and the {@code RedactionLogEntry}, so a single
 * entry can carry more than one engine when several of them found it.
 *
 * <p>The declaration order is kept as-is; do not reorder the constants in case
 * anything persists or compares ordinals.
 */
public enum Engine {

    /** Tagged on entities returned by the dictionary-model searches. */
    DICTIONARY,

    /** Tagged on entities found from the NER (entity recognition) values of a section. */
    NER,

    /** Tagged on entities created from within {@code Section}, i.e. by rule-driven lookups. */
    RULE
}
|
||||
@ -7,9 +7,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@ -62,4 +62,8 @@ public class RedactionLogEntry {
|
||||
@Builder.Default
|
||||
private List<Change> changes = new ArrayList<>();
|
||||
|
||||
private Set<Engine> engines= new HashSet<>();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,17 +1,20 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Entity implements ReasonHolder {
|
||||
|
||||
|
||||
private final String word;
|
||||
private final String type;
|
||||
private boolean redaction;
|
||||
@ -39,8 +42,13 @@ public class Entity implements ReasonHolder {
|
||||
|
||||
private boolean isDossierDictionaryEntry;
|
||||
|
||||
private Set<Engine> engines = new HashSet<>();
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, Integer end, boolean isDossierDictionaryEntry) {
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason,
|
||||
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
|
||||
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
|
||||
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -57,10 +65,12 @@ public class Entity implements ReasonHolder {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
|
||||
this.engines = engines;
|
||||
}
|
||||
|
||||
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionaryEntry) {
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
|
||||
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -70,6 +80,8 @@ public class Entity implements ReasonHolder {
|
||||
this.sectionNumber = sectionNumber;
|
||||
this.isDictionaryEntry = isDictionaryEntry;
|
||||
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
|
||||
this.engines.add(engine);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ArgumentType;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
@ -524,7 +525,7 @@ public class Section {
|
||||
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
|
||||
String searchValue = caseInsensitive ? value.toLowerCase() : value;
|
||||
|
||||
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true, false);
|
||||
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, Engine.RULE);
|
||||
|
||||
found.forEach(entity -> {
|
||||
if (redacted) {
|
||||
@ -550,7 +551,7 @@ public class Section {
|
||||
} else {
|
||||
String word = value.toString();
|
||||
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false);
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE);
|
||||
entity.setRedaction(redact);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
|
||||
@ -15,11 +15,10 @@ import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
@ -42,7 +41,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class EntityRedactionService {
|
||||
|
||||
private final EntityRecognitionClient entityRecognitionClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
@ -151,7 +149,7 @@ public class EntityRedactionService {
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines()));
|
||||
}
|
||||
}
|
||||
return entitiesPerPage;
|
||||
@ -210,18 +208,19 @@ public class EntityRedactionService {
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model
|
||||
.isDossierDictionary()));
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model
|
||||
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
|
||||
} else {
|
||||
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model
|
||||
.isDossierDictionary()));
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model
|
||||
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
|
||||
}
|
||||
}
|
||||
|
||||
if (!local) {
|
||||
Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts);
|
||||
nerValuesPerType.entrySet().forEach(entry -> {
|
||||
found.addAll(EntitySearchUtils.find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber, false, false));
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, entry.getValue(), entry
|
||||
.getKey(), headline, sectionNumber, false, false, Engine.NER));
|
||||
});
|
||||
}
|
||||
|
||||
@ -230,7 +229,7 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
private Map<String, Set<String>> getNerValues(int sectionNumber, NerEntities nerEntities,
|
||||
List<Integer> cellstarts) {
|
||||
List<Integer> cellstarts) {
|
||||
|
||||
Map<String, Set<String>> nerValuesPerType = new HashMap<>();
|
||||
|
||||
@ -238,7 +237,8 @@ public class EntityRedactionService {
|
||||
.containsKey(sectionNumber)) {
|
||||
nerEntities.getResult().get(sectionNumber).forEach(res -> {
|
||||
if (cellstarts == null || cellstarts.isEmpty()) {
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
|
||||
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellstarts) {
|
||||
@ -247,7 +247,8 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
|
||||
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@ -177,6 +177,7 @@ public class RedactionLogCreatorService {
|
||||
.startOffset(entity.getStart())
|
||||
.endOffset(entity.getEnd())
|
||||
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
|
||||
.engines(entity.getEngines())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
@ -47,7 +48,7 @@ public class EntitySearchUtils {
|
||||
|
||||
|
||||
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
|
||||
boolean isDictionaryEntry, boolean isDossierDictionary) {
|
||||
boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
@ -67,7 +68,7 @@ public class EntitySearchUtils {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary));
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
@ -142,9 +143,13 @@ public class EntitySearchUtils {
|
||||
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
|
||||
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
|
||||
entities.remove(found);
|
||||
entities.add(found);
|
||||
} else {
|
||||
existing.getEngines().addAll(found.getEngines());
|
||||
}
|
||||
} else {
|
||||
entities.add(found);
|
||||
}
|
||||
entities.add(found);
|
||||
}
|
||||
|
||||
|
||||
@ -154,4 +159,17 @@ public class EntitySearchUtils {
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
|
||||
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded){
|
||||
|
||||
for(Entity toAdd: toBeAdded){
|
||||
if (existing.contains(toAdd)) {
|
||||
Entity existingEntity = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst().get();
|
||||
existingEntity.getEngines().addAll(toAdd.getEngines());
|
||||
} else {
|
||||
existing.add(toAdd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@ import java.util.Set;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
|
||||
public class EntitySearchUtilsTest {
|
||||
@ -15,8 +16,8 @@ public class EntitySearchUtilsTest {
|
||||
public void testNestedEntitiesRemoval() {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false);
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false);
|
||||
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE);
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE);
|
||||
entities.add(nested);
|
||||
entities.add(nesting);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user