Pull request #81: RED-864, Added isDictionaryEntry to redactionLog.

Merge in RED/redaction-service from RED-864 to master

* commit 'e43bd1b71134ce9d34cc521459240c10606de7fc':
  RED-864, Added isDictionaryEntry to redactionLog. Fixed order of dictionary types
This commit is contained in:
Dominique Eiflaender 2020-12-10 12:45:53 +01:00
commit 03a09860f4
6 changed files with 35 additions and 14 deletions

View File

@ -32,5 +32,6 @@ public class RedactionLogEntry {
private boolean manual;
private Status status;
private ManualRedactionType manualRedactionType;
private boolean isDictionaryEntry;
}

View File

@ -12,7 +12,7 @@ import lombok.EqualsAndHashCode;
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Entity {
@EqualsAndHashCode.Include
private final String word;
private final String type;
private boolean redaction;
@ -20,7 +20,10 @@ public class Entity {
private String legalBasis;
private List<EntityPositionSequence> positionSequences = new ArrayList<>();
private List<TextPositionSequence> targetSequences;
@EqualsAndHashCode.Include
private Integer start;
@EqualsAndHashCode.Include
private Integer end;
@EqualsAndHashCode.Include
@ -30,8 +33,10 @@ public class Entity {
@EqualsAndHashCode.Include
private int sectionNumber;
private boolean isDictionaryEntry;
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis) {
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry) {
this.word = word;
this.type = type;
@ -42,10 +47,11 @@ public class Entity {
this.matchedRule = matchedRule;
this.sectionNumber = sectionNumber;
this.legalBasis = legalBasis;
this.isDictionaryEntry = isDictionaryEntry;
}
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber) {
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry) {
this.word = word;
this.type = type;
@ -53,6 +59,7 @@ public class Entity {
this.end = end;
this.headline = headline;
this.sectionNumber = sectionNumber;
this.isDictionaryEntry = isDictionaryEntry;
}
}

View File

@ -230,7 +230,7 @@ public class Section {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(text
.charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber));
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber, false));
}
} while (startIndex > -1);
@ -291,7 +291,7 @@ public class Section {
} else {
String word = value.toString();
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber);
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);

View File

@ -32,7 +32,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class EntityRedactionService {
@ -81,7 +83,7 @@ public class EntityRedactionService {
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis()));
.getLegalBasis(), entity.isDictionaryEntry()));
}
}
@ -182,7 +184,16 @@ public class EntityRedactionService {
});
} else {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
// Skip this value when the dictionary registered under `key`, or its local-entry
// collection, is missing. Without the early returns the warning is logged and the
// very next dereference throws a NullPointerException anyway.
if(dictionary.getLocalAccessMap().get(key) == null){
    log.warn("Dictionary {} is null", key);
    return;
}
if(dictionary.getLocalAccessMap().get(key).getLocalEntries() == null){
    log.warn("Dictionary {} localEntries is null", key);
    return;
}
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
});
@ -221,9 +232,9 @@ public class EntityRedactionService {
String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionary) {
if (model.isCaseInsensitive()) {
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local));
} else {
found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber));
found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local));
}
}
removeEntitiesContainedInLarger(found);
@ -232,7 +243,7 @@ public class EntityRedactionService {
}
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber) {
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean local) {
Set<Entity> found = new HashSet<>();
for (String value : values) {
@ -244,7 +255,7 @@ public class EntityRedactionService {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber));
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local));
}
} while (startIndex > -1);
}

View File

@ -256,6 +256,7 @@ public class AnnotationHighlightService {
.manual(true)
.status(manualRedactionEntry.getStatus())
.manualRedactionType(ManualRedactionType.ADD)
.isDictionaryEntry(false)
.build();
}
@ -274,6 +275,7 @@ public class AnnotationHighlightService {
.section(entity.getHeadline())
.sectionNumber(entity.getSectionNumber())
.matchedRule(entity.getMatchedRule())
.isDictionaryEntry(entity.isDictionaryEntry())
.build();
}

View File

@ -104,8 +104,8 @@ public class EntityRedactionServiceTest {
public void testNestedEntitiesRemoval() {
Set<Entity> entities = new HashSet<>();
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0);
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false);
entities.add(nested);
entities.add(nesting);
entityRedactionService.removeEntitiesContainedInLarger(entities);
@ -404,7 +404,7 @@ public class EntityRedactionServiceTest {
entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(8);
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
}
}