Pull request #81: RED-864, Added isDictionaryEntry to redactionLog.
Merge in RED/redaction-service from RED-864 to master. * commit 'e43bd1b71134ce9d34cc521459240c10606de7fc': RED-864: Added isDictionaryEntry to redactionLog. Fixed order of dictionary types.
This commit is contained in:
commit
03a09860f4
@ -32,5 +32,6 @@ public class RedactionLogEntry {
|
||||
private boolean manual;
|
||||
private Status status;
|
||||
private ManualRedactionType manualRedactionType;
|
||||
private boolean isDictionaryEntry;
|
||||
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@ import lombok.EqualsAndHashCode;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Entity {
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
|
||||
private final String word;
|
||||
private final String type;
|
||||
private boolean redaction;
|
||||
@ -20,7 +20,10 @@ public class Entity {
|
||||
private String legalBasis;
|
||||
private List<EntityPositionSequence> positionSequences = new ArrayList<>();
|
||||
private List<TextPositionSequence> targetSequences;
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
private Integer start;
|
||||
@EqualsAndHashCode.Include
|
||||
private Integer end;
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
@ -30,8 +33,10 @@ public class Entity {
|
||||
@EqualsAndHashCode.Include
|
||||
private int sectionNumber;
|
||||
|
||||
private boolean isDictionaryEntry;
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis) {
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -42,10 +47,11 @@ public class Entity {
|
||||
this.matchedRule = matchedRule;
|
||||
this.sectionNumber = sectionNumber;
|
||||
this.legalBasis = legalBasis;
|
||||
this.isDictionaryEntry = isDictionaryEntry;
|
||||
}
|
||||
|
||||
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber) {
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -53,6 +59,7 @@ public class Entity {
|
||||
this.end = end;
|
||||
this.headline = headline;
|
||||
this.sectionNumber = sectionNumber;
|
||||
this.isDictionaryEntry = isDictionaryEntry;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -230,7 +230,7 @@ public class Section {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(text
|
||||
.charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(stopIndex)))) {
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber));
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber, false));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
|
||||
@ -291,7 +291,7 @@ public class Section {
|
||||
} else {
|
||||
String word = value.toString();
|
||||
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber);
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false);
|
||||
entity.setRedaction(redact);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
|
||||
@ -32,7 +32,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class EntityRedactionService {
|
||||
@ -81,7 +83,7 @@ public class EntityRedactionService {
|
||||
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis()));
|
||||
.getLegalBasis(), entity.isDictionaryEntry()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,7 +184,16 @@ public class EntityRedactionService {
|
||||
});
|
||||
} else {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> {
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
|
||||
if(dictionary.getLocalAccessMap().get(key) == null){
|
||||
log.warn("Dictionary {} is null", key);
|
||||
}
|
||||
|
||||
if(dictionary.getLocalAccessMap().get(key).getLocalEntries() == null){
|
||||
log.warn("Dictionary {} localEntries is null", key);
|
||||
}
|
||||
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
});
|
||||
}
|
||||
});
|
||||
@ -221,9 +232,9 @@ public class EntityRedactionService {
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
|
||||
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local));
|
||||
} else {
|
||||
found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber));
|
||||
found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local));
|
||||
}
|
||||
}
|
||||
removeEntitiesContainedInLarger(found);
|
||||
@ -232,7 +243,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber) {
|
||||
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean local) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
for (String value : values) {
|
||||
@ -244,7 +255,7 @@ public class EntityRedactionService {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber));
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
|
||||
@ -256,6 +256,7 @@ public class AnnotationHighlightService {
|
||||
.manual(true)
|
||||
.status(manualRedactionEntry.getStatus())
|
||||
.manualRedactionType(ManualRedactionType.ADD)
|
||||
.isDictionaryEntry(false)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -274,6 +275,7 @@ public class AnnotationHighlightService {
|
||||
.section(entity.getHeadline())
|
||||
.sectionNumber(entity.getSectionNumber())
|
||||
.matchedRule(entity.getMatchedRule())
|
||||
.isDictionaryEntry(entity.isDictionaryEntry())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -104,8 +104,8 @@ public class EntityRedactionServiceTest {
|
||||
public void testNestedEntitiesRemoval() {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0);
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0);
|
||||
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false);
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false);
|
||||
entities.add(nested);
|
||||
entities.add(nesting);
|
||||
entityRedactionService.removeEntitiesContainedInLarger(entities);
|
||||
@ -404,7 +404,7 @@ public class EntityRedactionServiceTest {
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(8);
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user