From e43bd1b71134ce9d34cc521459240c10606de7fc Mon Sep 17 00:00:00 2001 From: deiflaender Date: Thu, 10 Dec 2020 12:19:33 +0100 Subject: [PATCH] RED-864, Added isDictionaryEntry to redactionLog. Fixed order of dictionary types --- .../redaction/v1/model/RedactionLogEntry.java | 1 + .../v1/server/redaction/model/Entity.java | 13 ++++++++--- .../v1/server/redaction/model/Section.java | 4 ++-- .../service/EntityRedactionService.java | 23 ++++++++++++++----- .../service/AnnotationHighlightService.java | 2 ++ .../service/EntityRedactionServiceTest.java | 6 ++--- 6 files changed, 35 insertions(+), 14 deletions(-) diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index ad6ea850..7a4d90ad 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -32,5 +32,6 @@ public class RedactionLogEntry { private boolean manual; private Status status; private ManualRedactionType manualRedactionType; + private boolean isDictionaryEntry; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java index 1e763c1e..3f511e97 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java @@ -12,7 +12,7 @@ import lombok.EqualsAndHashCode; @EqualsAndHashCode(onlyExplicitlyIncluded = true) public class Entity { - @EqualsAndHashCode.Include + private final String word; private final String type; private boolean redaction; @@ -20,7 +20,10 @@ public class Entity { private String legalBasis; private List positionSequences = new ArrayList<>(); private List targetSequences; + + @EqualsAndHashCode.Include private Integer start; + @EqualsAndHashCode.Include private Integer end; @EqualsAndHashCode.Include @@ -30,8 +33,10 @@ public class Entity { @EqualsAndHashCode.Include private int sectionNumber; + private boolean isDictionaryEntry; - public Entity(String word, String type, boolean redaction, String redactionReason, List positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis) { + + public Entity(String word, String type, boolean redaction, String redactionReason, List positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry) { this.word = word; this.type = type; @@ -42,10 +47,11 @@ public class Entity { this.matchedRule = matchedRule; this.sectionNumber = sectionNumber; this.legalBasis = legalBasis; + this.isDictionaryEntry = isDictionaryEntry; } - public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber) { + public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry) { this.word = word; this.type = type; @@ -53,6 +59,7 @@ public class Entity { this.end = end; this.headline = headline; this.sectionNumber = sectionNumber; + this.isDictionaryEntry = isDictionaryEntry; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index f3ff6de6..f70766b0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -230,7 +230,7 @@ public class Section { if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(text .charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(stopIndex)))) { - found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber)); + found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber, false)); } } while (startIndex > -1); @@ -291,7 +291,7 @@ public class Section { } else { String word = value.toString(); - Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber); + Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false); entity.setRedaction(redact); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index c7b07398..aedaf96f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -32,7 +32,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +@Slf4j @Service @RequiredArgsConstructor public class EntityRedactionService { @@ -81,7 +83,7 @@ public class EntityRedactionService { .computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>()) .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity - .getLegalBasis())); + .getLegalBasis(), entity.isDictionaryEntry())); } } @@ -182,7 +184,16 @@ public class EntityRedactionService { }); } else { analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> { - dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value); + + if(dictionary.getLocalAccessMap().get(key) == null){ + log.warn("Dictionary {} is null", key); + } + + if(dictionary.getLocalAccessMap().get(key).getLocalEntries() == null){ + log.warn("Dictionary {} localEntries is null", key); + } + + dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value); }); } }); @@ -221,9 +232,9 @@ public class EntityRedactionService { String lowercaseInputString = searchableString.toLowerCase(); for (DictionaryModel model : dictionary) { if (model.isCaseInsensitive()) { - found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber)); + found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local)); } else { - found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber)); + found.addAll(find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local)); } } removeEntitiesContainedInLarger(found); @@ -232,7 +243,7 @@ public class EntityRedactionService { } - private Set find(String inputString, Set values, String type, String headline, int sectionNumber) { + private Set find(String inputString, Set values, String type, String headline, int sectionNumber, boolean local) { Set found = new HashSet<>(); for (String value : values) { @@ -244,7 +255,7 @@ public class EntityRedactionService { if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString .charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { - found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber)); + found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local)); } } while (startIndex > -1); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java index c412408a..388af0df 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java @@ -256,6 +256,7 @@ public class AnnotationHighlightService { .manual(true) .status(manualRedactionEntry.getStatus()) .manualRedactionType(ManualRedactionType.ADD) + .isDictionaryEntry(false) .build(); } @@ -274,6 +275,7 @@ public class AnnotationHighlightService { .section(entity.getHeadline()) .sectionNumber(entity.getSectionNumber()) .matchedRule(entity.getMatchedRule()) + .isDictionaryEntry(entity.isDictionaryEntry()) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index f15143c8..f7ecded7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -104,8 +104,8 @@ public class EntityRedactionServiceTest { public void testNestedEntitiesRemoval() { Set entities = new HashSet<>(); - Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0); - Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0); + Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false); + Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false); entities.add(nested); entities.add(nesting); entityRedactionService.removeEntitiesContainedInLarger(entities); @@ -404,7 +404,7 @@ public class EntityRedactionServiceTest { entityRedactionService.processDocument(classifiedDoc, null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3); - assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(8); + assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9); } }