From a76095c5d62e1d5f39013d77c95328dee9f7fbea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Tue, 26 Jan 2021 11:43:12 +0100 Subject: [PATCH] Always check dictionary rank when overriding annotations --- /* Reviewer note (not part of the commit message): this patch replaces Section.addNewerToEntities and the inline removeAll/addAll in EntityRedactionService with EntitySearchUtils.addEntitiesWithHigherRank, which compares Dictionary.getDictionaryRank of both types before replacing an equal entity. NOTE(review): this copy looks like it lost its line breaks and generic type arguments (raw Set and Map declarations) - confirm against the repository before applying. */ .../v1/server/redaction/model/Dictionary.java | 8 ++++++ .../v1/server/redaction/model/Section.java | 27 +++++-------------- .../service/EntityRedactionService.java | 5 +--- .../redaction/utils/EntitySearchUtils.java | 17 ++++++++++++ .../v1/server/RedactionIntegrationTest.java | 26 ++++++++++++++++-- .../resources/dictionaries/false_positive.txt | 3 ++- 6 files changed, 58 insertions(+), 28 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java index 05ece6e3..c1fd7719 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java @@ -28,6 +28,14 @@ public class Dictionary { } + /** Returns the rank of the model stored in localAccessMap for the given type, or 0 when the map has no entry for that type. */ public int getDictionaryRank(String type){ + if(!localAccessMap.containsKey(type)){ + return 0; + } + return localAccessMap.get(type).getRank(); + } + + + public boolean isRecommendation(String type) { DictionaryModel model = localAccessMap.get(type); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 6955c913..8acd2a95 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -127,7 +127,7 @@ public class Section { public void addHintAnnotation(String value, String asType) { Set found = findEntities(value.trim(), asType, true, false, 0, null, null); - addNewerToEntities(found); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); } @@ -140,7 +140,7 @@ public class Section { for (String value : values) { if (StringUtils.isNotBlank(value)) { Set found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis); - addNewerToEntities(found); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim()); @@ -186,7 +186,7 @@ public class Section { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { Set found = findEntities(match.trim(), asType, false, true, ruleNumber, reason, legalBasis); - addNewerToEntities(found); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); } } } @@ -234,7 +234,7 @@ public class Section { if (StringUtils.isNotBlank(value)) { Set found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis); - addNewerToEntities(found); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim()); @@ -262,7 +262,7 @@ public class Section { } Set found = findEntities(line.trim(), asType, false, true, ruleNumber, reason, legalBasis); - addNewerToEntities(found); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { 
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim()); @@ -341,7 +341,7 @@ public class Section { singleEntitySet.add(entity); EntitySearchUtils.clearAndFindPositions(singleEntitySet, searchableText, dictionary); - addNewerToEntities(entity); + EntitySearchUtils.addEntitiesWithHigherRank(entities, entity, dictionary); EntitySearchUtils.removeEntitiesContainedInLarger(entities); @@ -363,21 +363,6 @@ public class Section { } } } - - - private void addNewerToEntities(Set found) { - // HashSet keeps the older value, but we want the new only. - entities.removeAll(found); - entities.addAll(found); - } - - - private void addNewerToEntities(Entity found) { - // HashSet keeps the older value, but we want the new only. - entities.remove(found); - entities.add(found); - } - - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 0b03b940..70e0088a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -67,10 +67,7 @@ public class EntityRedactionService { }); Set foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber); // HashSet keeps the older value, but we want the new only. 
- documentEntities.removeAll(foundByLocal); - documentEntities.addAll(foundByLocal); - + EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary); EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index a493231d..2db7dbf6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -104,4 +104,21 @@ public class EntitySearchUtils { } entities.removeAll(wordsToRemove); } + + + /** Passes each element of found, together with entities and dictionary, to addEntitiesWithHigherRank. */ public void addEntitiesWithHigherRank(Set entities, Set found, Dictionary dictionary) { + found.forEach(f -> addEntitiesWithHigherRank(entities, f, dictionary)); + } + + + /** Adds found to entities. If the set already contains an equal entity, that entity is removed first when the dictionary rank of its type is less than or equal to the rank of the type of found; otherwise it is left in the set. */ public void addEntitiesWithHigherRank(Set entities, Entity found, Dictionary dictionary) { + + if(entities.contains(found)){ + /* Fetch the stored element that equals found; relies on the contains() check above for findFirst() to yield a value. */ Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get(); + if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())){ + /* Stored entity does not outrank found: drop it so the add below inserts found itself. */ entities.remove(found); + } + } + /* When the stored entity was kept above, the set still contains an element equal to found here (Set.add leaves a set unchanged if an equal element is present). */ entities.add(found); + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 44eca77c..bdb781a1 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -20,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; + import org.apache.commons.io.IOUtils; import org.junit.Before; import org.junit.Test; @@ -98,6 +99,7 @@ public class RedactionIntegrationTest { private final Map hintTypeMap = new HashMap<>(); private final Map caseInSensitiveMap = new HashMap<>(); private final Map recommendationTypeMap = new HashMap<>(); + /* Rank per dictionary type; read further down by the type builders through .rank(rankTypeMap.get(...)). */ private final Map rankTypeMap = new HashMap<>(); private final Colors colors = new Colors(); private final static String TEST_RULESET_ID = "123"; @@ -133,7 +135,9 @@ public class RedactionIntegrationTest { loadDictionaryForTest(); loadTypeForTest(); when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(0L); - when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder().types(getTypeResponse()).build()); + when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder() + .types(getTypeResponse()) + .build()); when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE)); when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(ADDRESS)); when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR)); @@ -305,6 +309,22 @@ public class RedactionIntegrationTest { recommendationTypeMap.put(FALSE_POSITIVE, false); recommendationTypeMap.put(PURITY, false); 
+ rankTypeMap.put(FALSE_POSITIVE, 160); + rankTypeMap.put(PURITY, 155); + rankTypeMap.put(PII, 150); + rankTypeMap.put(ADDRESS, 140); + rankTypeMap.put(AUTHOR, 130); + rankTypeMap.put(SPONSOR, 120); + rankTypeMap.put(VERTEBRATE, 110); + rankTypeMap.put(MUST_REDACT, 100); + rankTypeMap.put(REDACTION_INDICATOR, 90); + rankTypeMap.put(NO_REDACTION_INDICATOR, 80); + rankTypeMap.put(PUBLISHED_INFORMATION, 70); + rankTypeMap.put(TEST_METHOD, 60); + rankTypeMap.put(HINT_ONLY, 50); + rankTypeMap.put(RECOMMENDATION_AUTHOR, 40); + rankTypeMap.put(RECOMMENDATION_ADDRESS, 30); + colors.setDefaultColor("#acfc00"); colors.setNotRedacted("#cccccc"); colors.setRequestAdd("#04b093"); @@ -323,6 +343,7 @@ public class RedactionIntegrationTest { .isHint(hintTypeMap.get(typeColor.getKey())) .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) + .rank(rankTypeMap.get(typeColor.getKey())) .build()) .collect(Collectors.toList()); @@ -337,6 +358,7 @@ public class RedactionIntegrationTest { .isHint(hintTypeMap.get(type)) .isCaseInsensitive(caseInSensitiveMap.get(type)) .isRecommendation(recommendationTypeMap.get(type)) + .rank(rankTypeMap.get(type)) .build(); } @@ -401,7 +423,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_08_Volume_3CA_B-6_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); RedactionRequest request = RedactionRequest.builder() .ruleSetId(TEST_RULESET_ID) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt index 86e0c095..0bb75f29 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt @@ -234,4 +234,5 @@ Syngenta AG N/A No details reported Not available -Test facility \ No newline at end of file +Test facility +TBD \ No newline at end of file