Always check dictionary rank when overriding annotations

This commit is contained in:
Dominique Eifländer 2021-01-26 11:43:12 +01:00
parent 43a3d76f1c
commit a76095c5d6
6 changed files with 58 additions and 28 deletions

View File

@ -28,6 +28,14 @@ public class Dictionary {
}
/**
 * Returns the rank of the dictionary model registered for the given type.
 *
 * @param type the dictionary type name to look up
 * @return the rank of the matching dictionary model, or {@code 0} when no model is
 *         available for {@code type}
 */
public int getDictionaryRank(String type) {
    // Single map lookup (same pattern as isRecommendation) instead of containsKey + get;
    // this also yields 0 rather than a NullPointerException if the key maps to null.
    DictionaryModel model = localAccessMap.get(type);
    if (model == null) {
        return 0;
    }
    return model.getRank();
}
public boolean isRecommendation(String type) {
DictionaryModel model = localAccessMap.get(type);

View File

@ -127,7 +127,7 @@ public class Section {
public void addHintAnnotation(String value, String asType) {
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null);
addNewerToEntities(found);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
}
@ -140,7 +140,7 @@ public class Section {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis);
addNewerToEntities(found);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim());
@ -186,7 +186,7 @@ public class Section {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
Set<Entity> found = findEntities(match.trim(), asType, false, true, ruleNumber, reason, legalBasis);
addNewerToEntities(found);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
}
}
}
@ -234,7 +234,7 @@ public class Section {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis);
addNewerToEntities(found);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim());
@ -262,7 +262,7 @@ public class Section {
}
Set<Entity> found = findEntities(line.trim(), asType, false, true, ruleNumber, reason, legalBasis);
addNewerToEntities(found);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim());
@ -341,7 +341,7 @@ public class Section {
singleEntitySet.add(entity);
EntitySearchUtils.clearAndFindPositions(singleEntitySet, searchableText, dictionary);
addNewerToEntities(entity);
EntitySearchUtils.addEntitiesWithHigherRank(entities, entity, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
@ -363,21 +363,6 @@ public class Section {
}
}
}
/**
 * Replaces every element of {@code entities} that is equal to an element of {@code found}
 * with the instance from {@code found}, and adds the remaining elements of {@code found}.
 *
 * @param found the newly found entities that take precedence over equal existing ones
 */
private void addNewerToEntities(Set<Entity> found) {
// HashSet keeps the older value, but we want the new only.
// Hence the equal elements are removed first, so that the add below stores the new instances.
entities.removeAll(found);
entities.addAll(found);
}
/**
 * Replaces the element of {@code entities} that is equal to {@code found} (if any) with
 * {@code found} itself; otherwise simply adds {@code found}.
 *
 * @param found the newly found entity that takes precedence over an equal existing one
 */
private void addNewerToEntities(Entity found) {
// HashSet keeps the older value, but we want the new only.
// Hence the equal element is removed first, so that the add below stores the new instance.
entities.remove(found);
entities.add(found);
}
}

View File

@ -67,10 +67,7 @@ public class EntityRedactionService {
});
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber);
// HashSet keeps the older value, but we want the new only.
documentEntities.removeAll(foundByLocal);
documentEntities.addAll(foundByLocal);
EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
}

View File

@ -104,4 +104,21 @@ public class EntitySearchUtils {
}
entities.removeAll(wordsToRemove);
}
/**
 * Offers each entity of {@code found} to {@code entities}, one at a time, by delegating to
 * the single-entity overload.
 *
 * @param entities   the set that receives the entities
 * @param found      the newly found entities to offer
 * @param dictionary the dictionary passed through to the single-entity overload
 */
public void addEntitiesWithHigherRank(Set<Entity> entities, Set<Entity> found, Dictionary dictionary) {
    for (Entity candidate : found) {
        addEntitiesWithHigherRank(entities, candidate, dictionary);
    }
}
/**
 * Adds {@code found} to {@code entities}, resolving a clash with an equal, already present
 * entity by dictionary rank.
 * <p>
 * If an equal entity exists and the rank of its type is less than or equal to the rank of
 * {@code found}'s type, the existing entity is removed so that {@code found} takes its
 * place. If the existing entity's type ranks higher, it is left in the set and the final
 * {@code add} is still attempted.
 *
 * @param entities   the set that receives the entity
 * @param found      the newly found entity to offer
 * @param dictionary the dictionary used to look up the rank of an entity type
 */
public void addEntitiesWithHigherRank(Set<Entity> entities, Entity found, Dictionary dictionary) {
    // Nothing equal is present yet: no rank comparison is needed.
    if (!entities.contains(found)) {
        entities.add(found);
        return;
    }

    // Fetch the stored instance, since its type may differ from the type of the new entity.
    Entity current = entities.stream()
            .filter(candidate -> candidate.equals(found))
            .findFirst()
            .get();

    int currentRank = dictionary.getDictionaryRank(current.getType());
    int offeredRank = dictionary.getDictionaryRank(found.getType());
    boolean currentOutranksOffered = currentRank > offeredRank;

    if (!currentOutranksOffered) {
        // Equal or higher rank wins: drop the stored instance so the add below takes effect.
        entities.remove(found);
    }
    entities.add(found);
}
}

View File

@ -20,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
@ -98,6 +99,7 @@ public class RedactionIntegrationTest {
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
private final static String TEST_RULESET_ID = "123";
@ -133,7 +135,9 @@ public class RedactionIntegrationTest {
loadDictionaryForTest();
loadTypeForTest();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder().types(getTypeResponse()).build());
when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder()
.types(getTypeResponse())
.build());
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(ADDRESS));
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
@ -305,6 +309,22 @@ public class RedactionIntegrationTest {
recommendationTypeMap.put(FALSE_POSITIVE, false);
recommendationTypeMap.put(PURITY, false);
rankTypeMap.put(FALSE_POSITIVE, 160);
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
rankTypeMap.put(AUTHOR, 130);
rankTypeMap.put(SPONSOR, 120);
rankTypeMap.put(VERTEBRATE, 110);
rankTypeMap.put(MUST_REDACT, 100);
rankTypeMap.put(REDACTION_INDICATOR, 90);
rankTypeMap.put(NO_REDACTION_INDICATOR, 80);
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
colors.setDefaultColor("#acfc00");
colors.setNotRedacted("#cccccc");
colors.setRequestAdd("#04b093");
@ -323,6 +343,7 @@ public class RedactionIntegrationTest {
.isHint(hintTypeMap.get(typeColor.getKey()))
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
.isRecommendation(recommendationTypeMap.get(typeColor.getKey()))
.rank(rankTypeMap.get(typeColor.getKey()))
.build())
.collect(Collectors.toList());
@ -337,6 +358,7 @@ public class RedactionIntegrationTest {
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
.rank(rankTypeMap.get(type))
.build();
}
@ -401,7 +423,7 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_08_Volume_3CA_B-6_2018-09-06.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder()
.ruleSetId(TEST_RULESET_ID)

View File

@ -234,4 +234,5 @@ Syngenta AG
N/A
No details reported
Not available
Test facility
Test facility
TBD