From 88e6ac3d2291ced8bb2229c3e7daeb8d78d6da93 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Mon, 21 Mar 2022 12:47:14 +0100 Subject: [PATCH] RED-2836: Enabled false positives per dictionary --- .../redaction/v1/model/RedactionLogEntry.java | 1 + .../redaction/v1/model/RedactionRequest.java | 2 + .../controller/RedactionController.java | 6 +- .../v1/server/redaction/model/Dictionary.java | 33 +-- .../redaction/model/DictionaryEntries.java | 26 ++ .../redaction/model/DictionaryModel.java | 13 +- .../v1/server/redaction/model/Entity.java | 8 +- .../v1/server/redaction/model/EntityType.java | 5 + .../v1/server/redaction/model/Section.java | 66 ++--- .../redaction/service/DictionaryService.java | 44 ++-- .../service/EntityRedactionService.java | 21 +- ...ManualRedactionSurroundingTextService.java | 3 +- .../service/RedactionLogCreatorService.java | 10 +- .../redaction/utils/EntitySearchUtils.java | 51 +++- .../v1/server/RedactionIntegrationTest.java | 55 +--- .../utils/EntitySearchUtilsTest.java | 45 ++-- .../dictionaries/PII_false_positive.txt | 1 + .../resources/dictionaries/false_positive.txt | 239 ------------------ .../recommendation_CBI_address.txt | 0 .../recommendation_CBI_author.txt | 0 .../src/test/resources/drools/rules.drl | 10 +- 21 files changed, 223 insertions(+), 416 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryEntries.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityType.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII_false_positive.txt delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt delete mode 100644 
redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_address.txt delete mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_author.txt diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index 25d8474b..0444a28e 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -27,6 +27,7 @@ public class RedactionLogEntry { private boolean redacted; private boolean isHint; private boolean isRecommendation; + private boolean isFalsePositive; private String section; private float[] color; diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java index cc6fe0e3..194224a3 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java @@ -22,4 +22,6 @@ public class RedactionRequest { private ManualRedactions manualRedactions; @Builder.Default private Set excludedPages = new HashSet<>(); + + private boolean includeFalsePositives; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index df67898b..2965cd29 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -198,7 +198,11 @@ public class RedactionController implements RedactionResource { } log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion()); - return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages()); + var merged = redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages()); + + merged.getRedactionLogEntry().removeIf(e -> e.isFalsePositive() && !redactionRequest.isIncludeFalsePositives()); + + return merged; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java index 4d16125c..e224d402 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java @@ -1,22 +1,16 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import static java.util.stream.Collectors.toSet; - -import 
com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry; -import lombok.Data; -import lombok.Getter; - import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import lombok.Data; +import lombok.Getter; @Data public class Dictionary { - public static final String RECOMMENDATION_PREFIX = "recommendation_"; - @Getter private List dictionaryModels; private Map localAccessMap = new HashMap<>(); @@ -26,6 +20,7 @@ public class Dictionary { public Dictionary(List dictionaryModels, DictionaryVersion version) { + this.dictionaryModels = dictionaryModels; this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm)); this.version = version; @@ -33,6 +28,7 @@ public class Dictionary { public int getDictionaryRank(String type) { + if (!localAccessMap.containsKey(type)) { return 0; } @@ -40,16 +36,6 @@ public class Dictionary { } - public boolean isRecommendation(String type) { - - DictionaryModel model = localAccessMap.get(type); - if (model != null) { - return model.isRecommendation(); - } - return false; - } - - public boolean hasLocalEntries() { return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty()); @@ -62,16 +48,18 @@ public class Dictionary { } + public DictionaryModel getType(String type) { + + return localAccessMap.get(type); + } + + public boolean containsValue(String type, String value) { return localAccessMap.containsKey(type) && localAccessMap.get(type) .getValues(false) .contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type) .getValues(true) - .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) - .getValues(false) - .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) - .getValues(true) .contains(value); } @@ -85,6 +73,7 @@ public class Dictionary { return false; } + public 
boolean isCaseInsensitiveDictionary(String type) { DictionaryModel dictionaryModel = localAccessMap.get(type); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryEntries.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryEntries.java new file mode 100644 index 00000000..456b7bc0 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryEntries.java @@ -0,0 +1,26 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.util.HashSet; +import java.util.Set; + +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class DictionaryEntries { + + @Builder.Default + Set entries = new HashSet<>(); + @Builder.Default + Set falsePositives = new HashSet<>(); + @Builder.Default + Set falseRecommendations = new HashSet<>(); + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java index 12db03bd..8dcd2acb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java @@ -19,8 +19,9 @@ public class DictionaryModel implements Serializable { private 
float[] color; private boolean caseInsensitive; private boolean hint; - private boolean recommendation; private Set entries; + private Set falsePositives; + private Set falseRecommendations; private Set localEntries; private boolean isDossierDictionary; @@ -29,4 +30,14 @@ public class DictionaryModel implements Serializable { .toSet()); } + public Set getFalsePositiveValues() { + return falsePositives.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors + .toSet()); + } + + public Set getFalseRecommendationValues() { + return falseRecommendations.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors + .toSet()); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java index c313d734..a08e2de5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java @@ -48,11 +48,13 @@ public class Entity implements ReasonHolder { private Set references = new HashSet<>(); + private EntityType entityType; + public Entity(String word, String type, boolean redaction, String redactionReason, List positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, - Integer end, boolean isDossierDictionaryEntry, Set engines, Set references) { + Integer end, boolean isDossierDictionaryEntry, Set engines, Set references, EntityType entityType) { this.word = word; this.type = type; @@ -71,11 +73,12 @@ public class Entity implements ReasonHolder { this.isDossierDictionaryEntry = 
isDossierDictionaryEntry; this.engines = engines; this.references = references; + this.entityType = entityType; } public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, - boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) { + boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine, EntityType entityType) { this.word = word; this.type = type; @@ -86,6 +89,7 @@ public class Entity implements ReasonHolder { this.isDictionaryEntry = isDictionaryEntry; this.isDossierDictionaryEntry = isDossierDictionaryEntry; this.engines.add(engine); + this.entityType = entityType; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityType.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityType.java new file mode 100644 index 00000000..027679a2 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityType.java @@ -0,0 +1,5 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +public enum EntityType { + ENTITY, RECOMMENDATION, FALSE_POSITIVE, FALSE_RECOMMENDATION +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 9a9b4734..f0d2f7a9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1,6 +1,5 @@ package 
com.iqser.red.service.redaction.v1.server.redaction.model; -import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; @@ -74,7 +73,7 @@ public class Section { Set entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet()); Set values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toSet()); - Set found = EntitySearchUtils.find(searchText, values, asType, headline, sectionNumber, false, false, Engine.NER, true); + Set found = EntitySearchUtils.findEntities(searchText, values, dictionary.getType(asType), headline, sectionNumber, false, false, Engine.NER, true, true); EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary); Set finalResult = new HashSet<>(); @@ -119,7 +118,7 @@ public class Section { } else if (!allowDuplicateTypes && foundParts.contains(entity.getType())) { if (numberOfMatchParts >= minPartMatches) { String value = searchText.substring(start, lastEnd); - found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER)); + found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER, true)); } start = -1; lastEnd = -1; @@ -134,7 +133,7 @@ public class Section { } else if (entity.getType().equals(startType) && start != -1) { if (numberOfMatchParts >= minPartMatches) { String value = searchText.substring(start, lastEnd); - found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER)); + found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER, true)); } start = entity.getStart(); lastEnd = entity.getEnd(); @@ -151,7 +150,7 @@ public class Section { if (numberOfMatchParts >= minPartMatches) { String value = searchText.substring(start, lastEnd); - found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER)); + 
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER, true)); } if (!found.isEmpty()) { @@ -291,7 +290,7 @@ public class Section { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { - Set expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE); + Set expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE, false); expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities)); } } @@ -450,7 +449,7 @@ public class Section { while (matcher.find()) { String match = matcher.group(group); if (StringUtils.isNotBlank(match) && match.length() >= 3) { - localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>()).add(match); + localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(match); } } } @@ -460,12 +459,11 @@ public class Section { public void redactNotAndReference(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REFERENCE_TYPE) String referenceType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) { - boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type); Set references = entities.stream().filter(entity -> entity.getType().equals(referenceType)).collect(Collectors.toSet()); entities.forEach(entity -> { - if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType().equals(RECOMMENDATION_PREFIX + type)) { + if (entity.getType().equals(type)) { entity.setRedaction(false); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -494,21 +492,26 @@ public class Section { public void addRedaction(@Argument(ArgumentType.STRING) String value, 
@Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - Set found = findEntities(value.trim(), asType, true, true, ruleNumber, reason, legalBasis, Engine.RULE); + Set found = findEntities(value.trim(), asType, true, true, ruleNumber, reason, legalBasis, Engine.RULE, false); EntitySearchUtils.addEntitiesIgnoreRank(entities, found); } public void ignore(String type) { - entities.removeIf(entity -> entity.getType().equals(type)); + entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.ENTITY)); + } + + + public void ignoreRecommendations(String type) { + + entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.RECOMMENDATION)); } @ThenAction - public void expandToHintAnnotationByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.STRING) String pattern, - @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group, - @Argument(ArgumentType.TYPE) String asType) { + public void expandToFalsePositiveByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.STRING) String pattern, + @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) { Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive); @@ -524,13 +527,14 @@ public class Section { while (matcher.find()) { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { - expanded.addAll(findEntities(entity.getWord() + match, asType, false, false, 0, null, null, Engine.RULE)); + expanded.addAll(findEntities(entity.getWord() + match, type, false, false, 0, null, null, Engine.RULE, false)); } } } EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary); 
EntitySearchUtils.removeEntitiesContainedInLarger(entities); + expanded.forEach(e -> e.setEntityType(EntityType.FALSE_POSITIVE)); } @@ -545,7 +549,7 @@ public class Section { while (matcher.find()) { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { - Set found = findEntities(match.trim(), asType, false, false, 0, null, null, Engine.RULE); + Set found = findEntities(match.trim(), asType, false, false, 0, null, null, Engine.RULE, false); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); } } @@ -555,7 +559,7 @@ public class Section { @ThenAction public void addHintAnnotation(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType) { - Set found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE); + Set found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE, false); EntitySearchUtils.addEntitiesIgnoreRank(entities, found); } @@ -577,7 +581,7 @@ public class Section { } if (StringUtils.isNotBlank(cleanValue) && cleanValue.length() >= 3) { - localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>()).add(cleanValue); + localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(cleanValue); } } } @@ -599,20 +603,20 @@ public class Section { while (matcher.find()) { String match = matcher.group(group); if (StringUtils.isNotBlank(match) && match.length() >= 3) { - Set found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE); + Set found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); - localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>()).add(match); + localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(match); } } } - private Set findEntities(String value, 
String asType, boolean caseInsensitive, boolean redacted, int ruleNumber, String reason, String legalBasis, Engine engine) { + private Set findEntities(String value, String asType, boolean caseInsensitive, boolean redacted, int ruleNumber, String reason, String legalBasis, Engine engine, boolean asRecommendation) { String text = caseInsensitive ? searchText.toLowerCase() : searchText; String searchValue = caseInsensitive ? value.toLowerCase() : value; - Set found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, engine, false); + Set found = EntitySearchUtils.findEntities(text, Set.of(searchValue), dictionary.getType(asType), headline, sectionNumber, false, false, engine, false, asRecommendation); found.forEach(entity -> { if (redacted) { @@ -629,10 +633,8 @@ public class Section { private void redact(String type, int ruleNumber, String reason, String legalBasis, boolean redaction) { - boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type); - entities.forEach(entity -> { - if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType().equals(RECOMMENDATION_PREFIX + type)) { + if (entity.getType().equals(type)) { entity.setRedaction(redaction); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -665,7 +667,7 @@ public class Section { } else { String word = value.toString(); - Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE); + Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY); entity.setRedaction(redact); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -692,9 +694,9 @@ public class Section { while (matcher.find()) { String match = matcher.group().trim(); if (match.length() >= 3) { - 
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>()).add(match); + localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(match); String lastname = match.split(" ")[0]; - localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>()).add(lastname); + localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(lastname); } } } @@ -709,7 +711,7 @@ public class Section { if (values != null) { for (String value : values) { if (StringUtils.isNotBlank(value)) { - Set found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE); + Set found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { @@ -730,7 +732,7 @@ public class Section { while (matcher.find()) { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { - Set found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE); + Set found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); } } @@ -745,7 +747,7 @@ public class Section { for (String value : values) { if (StringUtils.isNotBlank(value)) { - Set found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE); + Set found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { @@ -772,7 +774,7 @@ public class Section { return; } - Set found = findEntities(line.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE); + Set found = 
findEntities(line.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index a71c061c..d3cd7fa0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -99,18 +99,33 @@ public class DictionaryService { } Set entries = new HashSet<>(); - var newEntries = convertEntries(t.getId(), currentVersion); - var newValues = newEntries.stream().map(v -> v.getValue()).collect(Collectors.toSet()); + Set falsePositives = new HashSet<>(); + Set falseRecommendations = new HashSet<>(); + + DictionaryEntries newEntries = getEntries(t.getId(), currentVersion); + + var newValues = newEntries.getEntries().stream().map(v -> v.getValue()).collect(Collectors.toSet()); + var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(v -> v.getValue()).collect(Collectors.toSet()); + var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(v -> v.getValue()).collect(Collectors.toSet()); // add old entries from existing DictionaryModel oldModel.ifPresent(dictionaryModel -> entries.addAll(dictionaryModel.getEntries().stream().filter( f -> !newValues.contains(f.getValue())).collect(Collectors.toList()) )); + oldModel.ifPresent(dictionaryModel -> falsePositives.addAll(dictionaryModel.getFalsePositives().stream().filter( + f -> 
!newFalsePositivesValues.contains(f.getValue())).collect(Collectors.toList()) + )); + oldModel.ifPresent(dictionaryModel -> falseRecommendations.addAll(dictionaryModel.getFalseRecommendations().stream().filter( + f -> !newFalseRecommendationsValues.contains(f.getValue())).collect(Collectors.toList()) + )); + // Add Increments - entries.addAll(convertEntries(t.getId(), currentVersion)); + entries.addAll(newEntries.getEntries()); + falsePositives.addAll(newEntries.getFalsePositives()); + falseRecommendations.addAll(newEntries.getFalseRecommendations()); return new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t - .isHint(), t.isRecommendation(), entries, new HashSet<>(), dossierId != null); + .isHint(), entries, falsePositives, falseRecommendations, new HashSet<>(), dossierId != null); }) .sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()) .collect(Collectors.toList()); @@ -140,17 +155,20 @@ public class DictionaryService { } - private Set convertEntries(String typeId, Long fromVersion) { + private DictionaryEntries getEntries(String typeId, Long fromVersion) { var type = dictionaryClient.getDictionaryForType(typeId, fromVersion); - Set entries = new HashSet<>(type - .getEntries()); + Set entries = type.getEntries() != null ? new HashSet<>(type.getEntries()) : new HashSet<>(); + Set falsePositives = type.getFalsePositiveEntries() != null ? new HashSet<>(type.getFalsePositiveEntries()) : new HashSet<>(); + Set falseRecommendations = type.getFalseRecommendationEntries() != null ? 
new HashSet<>(type.getFalseRecommendationEntries()) : new HashSet<>(); if (type.isCaseInsensitive()) { entries.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT))); + falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT))); + falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT))); } - return entries; + return new DictionaryEntries(entries, falsePositives, falseRecommendations); } @@ -191,16 +209,6 @@ public class DictionaryService { } - public boolean isRecommendation(String type, String dossierTemplateId) { - - DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type); - if (model != null) { - return model.isRecommendation(); - } - return false; - } - - public Dictionary getDeepCopyDictionary(String dossierTemplateId, String dossierId) { List copy = new ArrayList<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 3e1b1dee..e82f4991 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -25,6 +25,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel import com.iqser.red.service.redaction.v1.server.redaction.model.Entities; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import 
com.iqser.red.service.redaction.v1.server.redaction.model.EntityType; import com.iqser.red.service.redaction.v1.server.redaction.model.Image; import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities; import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; @@ -177,7 +178,7 @@ public class EntityRedactionService { .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity .getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity .getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines(), entity - .getReferences())); + .getReferences(), entity.getEntityType())); } } return entitiesPerPage; @@ -199,13 +200,6 @@ public class EntityRedactionService { private void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) { analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> { - if (dictionary.isRecommendation(key)) { - analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> { - if (!dictionary.containsValue(key, value)) { - dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value); - } - }); - } else { analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> { if (dictionary.getLocalAccessMap().get(key) == null) { @@ -218,7 +212,6 @@ public class EntityRedactionService { dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value); }); - } }); } @@ -236,11 +229,9 @@ public class EntityRedactionService { String lowercaseInputString = searchableString.toLowerCase(); for (DictionaryModel model : dictionary.getDictionaryModels()) { if (model.isCaseInsensitive()) { - EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model - .getType(), headline, sectionNumber, !local, model.isDossierDictionary(), local ? 
Engine.RULE : Engine.DICTIONARY, false)); + EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(lowercaseInputString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false)); } else { - EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model - .getType(), headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false)); + EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(searchableString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false)); } } @@ -261,7 +252,7 @@ public class EntityRedactionService { if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getResult().containsKey(sectionNumber)) { nerEntities.getResult().get(sectionNumber).forEach(res -> { if (cellStarts == null || cellStarts.isEmpty()) { - entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER)); + entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION)); } else { boolean intersectsCellStart = false; for (Integer cellStart : cellStarts) { @@ -271,7 +262,7 @@ public class EntityRedactionService { } } if (!intersectsCellStart) { - entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER)); + entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION)); } } }); diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java index f8256b04..9a529f2d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java @@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Text; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; @@ -90,7 +91,7 @@ public class ManualRedactionSurroundingTextService { private Pair findSurroundingText(SectionText sectionText, String value, List toFindPositions) { - Set entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false); + Set entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false, EntityType.ENTITY); Set entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null); Entity correctEntity = 
getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index e5f41214..1cc07e89 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType; import com.iqser.red.service.redaction.v1.server.redaction.model.Image; import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; @@ -203,7 +204,8 @@ public class RedactionLogCreatorService { .type(entity.getType()) .redacted(entity.isRedaction()) .isHint(isHint(entity.getType(), dossierTemplateId)) - .isRecommendation(isRecommendation(entity.getType(), dossierTemplateId)) + .isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION)) + .isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)) .section(entity.getHeadline()) .sectionNumber(entity.getSectionNumber()) .matchedRule(entity.getMatchedRule()) @@ -233,10 +235,4 @@ 
public class RedactionLogCreatorService { return dictionaryService.isHint(type, dossierTemplateId); } - - private boolean isRecommendation(String type, String dossierTemplateId) { - - return dictionaryService.isRecommendation(type, dossierTemplateId); - } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 4ccf0096..b10eb7ed 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -15,8 +15,10 @@ import java.util.stream.Collectors; import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType; import com.iqser.red.service.redaction.v1.server.redaction.model.Image; import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; @@ -55,9 +57,27 @@ public class EntitySearchUtils { } - public Set find(String inputString, Set values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary, - Engine engine, boolean ignoreMinLength) { + public Set findEntities(String inputString, Set values, DictionaryModel type, String headline, 
int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary, + Engine engine, boolean ignoreMinLength, boolean asRecommendation) { + Set found = find(inputString, values, type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY); + + if(asRecommendation){ + Set falseRecommendations = find(inputString, type.getFalseRecommendationValues(), type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, EntityType.FALSE_RECOMMENDATION); + removeFalsePositives(found, falseRecommendations); + found.addAll(falseRecommendations); + } else { + Set falsePositives = find(inputString, type.getFalsePositiveValues(), type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, EntityType.FALSE_POSITIVE); + removeFalsePositives(found, falsePositives); + found.addAll(falsePositives); + } + + return found; + } + + + public Set find(String inputString, Set values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary, + Engine engine, boolean ignoreMinLength, EntityType entityType) { Set found = new HashSet<>(); for (String value : values) { @@ -75,7 +95,7 @@ public class EntitySearchUtils { stopIndex = startIndex + cleanValue.length(); if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { - found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine)); + found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, 
entityType)); } } while (startIndex > -1); } @@ -83,6 +103,7 @@ public class EntitySearchUtils { } + private boolean isSeparator(char c) { return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’'; @@ -119,6 +140,25 @@ public class EntitySearchUtils { } + + public void removeFalsePositives(Set entities, Set falsePositives) { + + List wordsToRemove = new ArrayList<>(); + for (Entity word : falsePositives) { + for (Entity inner : entities) { + if (inner.getWord().length() < word.getWord() + .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) { + wordsToRemove.add(inner); + } + } + } + entities.removeAll(wordsToRemove); + entities.removeAll(falsePositives); + } + + + + public void removeEntitiesContainedInLarger(Set entities) { List wordsToRemove = new ArrayList<>(); @@ -126,8 +166,7 @@ public class EntitySearchUtils { for (Entity inner : entities) { if (inner.getWord().length() < word.getWord() .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) { - // FIXME this is workaround for RED-3327 and should be removed in the future. 
- if (word.getType().contains("recommendation_") && !inner.getType().contains("recommendation_")) { + if (word.getEntityType().equals(EntityType.RECOMMENDATION) && inner.getEntityType().equals(EntityType.ENTITY)) { wordsToRemove.add(word); } else { wordsToRemove.add(inner); @@ -135,7 +174,7 @@ public class EntitySearchUtils { } } } - wordsToRemove.forEach(entities::remove); + wordsToRemove.stream().filter(e -> !e.getEntityType().equals(EntityType.FALSE_POSITIVE) && !e.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)).forEach(entities::remove); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 1027dca5..3319bd04 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -86,12 +86,6 @@ public class RedactionIntegrationTest { private static final String OCR = "ocr"; private static final String DOSSIER_REDACTIONS = "dossier_redactions"; private static final String IMPORTED_REDACTION = "imported_redaction"; - - private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author"; - private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address"; - - private static final String FALSE_POSITIVE = "false_positive"; - private static final String PII = "PII"; @Autowired @@ -129,6 +123,8 @@ public class RedactionIntegrationTest { private final Map> dictionary = new HashMap<>(); private final Map> dossierDictionary = new HashMap<>(); + private final Map> falsePositive = new HashMap<>(); + private final Map> falseRecommendation = new HashMap<>(); private final Map typeColorMap
= new HashMap<>(); private final Map hintTypeMap = new HashMap<>(); private final Map caseInSensitiveMap = new HashMap<>(); @@ -226,9 +222,6 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false)); when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false)); when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false)); - when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false)); - when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false)); - when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false)); when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false)); when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false)); when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false)); @@ -492,8 +485,6 @@ public class RedactionIntegrationTest { deleted.add("David Chubb"); deleted.add("mouse"); - dictionary.get(FALSE_POSITIVE).add("David Chubb"); - reanlysisVersions.put("David Chubb", 3L); reanlysisVersions.put("mouse", 3L); @@ -501,7 +492,6 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, 
false)); - when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, null)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false)); start = System.currentTimeMillis(); @@ -665,7 +655,6 @@ public class RedactionIntegrationTest { deleted.add("David Chubb"); deleted.add("mouse"); - dictionary.get(FALSE_POSITIVE).add("David Chubb"); reanlysisVersions.put("David Chubb", 3L); reanlysisVersions.put("mouse", 3L); @@ -674,7 +663,6 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false)); - when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, null)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false)); start = System.currentTimeMillis(); @@ -1263,21 +1251,6 @@ public class RedactionIntegrationTest { .stream() .map(this::cleanDictionaryEntry) .collect(Collectors.toSet())); - dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); - dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>()) - .addAll(ResourceLoader.load("dictionaries/false_positive.txt") - .stream() - .map(this::cleanDictionaryEntry) - .collect(Collectors.toSet())); dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>()) .addAll(ResourceLoader.load("dictionaries/purity.txt") .stream() @@ -1314,6 +1287,13 @@ public class RedactionIntegrationTest { .map(this::cleanDictionaryEntry) .collect(Collectors.toSet())); dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>()); + + falsePositive.computeIfAbsent(PII, v -> new ArrayList<>()) + 
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + } @@ -1367,9 +1347,6 @@ public class RedactionIntegrationTest { typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); typeColorMap.put(TEST_METHOD, "#91fae8"); typeColorMap.put(PII, "#66ccff"); - typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c"); - typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c"); - typeColorMap.put(FALSE_POSITIVE, "#ffffff"); typeColorMap.put(PURITY, "#ffe187"); typeColorMap.put(IMAGE, "#fcc5fb"); typeColorMap.put(OCR, "#fcc5fb"); @@ -1389,9 +1366,6 @@ public class RedactionIntegrationTest { hintTypeMap.put(PUBLISHED_INFORMATION, true); hintTypeMap.put(TEST_METHOD, true); hintTypeMap.put(PII, false); - hintTypeMap.put(RECOMMENDATION_AUTHOR, false); - hintTypeMap.put(RECOMMENDATION_ADDRESS, false); - hintTypeMap.put(FALSE_POSITIVE, true); hintTypeMap.put(PURITY, false); hintTypeMap.put(IMAGE, true); hintTypeMap.put(OCR, true); @@ -1412,9 +1386,6 @@ public class RedactionIntegrationTest { caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); caseInSensitiveMap.put(TEST_METHOD, false); caseInSensitiveMap.put(PII, false); - caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false); - caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false); - caseInSensitiveMap.put(FALSE_POSITIVE, false); caseInSensitiveMap.put(PURITY, false); caseInSensitiveMap.put(IMAGE, true); caseInSensitiveMap.put(OCR, true); @@ -1435,9 +1406,6 @@ public class RedactionIntegrationTest { recommendationTypeMap.put(PUBLISHED_INFORMATION, false); recommendationTypeMap.put(TEST_METHOD, false); recommendationTypeMap.put(PII, false); - recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true); - recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true); - recommendationTypeMap.put(FALSE_POSITIVE, false); recommendationTypeMap.put(PURITY, false); recommendationTypeMap.put(IMAGE, false); recommendationTypeMap.put(OCR, false); @@ -1447,7 +1415,6 @@ public 
class RedactionIntegrationTest { recommendationTypeMap.put(DOSSIER_REDACTIONS, false); recommendationTypeMap.put(IMPORTED_REDACTION, false); - rankTypeMap.put(FALSE_POSITIVE, 160); rankTypeMap.put(PURITY, 155); rankTypeMap.put(PII, 150); rankTypeMap.put(ADDRESS, 140); @@ -1460,8 +1427,6 @@ public class RedactionIntegrationTest { rankTypeMap.put(PUBLISHED_INFORMATION, 70); rankTypeMap.put(TEST_METHOD, 60); rankTypeMap.put(HINT_ONLY, 50); - rankTypeMap.put(RECOMMENDATION_AUTHOR, 40); - rankTypeMap.put(RECOMMENDATION_ADDRESS, 30); rankTypeMap.put(IMAGE, 30); rankTypeMap.put(OCR, 29); rankTypeMap.put(LOGO, 28); @@ -1511,6 +1476,8 @@ public class RedactionIntegrationTest { .id(type + ":" + TEST_DOSSIER_TEMPLATE_ID) .hexColor(typeColorMap.get(type)) .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type))) + .falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>()) + .falseRecommendationEntries(falseRecommendation.containsKey(type) ? 
toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>()) .isHint(hintTypeMap.get(type)) .isCaseInsensitive(caseInSensitiveMap.get(type)) .isRecommendation(recommendationTypeMap.get(type)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java index f1d419d4..7db8f371 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtilsTest.java @@ -9,6 +9,7 @@ import org.junit.Test; import com.iqser.red.service.redaction.v1.model.Engine; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; +import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType; public class EntitySearchUtilsTest { @@ -16,8 +17,8 @@ public class EntitySearchUtilsTest { public void testNestedEntitiesRemoval() { Set entities = new HashSet<>(); - Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE); - Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE); + Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); entities.add(nested); entities.add(nesting); EntitySearchUtils.removeEntitiesContainedInLarger(entities); @@ -39,14 +40,14 @@ public class EntitySearchUtilsTest { // Arrange Set existingEntities = new HashSet<>(); - Entity existingEntity1 = new 
Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE); - Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE); + Entity existingEntity1 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); existingEntities.add(existingEntity1); existingEntities.add(existingEntity2); Set foundEntities = new HashSet<>(); - Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE); - Entity foundEntities2 = new Entity("Superman Y.", "fake type", 10, 20, "fake headline", 0, false, false, Engine.RULE); + Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity foundEntities2 = new Entity("Superman Y.", "fake type", 10, 20, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); foundEntities.add(foundEntities1); foundEntities.add(foundEntities2); @@ -72,14 +73,14 @@ public class EntitySearchUtilsTest { // Arrange Set existingEntities = new HashSet<>(); - Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE); - Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE); + Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); existingEntities.add(existingEntity1); existingEntities.add(existingEntity2); Set foundEntities = new HashSet<>(); - Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, 
false, false, Engine.RULE); - Entity foundEntities2 = new Entity("X. Superman Y.", "fake type", 7, 20, "fake headline", 0, false, false, Engine.RULE); + Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity foundEntities2 = new Entity("X. Superman Y.", "fake type", 7, 20, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); foundEntities.add(foundEntities1); foundEntities.add(foundEntities2); @@ -104,14 +105,14 @@ public class EntitySearchUtilsTest { // Arrange Set existingEntities = new HashSet<>(); - Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE); - Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE); + Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE,EntityType.ENTITY); + Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); existingEntities.add(existingEntity1); existingEntities.add(existingEntity2); Set foundEntities = new HashSet<>(); - Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE); - Entity foundEntities2 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE); + Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity foundEntities2 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); foundEntities.add(foundEntities1); foundEntities.add(foundEntities2); @@ -136,14 +137,14 @@ public class EntitySearchUtilsTest { // Arrange Set existingEntities = new HashSet<>(); - Entity existingEntity1 = new Entity("X. 
Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE); - Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE); + Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); existingEntities.add(existingEntity1); existingEntities.add(existingEntity2); Set foundEntities = new HashSet<>(); - Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE); - Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE); + Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); foundEntities.add(foundEntities1); foundEntities.add(foundEntities2); @@ -166,14 +167,14 @@ public class EntitySearchUtilsTest { // Arrange Set existingEntities = new HashSet<>(); - Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE); - Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE); + Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); existingEntities.add(existingEntity1); existingEntities.add(existingEntity2); Set foundEntities = new HashSet<>(); - Entity foundEntities1 = new Entity("Batman X. 
Superman", "fake type", 0, 17, "fake headline", 0, false, false, Engine.RULE); - Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE); + Entity foundEntities1 = new Entity("Batman X. Superman", "fake type", 0, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); + Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY); foundEntities.add(foundEntities1); foundEntities.add(foundEntities2); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII_false_positive.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII_false_positive.txt new file mode 100644 index 00000000..cd9fab48 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII_false_positive.txt @@ -0,0 +1 @@ +C. J. Alfred Xinyi \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt deleted file mode 100644 index a696cee0..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt +++ /dev/null @@ -1,239 +0,0 @@ -(Parent -ADME - Bioanalyses, Vergèze, -Aeration: Gentle -Andrews mean -Andrews pKi -BASF study code -Billington – -Bond elute -Brown Liquid -Brown Orange -Brown Solid -Brown disco -Brown discoloured -Brown fat -Brown liquid -Brown solid -Buehler (1980), -Buehler (1980). -Buehler 9 -Buehler Test. -Buehler assay -Buehler test -Buehler) -Buehler-like : -Calandra lark -Cary, NC, -Chambersas for -Class 4 -Class III’ -Class III’) -Class method, -Class2 (effectively -ClassMethod) Vischim -Clay = -Cochran-Armitage Test. -Cochran-armitage test. 
-Dunnett 2 -Dunnett's "t'' -Dunnett's Test -Dunnett's multiple -Dunnett's t-test -Dunnett's test -Dunnett) ** -Dunnett) Dose -Dunnett) Food -Dunnett) Lower -Dunnett, 1964) -Dunnett’s Multiple -Dunnett’s T -Dunnett’s Test) -Dunnett’s adjustment -Dunnett’s multiple -Dunnett’s post -Dunnett’s t-test) -Fine vacuolation -Fine white -Finewhite powder -Fischer 344 -Fischer 344) -Fischer F344 -Fischer rat -Fischer) Contaminants: -Fischer-344 acceptability -Fischer-344/ -Fischer-344/DuCrj acceptability -Fischer344 rats. -Fischer344) : -Fischerrat embryo -Fischer’s exact -Fisher 344 -Fisher Exact Test -Fisher Exact test -Fisher exact test -Fisher's Exact -Fisher-344 rat -Fisher344 (F344/ -Fisher344 rat -Fisher344 rats -Fisher344 rats. -Fisher344 rats/ -Fisher344) group -Fisher’s exact. -Freeman-Tukey transform -Gentle aeration -Grade 2: -Grade 3) -Grade 3: -Green -Green Alga -Green as -Green-, blue-, -GreenAlga, Selenastrum -GreenAlga.. Springborn -Gross External -Gross Necropsy -Gross and -Gross necropsies -Gross necropsy -Gross pathology -Gross pathology: -Hall Farm -Hall Farm) -HallFarm (396 -Ham's F12 -Ham’s F10 -Hand-held -HarlanWinkelmann, Borchem, -Hill International, -Hill Top -Hill criteria -Hill, Bracknell, -HillTop Chamber, -Hillmodel was -Hill’s model -Japanese White -Jerneplaque assay -Kent. Blood -Klimisch score -Litchfield and Wilcoxon -Long term -Long-Term Toxicity -Long-term -Longbw/ -Major Macroscopic findings -Major biliary -Major defects -Major deviations -Major fetal -Major metabolic -Major metabolites -Major route -Major treatment-related -Mallard (Anas -Mallard with -Manual-Hand held -Marsh Frog -Marsh harrier -Masonjars (500 -Meier formula) -Meier survival -Miles anti-Canine -Miles anti-dog -Netherlands, 2011 -Netherlands, published: -Netherlands. Report -Penn State -Porewater: -Rattray study -Read across -Reed Bunting -Reichert-Jung Cryo -RifconGmbH. 
Oxon -Ritz and -S.P.-U/ -San Diego, -Shaw's Farm, -Sprague -Strain JapaneseWhite -Strong morphological -Weight - -Weight 2744 -Weight Gain -Weight On -Weight Range -Weight and -Weight and length -Weight at -Weight gain -Weight loss -Weight of -Weight only -Weight range -Weight, age -Weight, feed -Weight-of-evidence approach -Weight: -Weight; Adult -Weight; Whole -WeightGain 0-1 -WeightRange: -Weightat dosing: -Weightrange 200-250g. -White (Tif: -White 10 -White 3 -White Age -White Powder -White Strain -White blood -White cell -White crystalline -White guideline -White guinea-pigs -White hygroscopic -White powder -White solid -White stork -White swan -White to off-white -White wagtail -White, solid -WhitePowder Lot/ -Whiteguideline : -Whitehygroscopic solid -Whitepowder Lot/ -Whitepowder Stability -Whitesolid Batch -Whiteswan) -Whiteto off-white -Wildlife International specifications -Wildlife Internationalspecifications Water: -Williams E -Williams medium -Williams' medium -WilliamsE medium -Williams’ test -Wilson's method. -Wilson. All foetal -Wilson.All foetal -Wood pigeons -York. -Zeneca dossier); -Zenecadossier); see -green algae -CTL -No details reported -Not given -Not indicated -Not reported -Not reported. 
-Not stated -Syngenta -Syngenta AG -N/A -No details reported -Not available -Test facility -TBD -David Chubb \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_address.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_address.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_author.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_author.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index ff0ddc27..1de194b4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -9,10 +9,9 @@ global Section section //rule "0: Expand CBI Authors with firstname initials" // when -// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author")) +// Section(matchesType("CBI_author")) // then // section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1); -// section.expandByRegEx("recommendation_CBI_author", "(,? 
[A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1); // end @@ -20,22 +19,21 @@ rule "0: Add CBI_author from ai" when Section(aiMatchesType("CBI_author")) then - section.addAiEntities("CBI_author", "recommendation_CBI_author"); + section.addAiEntities("CBI_author", "CBI_author"); end rule "0: Combine ai types CBI_author from ai" when Section(aiMatchesType("ORG")) then - section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "recommendation_CBI_address", 3, false); + section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false); end rule "0: Expand CBI Authors with firstname initials" when - Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author")) + Section(matchesType("CBI_author")) then section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "[^\\s]+"); - section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "[^\\s]+"); end