# Review notes for this patch (leading commentary; everything from the
# "diff --git" header onward is unchanged).
#
# Purpose: entities found in Section.java are now filtered against the
# dictionary's false-positive search before they are merged into `entities`.
#
# Section.java
#   * Hunk 1 only adds three blank lines ahead of findEntities(...).
#   * Hunk 2 calls EntitySearchUtils.removeFalsePositives(singleEntitySet,
#     searchText, dictionary.getType(type), new FindEntityDetails(...)) right
#     after EntitySearchUtils.clearAndFindPositions(...).
#   * The follow-up work (addEntitiesWithHigherRank,
#     removeEntitiesContainedInLarger and the localDictionaryAdds
#     recommendation loop) is now wrapped in `if (!singleEntitySet.isEmpty())`,
#     so it is skipped when the filter removed the entity.
#   * addEntitiesWithHigherRank now receives singleEntitySet.iterator().next()
#     in place of `entity`.
#
# EntitySearchUtils.java
#   * New method removeFalsePositives(found, inputString, type, details):
#       1. runs find(...) with type.getFalsePositiveSearch() and
#          details.withEntityType(EntityType.FALSE_POSITIVE);
#       2. hands both sets to markFalsePositives(found, falsePositives);
#       3. removes every element of `found` for which isFalsePositive() is
#          true. `found` is mutated in place and nothing is returned.
#
# NOTE(review): clearAndFindPositions is given `searchableText` while
#   removeFalsePositives is given `searchText`; neither declaration is visible
#   here - confirm both describe the same text and offsets.
# NOTE(review): null handling of dictionary.getType(type) and of
#   type.getFalsePositiveSearch() is not visible in this patch - TODO confirm
#   that types without false-positive entries are safe.
# NOTE(review): iterator().next() takes an arbitrary element; only one
#   add(entity) is visible, so this presumably is that entity - verify that
#   clearAndFindPositions cannot leave several elements in the set.
# NOTE(review): the patch text appears whitespace-collapsed and generic type
#   arguments look stripped (e.g. `Set found`); presumably an extraction
#   artifact - compare against the real commit before applying.
#
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 779d8217..50925791 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1003,6 +1003,9 @@ public class Section { } + + + private Set findEntities(String value, String asType, boolean caseInsensitive, boolean redacted, int ruleNumber, String reason, String legalBasis, Engine engine, boolean asRecommendation) { @@ -1075,22 +1078,25 @@ public class Section { singleEntitySet.add(entity); EntitySearchUtils.clearAndFindPositions(singleEntitySet, searchableText, dictionary, manualRedactions); + EntitySearchUtils.removeFalsePositives(singleEntitySet, searchText, dictionary.getType(type), new FindEntityDetails(type, headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY)); - EntitySearchUtils.addEntitiesWithHigherRank(entities, entity, dictionary); + if (!singleEntitySet.isEmpty()) { + EntitySearchUtils.addEntitiesWithHigherRank(entities, singleEntitySet.iterator().next(), dictionary); - EntitySearchUtils.removeEntitiesContainedInLarger(entities); + EntitySearchUtils.removeEntitiesContainedInLarger(entities); - if (addAsRecommendations && !isLocal()) { - String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; - Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER; - Matcher matcher = pattern.matcher(cleanedWord); + if (addAsRecommendations && !isLocal()) { + String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; + Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER; + Matcher matcher = 
pattern.matcher(cleanedWord); - while (matcher.find()) { - String match = matcher.group().trim(); - if (match.length() >= 3) { - localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(match); - String lastname = match.split(" ")[0]; - localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(lastname); + while (matcher.find()) { + String match = matcher.group().trim(); + if (match.length() >= 3) { + localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(match); + String lastname = match.split(" ")[0]; + localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(lastname); + } } } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 80c7d57c..bf48a7f3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -20,6 +20,13 @@ public class EntitySearchUtils { return searchImplementation.atLeastOneMatches(sectionText); } + + public void removeFalsePositives(Set found, String inputString, DictionaryModel type, FindEntityDetails details){ + Set falsePositives = find(inputString, type.getFalsePositiveSearch(), details.withEntityType(EntityType.FALSE_POSITIVE)); + markFalsePositives(found, falsePositives); + found.removeIf(f -> f.isFalsePositive()); + } + public Set findEntities(String inputString, SearchImplementation searchImplementation, DictionaryModel type, FindEntityDetails details) { Set found = find(inputString, searchImplementation, details);