From f32d9a526787a6a74179a0e8f5589763a2080fca Mon Sep 17 00:00:00 2001 From: deiflaender Date: Fri, 21 Jan 2022 09:08:00 +0100 Subject: [PATCH] Improved rule to combine AI matches --- .../v1/server/redaction/model/Section.java | 26 ++++++++++++++----- .../src/test/resources/drools/rules.drl | 2 +- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index c913626b..f28c54e8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -83,7 +83,7 @@ public class Section { } - public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType){ + public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType, int minPartMatches, boolean allowDuplicateTypes){ Set combineSet = Set.of(combineTypes.split(",")); @@ -91,26 +91,38 @@ public class Section { Set found = new HashSet<>(); int start = -1; int lastEnd = -1; - boolean moreThanOne = false; + int numberOfMatchParts = 0; + Set foundParts = new HashSet<>(); for (Entity entity : sorted){ - if(entity.getType().equals(startType) && start == -1){ + if(entity.getType().equals(startType) && start == -1) { lastEnd = entity.getEnd(); start = entity.getStart(); + } else if(!allowDuplicateTypes && foundParts.contains(entity.getType())){ + if(numberOfMatchParts >= minPartMatches) { + String value = searchText.substring(start, lastEnd); + found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER)); + } + start = -1; + lastEnd = -1; + numberOfMatchParts = 0; + foundParts = new HashSet<>(); } else if(entity.getType().equals(startType) && start != -1){ - if(moreThanOne) { + if(numberOfMatchParts >= minPartMatches) { String value = searchText.substring(start, lastEnd); found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER)); } start = entity.getStart(); lastEnd = entity.getEnd(); - moreThanOne = false; + numberOfMatchParts = 0; + foundParts = new HashSet<>(); } else if(start != -1 && combineSet.contains(entity.getType()) && entity.getStart() - lastEnd < maxDistanceBetween){ lastEnd = entity.getEnd(); - moreThanOne = true; + numberOfMatchParts++; + foundParts.add(entity.getType()); } } - if(moreThanOne) { + if(numberOfMatchParts >= minPartMatches) { String value = searchText.substring(start, lastEnd); found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER)); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 715d5c27..ff0ddc27 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -27,7 +27,7 @@ rule "0: Combine ai types CBI_author from ai" when Section(aiMatchesType("ORG")) then - section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 100, "recommendation_CBI_address"); + section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "recommendation_CBI_address", 3, false); end rule "0: Expand CBI Authors with firstname initials"