Improved rule to combine AI matches

This commit is contained in:
deiflaender 2022-01-21 09:08:00 +01:00
parent c4c96c1712
commit f32d9a5267
2 changed files with 20 additions and 8 deletions

View File

@ -83,7 +83,7 @@ public class Section {
}
public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType){
public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType, int minPartMatches, boolean allowDuplicateTypes){
Set<String> combineSet = Set.of(combineTypes.split(","));
@ -91,26 +91,38 @@ public class Section {
Set<Entity> found = new HashSet<>();
int start = -1;
int lastEnd = -1;
boolean moreThanOne = false;
int numberOfMatchParts = 0;
Set<String> foundParts = new HashSet<>();
for (Entity entity : sorted){
if(entity.getType().equals(startType) && start == -1){
if(entity.getType().equals(startType) && start == -1) {
lastEnd = entity.getEnd();
start = entity.getStart();
} else if(!allowDuplicateTypes && foundParts.contains(entity.getType())){
if(numberOfMatchParts >= minPartMatches) {
String value = searchText.substring(start, lastEnd);
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
}
start = -1;
lastEnd = -1;
numberOfMatchParts = 0;
foundParts = new HashSet<>();
} else if(entity.getType().equals(startType) && start != -1){
if(moreThanOne) {
if(numberOfMatchParts >= minPartMatches) {
String value = searchText.substring(start, lastEnd);
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
}
start = entity.getStart();
lastEnd = entity.getEnd();
moreThanOne = false;
numberOfMatchParts = 0;
foundParts = new HashSet<>();
} else if(start != -1 && combineSet.contains(entity.getType()) && entity.getStart() - lastEnd < maxDistanceBetween){
lastEnd = entity.getEnd();
moreThanOne = true;
numberOfMatchParts++;
foundParts.add(entity.getType());
}
}
if(moreThanOne) {
if(numberOfMatchParts >= minPartMatches) {
String value = searchText.substring(start, lastEnd);
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
}

View File

@ -27,7 +27,7 @@ rule "0: Combine ai types CBI_author from ai"
when
Section(aiMatchesType("ORG"))
then
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 100, "recommendation_CBI_address");
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "recommendation_CBI_address", 3, false);
end
rule "0: Expand CBI Authors with firstname initials"