Improved rule to combine AI matches
This commit is contained in:
parent
c4c96c1712
commit
f32d9a5267
@ -83,7 +83,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType){
|
||||
public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType, int minPartMatches, boolean allowDuplicateTypes){
|
||||
|
||||
Set<String> combineSet = Set.of(combineTypes.split(","));
|
||||
|
||||
@ -91,26 +91,38 @@ public class Section {
|
||||
Set<Entity> found = new HashSet<>();
|
||||
int start = -1;
|
||||
int lastEnd = -1;
|
||||
boolean moreThanOne = false;
|
||||
int numberOfMatchParts = 0;
|
||||
Set<String> foundParts = new HashSet<>();
|
||||
for (Entity entity : sorted){
|
||||
if(entity.getType().equals(startType) && start == -1){
|
||||
if(entity.getType().equals(startType) && start == -1) {
|
||||
lastEnd = entity.getEnd();
|
||||
start = entity.getStart();
|
||||
} else if(!allowDuplicateTypes && foundParts.contains(entity.getType())){
|
||||
if(numberOfMatchParts >= minPartMatches) {
|
||||
String value = searchText.substring(start, lastEnd);
|
||||
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
|
||||
}
|
||||
start = -1;
|
||||
lastEnd = -1;
|
||||
numberOfMatchParts = 0;
|
||||
foundParts = new HashSet<>();
|
||||
} else if(entity.getType().equals(startType) && start != -1){
|
||||
if(moreThanOne) {
|
||||
if(numberOfMatchParts >= minPartMatches) {
|
||||
String value = searchText.substring(start, lastEnd);
|
||||
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
|
||||
}
|
||||
start = entity.getStart();
|
||||
lastEnd = entity.getEnd();
|
||||
moreThanOne = false;
|
||||
numberOfMatchParts = 0;
|
||||
foundParts = new HashSet<>();
|
||||
} else if(start != -1 && combineSet.contains(entity.getType()) && entity.getStart() - lastEnd < maxDistanceBetween){
|
||||
lastEnd = entity.getEnd();
|
||||
moreThanOne = true;
|
||||
numberOfMatchParts++;
|
||||
foundParts.add(entity.getType());
|
||||
}
|
||||
}
|
||||
|
||||
if(moreThanOne) {
|
||||
if(numberOfMatchParts >= minPartMatches) {
|
||||
String value = searchText.substring(start, lastEnd);
|
||||
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ rule "0: Combine ai types CBI_author from ai"
|
||||
when
|
||||
Section(aiMatchesType("ORG"))
|
||||
then
|
||||
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 100, "recommendation_CBI_address");
|
||||
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "recommendation_CBI_address", 3, false);
|
||||
end
|
||||
|
||||
rule "0: Expand CBI Authors with firstname initials"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user