RED-1970: Ignore NER entities that are found over multiple table columns
This commit is contained in:
parent
8de655d884
commit
6a75fc74d6
@ -1,6 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -20,9 +19,8 @@ import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
@ -77,7 +75,8 @@ public class EntityRedactionService {
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
|
||||
Set<Entity> entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection
|
||||
.getSectionNumber(), dictionary, local, nerEntities);
|
||||
.getSectionNumber(), dictionary, local, nerEntities, reanalysisSection.getCellStarts());
|
||||
|
||||
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
|
||||
.getCellStarts());
|
||||
@ -199,7 +198,8 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
Dictionary dictionary, boolean local, NerEntities nerEntities) {
|
||||
Dictionary dictionary, boolean local, NerEntities nerEntities,
|
||||
List<Integer> cellstarts) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.toString();
|
||||
@ -210,30 +210,49 @@ public class EntityRedactionService {
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local, model
|
||||
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model
|
||||
.isDossierDictionary()));
|
||||
} else {
|
||||
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local, model
|
||||
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, !local, model
|
||||
.isDossierDictionary()));
|
||||
}
|
||||
}
|
||||
|
||||
if (!local) {
|
||||
addNerEntities(found, sectionNumber, headline, nerEntities);
|
||||
Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts);
|
||||
nerValuesPerType.entrySet().forEach(entry -> {
|
||||
found.addAll(EntitySearchUtils.find(searchableString, entry.getValue(), entry.getKey(), headline, sectionNumber, false, false));
|
||||
});
|
||||
}
|
||||
|
||||
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
|
||||
}
|
||||
|
||||
|
||||
private void addNerEntities(Set<Entity> found, int sectionNumber, String headline, NerEntities nerEntities) {
|
||||
private Map<String, Set<String>> getNerValues(int sectionNumber, NerEntities nerEntities,
|
||||
List<Integer> cellstarts) {
|
||||
|
||||
Map<String, Set<String>> nerValuesPerType = new HashMap<>();
|
||||
|
||||
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
|
||||
.containsKey(sectionNumber)) {
|
||||
nerEntities.getResult().get(sectionNumber).forEach(res -> {
|
||||
found.add(new Entity(new String(Base64.decodeBase64(res.getValue().getBytes())), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false));
|
||||
if (cellstarts == null || cellstarts.isEmpty()) {
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellstarts) {
|
||||
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
|
||||
intersectsCellStart = true;
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>()).add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
return nerValuesPerType;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -47,7 +47,7 @@ public class EntitySearchUtils {
|
||||
|
||||
|
||||
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
|
||||
boolean local, boolean isDossierDictionary) {
|
||||
boolean isDictionaryEntry, boolean isDossierDictionary) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
@ -67,7 +67,7 @@ public class EntitySearchUtils {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local, isDossierDictionary));
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user