RED-6204: Moved code to its own class for metrics.
Moved the private entity-finding method into its own class so that it can report a separate metric value.
This commit is contained in:
parent
f9b7ad4e3e
commit
00ef0eb677
@ -50,6 +50,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction.EntityRedactionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.ImageService;
|
||||
|
||||
@ -0,0 +1,127 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
class EntityFinder {
|
||||
|
||||
RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
|
||||
@Timed("redactmanager_findEntities")
|
||||
public Entities findEntities(SearchableText searchableText,
|
||||
String headline,
|
||||
int sectionNumber,
|
||||
Dictionary dictionary,
|
||||
boolean local,
|
||||
NerEntities nerEntities,
|
||||
List<Integer> cellStarts,
|
||||
ManualRedactions manualRedactions) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.asString();
|
||||
|
||||
if (StringUtils.isEmpty(searchableString)) {
|
||||
return new Entities(new HashSet<>(), new HashSet<>());
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
|
||||
var searchImplementation = local ? model.getLocalSearch() : model.getEntriesSearch();
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
|
||||
searchImplementation,
|
||||
model,
|
||||
new FindEntityDetails(model.getType(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
!local,
|
||||
model.isDossierDictionary(),
|
||||
local ? Engine.RULE : Engine.DICTIONARY,
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
|
||||
Set<Entity> nerFound = new HashSet<>();
|
||||
if (!local) {
|
||||
nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
|
||||
}
|
||||
|
||||
var cleared = EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
|
||||
return new Entities(cleared.stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet()), nerFound);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getData().containsKey(sectionNumber)) {
|
||||
nerEntities.getData().get(sectionNumber).forEach(res -> {
|
||||
if (cellStarts == null || cellStarts.isEmpty()) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellStarts) {
|
||||
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
|
||||
intersectsCellStart = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
return entities;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,41 +1,52 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.FindEntitiesResult;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.SurroundingWordsService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class EntityRedactionService {
|
||||
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
DroolsExecutionService droolsExecutionService;
|
||||
SurroundingWordsService surroundingWordsService;
|
||||
EntityFinder entityFinder;
|
||||
|
||||
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
@ -45,7 +56,7 @@ public class EntityRedactionService {
|
||||
|
||||
if (dictionary.hasLocalEntries() || !findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
|
||||
if(!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
if (!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
//AnalyzeRequest provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(analyzeRequest.getFileAttributes());
|
||||
@ -54,7 +65,14 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(findEntitiesResult.getEntities(), dictionary);
|
||||
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
|
||||
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts,
|
||||
dictionary,
|
||||
kieContainer,
|
||||
analyzeRequest,
|
||||
true,
|
||||
hintsPerSectionNumber,
|
||||
imagesPerPage,
|
||||
nerEntities);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(findEntitiesResult.getEntities(), foundByLocalEntitiesResult.getEntities(), dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(findEntitiesResult.getEntities());
|
||||
}
|
||||
@ -67,18 +85,78 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
public FindEntitiesResult findEntities(List<SectionText> reanalysisSections,
|
||||
Dictionary dictionary,
|
||||
KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage,
|
||||
NerEntities nerEntities) {
|
||||
Dictionary dictionary,
|
||||
KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage,
|
||||
NerEntities nerEntities) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = extractSearchableTextPairs(reanalysisSections,
|
||||
dictionary,
|
||||
analyzeRequest,
|
||||
local,
|
||||
hintsPerSectionNumber,
|
||||
nerEntities);
|
||||
|
||||
Entities entities = findEntities(reanalysisSection.getSearchableText(),
|
||||
Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
|
||||
if (!addedFileAttributes.isEmpty()) {
|
||||
//Section.Builder provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
|
||||
mergedFileAttributes.addAll(addedFileAttributes);
|
||||
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
|
||||
}
|
||||
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
|
||||
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
|
||||
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
|
||||
sectionSearchableTextPair.getSearchableText(),
|
||||
dictionary,
|
||||
sectionSearchableTextPair.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
|
||||
if (!local) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
|
||||
}
|
||||
|
||||
|
||||
private List<SectionSearchableTextPair> extractSearchableTextPairs(List<SectionText> reanalysisSections,
|
||||
Dictionary dictionary,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
NerEntities nerEntities) {
|
||||
|
||||
return reanalysisSections.stream().map(reanalysisSection -> {
|
||||
|
||||
Entities entities = entityFinder.findEntities(reanalysisSection.getSearchableText(),
|
||||
reanalysisSection.getHeadline(),
|
||||
reanalysisSection.getSectionNumber(),
|
||||
dictionary,
|
||||
@ -136,73 +214,36 @@ public class EntityRedactionService {
|
||||
|
||||
log.debug("Section {}, Images: {}", reanalysisSection.getSectionNumber(), reanalysisSection.getImages());
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities()
|
||||
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
|
||||
.nerEntities(entities.getNerEntities())
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
.sectionNumber(reanalysisSection.getSectionNumber())
|
||||
.tabularData(reanalysisSection.getTabularData())
|
||||
.searchableText(reanalysisSection.getSearchableText())
|
||||
.dictionary(dictionary)
|
||||
.images(reanalysisSection.getImages())
|
||||
.sectionAreas(reanalysisSection.getSectionAreas())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.manualRedactions(analyzeRequest.getManualRedactions())
|
||||
.isInTable(reanalysisSection.isTable())
|
||||
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts()));
|
||||
|
||||
}
|
||||
return toSectionSearchableTextPair(dictionary, analyzeRequest, hintsPerSectionNumber, reanalysisSection, entities);
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
private SectionSearchableTextPair toSectionSearchableTextPair(Dictionary dictionary,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
SectionText reanalysisSection,
|
||||
Entities entities) {
|
||||
|
||||
if(!addedFileAttributes.isEmpty()) {
|
||||
//Section.Builder provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
|
||||
mergedFileAttributes.addAll(addedFileAttributes);
|
||||
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
|
||||
}
|
||||
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
|
||||
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
|
||||
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
|
||||
sectionSearchableTextPair.getSearchableText(),
|
||||
dictionary,
|
||||
sectionSearchableTextPair.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
|
||||
if (!local) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
|
||||
return new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities().stream(),
|
||||
hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
|
||||
.nerEntities(entities.getNerEntities())
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
.sectionNumber(reanalysisSection.getSectionNumber())
|
||||
.tabularData(reanalysisSection.getTabularData())
|
||||
.searchableText(reanalysisSection.getSearchableText())
|
||||
.dictionary(dictionary)
|
||||
.images(reanalysisSection.getImages())
|
||||
.sectionAreas(reanalysisSection.getSectionAreas())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.manualRedactions(analyzeRequest.getManualRedactions())
|
||||
.isInTable(reanalysisSection.isTable())
|
||||
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts());
|
||||
}
|
||||
|
||||
|
||||
@ -244,7 +285,7 @@ public class EntityRedactionService {
|
||||
private Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||
entities.stream().forEach(entity -> {
|
||||
entities.forEach(entity -> {
|
||||
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
|
||||
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()).add(entity);
|
||||
}
|
||||
@ -269,93 +310,4 @@ public class EntityRedactionService {
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_findEntities")
|
||||
private Entities findEntities(SearchableText searchableText,
|
||||
String headline,
|
||||
int sectionNumber,
|
||||
Dictionary dictionary,
|
||||
boolean local,
|
||||
NerEntities nerEntities,
|
||||
List<Integer> cellStarts,
|
||||
ManualRedactions manualRedactions) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.asString();
|
||||
|
||||
if (StringUtils.isEmpty(searchableString)) {
|
||||
return new Entities(new HashSet<>(), new HashSet<>());
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
|
||||
var searchImplementation = local ? model.getLocalSearch() : model.getEntriesSearch();
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
|
||||
searchImplementation,
|
||||
model,
|
||||
new FindEntityDetails(model.getType(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
!local,
|
||||
model.isDossierDictionary(),
|
||||
local ? Engine.RULE : Engine.DICTIONARY,
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
|
||||
Set<Entity> nerFound = new HashSet<>();
|
||||
if (!local) {
|
||||
nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
|
||||
}
|
||||
|
||||
var cleared = EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
|
||||
return new Entities(cleared.stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet()), nerFound);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getData().containsKey(sectionNumber)) {
|
||||
nerEntities.getData().get(sectionNumber).forEach(res -> {
|
||||
if (cellStarts == null || cellStarts.isEmpty()) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellStarts) {
|
||||
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
|
||||
intersectsCellStart = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
return entities;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user