Pull request #515: RED-6204
Merge in RED/redaction-service from RED-6204 to master. Squashed commit '3fad6381ce71f36083d6f545e1b9c47cecda3ef1' contains: RED-6204: Removed redundant parentheses; RED-6204: Removed redundant variable assignment (Sonar issue) and simplified code; RED-6204: Removed unused import (Sonar issue); RED-6204: Moved code to its own class for metrics; RED-6204: Moved code to its own class for metrics; RED-6204: Removed the AspectJ mode setting, since it would require a couple of AspectJ dependencies for a very limited use case; RED-6204: Switched to AspectJ to enable proxies on private methods.
This commit is contained in:
commit
f7ec180710
@ -1,9 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
@ -13,6 +9,10 @@ import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.aop.TimedAspect;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
|
||||
|
||||
@ -3,8 +3,6 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
|
||||
@ -25,16 +25,13 @@ public final class RulingTextDirAdjustUtil {
|
||||
private Point2D convertPoint(float x, float y, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
var xAdj = getXRot(x, y, dir, pageWidth, pageHeight);
|
||||
var yAdj = 0f;
|
||||
if (dir == 0 || dir == 180) {
|
||||
yAdj = pageHeight - getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
|
||||
} else {
|
||||
yAdj = pageWidth - getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
|
||||
}
|
||||
var yLowerLeftRot = getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
|
||||
var yAdj = dir == 0 || dir == 180 ? pageHeight - yLowerLeftRot : pageWidth - yLowerLeftRot;
|
||||
return new Point2D.Float(xAdj, yAdj);
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("SuspiciousNameCombination")
|
||||
private float getXRot(float x, float y, float dir, float pageWidth, float pageHeight) {
|
||||
|
||||
if (dir == 0) {
|
||||
|
||||
@ -14,8 +14,8 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ -1,33 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service.analyze;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.ImportedRedactionService.IMPORTED_REDACTION_TYPE;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogLegalBasis;
|
||||
@ -44,42 +35,49 @@ import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ImportedRedactionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionChangeLogService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.SectionGridCreatorService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.SectionTextBuilderService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction.EntityRedactionService;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.ImageService;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class AnalyzeService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final RedactionChangeLogService redactionChangeLogService;
|
||||
private final LegalBasisClient legalBasisClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final SectionTextBuilderService sectionTextBuilderService;
|
||||
private final SectionGridCreatorService sectionGridCreatorService;
|
||||
private final ImageService imageService;
|
||||
private final ImportedRedactionService importedRedactionService;
|
||||
DictionaryService dictionaryService;
|
||||
DroolsExecutionService droolsExecutionService;
|
||||
EntityRedactionService entityRedactionService;
|
||||
RedactionLogCreatorService redactionLogCreatorService;
|
||||
RedactionStorageService redactionStorageService;
|
||||
PdfSegmentationService pdfSegmentationService;
|
||||
RedactionChangeLogService redactionChangeLogService;
|
||||
LegalBasisClient legalBasisClient;
|
||||
RedactionServiceSettings redactionServiceSettings;
|
||||
SectionTextBuilderService sectionTextBuilderService;
|
||||
SectionGridCreatorService sectionGridCreatorService;
|
||||
ImageService imageService;
|
||||
ImportedRedactionService importedRedactionService;
|
||||
SectionFinder sectionFinder;
|
||||
|
||||
|
||||
@Timed("redactmanager_analyzeDocumentStructure")
|
||||
@ -153,32 +151,25 @@ public class AnalyzeService {
|
||||
new DictionaryVersion(redactionLog.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()),
|
||||
analyzeRequest.getDossierId());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse().isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(
|
||||
dictionaryIncrement,
|
||||
redactionLog,
|
||||
text,
|
||||
analyzeRequest);
|
||||
Set<Integer> sectionsToReanalyse = analyzeRequest.getSectionsToReanalyse().isEmpty() //
|
||||
? sectionFinder.findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest) //
|
||||
: analyzeRequest.getSectionsToReanalyse();
|
||||
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, new HashSet<>());
|
||||
}
|
||||
|
||||
NerEntities nerEntities;
|
||||
if (redactionServiceSettings.isNerServiceEnabled()) {
|
||||
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
} else {
|
||||
nerEntities = new NerEntities();
|
||||
}
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
List<SectionText> reanalysisSections = text.getSectionTexts()
|
||||
.stream()
|
||||
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
NerEntities nerEntities = redactionServiceSettings.isNerServiceEnabled() //
|
||||
? redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()) //
|
||||
: new NerEntities();
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest, nerEntities);
|
||||
|
||||
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
|
||||
@ -238,48 +229,13 @@ public class AnalyzeService {
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_findSectionsToReanalyse")
|
||||
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
|
||||
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||
if (entry.isLocalManualRedaction() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
|
||||
sectionsToReanalyse.add(entry.getSectionNumber());
|
||||
}
|
||||
if (entry.isImage()) {
|
||||
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
|
||||
}
|
||||
}
|
||||
|
||||
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList()),
|
||||
true);
|
||||
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
|
||||
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrementsSearch)) {
|
||||
sectionsToReanalyse.add(sectionText.getSectionNumber());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.info("Should reanalyze {} sections for request: {}, took: {}", sectionsToReanalyse.size(), analyzeRequest, System.currentTimeMillis() - start);
|
||||
|
||||
return sectionsToReanalyse;
|
||||
}
|
||||
|
||||
|
||||
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
|
||||
long startTime,
|
||||
RedactionLog redactionLog,
|
||||
Text text,
|
||||
DictionaryVersion dictionaryVersion,
|
||||
boolean isReanalysis,
|
||||
Set<FileAttribute> addedFileAttributes
|
||||
) {
|
||||
Set<FileAttribute> addedFileAttributes) {
|
||||
|
||||
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
|
||||
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
|
||||
@ -313,41 +269,12 @@ public class AnalyzeService {
|
||||
}
|
||||
|
||||
|
||||
private Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
return Stream.concat(manualRedactions.getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getLegalBasisChanges().stream().map(ManualLegalBasisChange::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getImageRecategorization().stream().map(ManualImageRecategorization::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getAnnotationId),
|
||||
manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionLogLegalBasis> convert(List<LegalBasis> legalBasis) {
|
||||
|
||||
return legalBasis.stream().map(l -> new RedactionLogLegalBasis(l.getName(), l.getDescription(), l.getReason())).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public Image convert(RedactionLogEntry entry) {
|
||||
|
||||
Rectangle position = entry.getPositions().get(0);
|
||||
|
||||
return Image.builder()
|
||||
.type(entry.getType())
|
||||
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft().getY(), position.getWidth(), position.getHeight()))
|
||||
.sectionNumber(entry.getSectionNumber())
|
||||
.section(entry.getSection())
|
||||
.page(position.getPage())
|
||||
.hasTransparency(entry.isImageHasTransparency())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
|
||||
|
||||
if (excludedPages != null && !excludedPages.isEmpty()) {
|
||||
@ -0,0 +1,101 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service.analyze;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
class SectionFinder {
|
||||
@Timed("redactmanager_findSectionsToReanalyse")
|
||||
public Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
|
||||
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||
if (entry.isLocalManualRedaction() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
|
||||
sectionsToReanalyse.add(entry.getSectionNumber());
|
||||
}
|
||||
if (entry.isImage()) {
|
||||
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
|
||||
}
|
||||
}
|
||||
|
||||
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList()),
|
||||
true);
|
||||
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
|
||||
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrementsSearch)) {
|
||||
sectionsToReanalyse.add(sectionText.getSectionNumber());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.info("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);
|
||||
|
||||
return sectionsToReanalyse;
|
||||
}
|
||||
|
||||
private static Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
return Stream.concat(manualRedactions.getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getLegalBasisChanges().stream().map(ManualLegalBasisChange::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getImageRecategorization().stream().map(ManualImageRecategorization::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getAnnotationId),
|
||||
manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
private static Image convert(RedactionLogEntry entry) {
|
||||
|
||||
Rectangle position = entry.getPositions().get(0);
|
||||
|
||||
return Image.builder()
|
||||
.type(entry.getType())
|
||||
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft().getY(), position.getWidth(), position.getHeight()))
|
||||
.sectionNumber(entry.getSectionNumber())
|
||||
.section(entry.getSection())
|
||||
.page(position.getPage())
|
||||
.hasTransparency(entry.isImageHasTransparency())
|
||||
.build();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,127 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
class EntityFinder {
|
||||
|
||||
RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
|
||||
@Timed("redactmanager_findEntities")
|
||||
public Entities findEntities(SearchableText searchableText,
|
||||
String headline,
|
||||
int sectionNumber,
|
||||
Dictionary dictionary,
|
||||
boolean local,
|
||||
NerEntities nerEntities,
|
||||
List<Integer> cellStarts,
|
||||
ManualRedactions manualRedactions) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.asString();
|
||||
|
||||
if (StringUtils.isEmpty(searchableString)) {
|
||||
return new Entities(new HashSet<>(), new HashSet<>());
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
|
||||
var searchImplementation = local ? model.getLocalSearch() : model.getEntriesSearch();
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
|
||||
searchImplementation,
|
||||
model,
|
||||
new FindEntityDetails(model.getType(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
!local,
|
||||
model.isDossierDictionary(),
|
||||
local ? Engine.RULE : Engine.DICTIONARY,
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
|
||||
Set<Entity> nerFound = new HashSet<>();
|
||||
if (!local) {
|
||||
nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
|
||||
}
|
||||
|
||||
var cleared = EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
|
||||
return new Entities(cleared.stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet()), nerFound);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getData().containsKey(sectionNumber)) {
|
||||
nerEntities.getData().get(sectionNumber).forEach(res -> {
|
||||
if (cellStarts == null || cellStarts.isEmpty()) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellStarts) {
|
||||
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
|
||||
intersectsCellStart = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
return entities;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,41 +1,52 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.FindEntitiesResult;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.SurroundingWordsService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class EntityRedactionService {
|
||||
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
DroolsExecutionService droolsExecutionService;
|
||||
SurroundingWordsService surroundingWordsService;
|
||||
EntityFinder entityFinder;
|
||||
|
||||
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
@ -45,7 +56,7 @@ public class EntityRedactionService {
|
||||
|
||||
if (dictionary.hasLocalEntries() || !findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
|
||||
if(!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
if (!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
|
||||
//AnalyzeRequest provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(analyzeRequest.getFileAttributes());
|
||||
@ -54,7 +65,14 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(findEntitiesResult.getEntities(), dictionary);
|
||||
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
|
||||
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts,
|
||||
dictionary,
|
||||
kieContainer,
|
||||
analyzeRequest,
|
||||
true,
|
||||
hintsPerSectionNumber,
|
||||
imagesPerPage,
|
||||
nerEntities);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(findEntitiesResult.getEntities(), foundByLocalEntitiesResult.getEntities(), dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(findEntitiesResult.getEntities());
|
||||
}
|
||||
@ -67,18 +85,78 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
public FindEntitiesResult findEntities(List<SectionText> reanalysisSections,
|
||||
Dictionary dictionary,
|
||||
KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage,
|
||||
NerEntities nerEntities) {
|
||||
Dictionary dictionary,
|
||||
KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage,
|
||||
NerEntities nerEntities) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = extractSearchableTextPairs(reanalysisSections,
|
||||
dictionary,
|
||||
analyzeRequest,
|
||||
local,
|
||||
hintsPerSectionNumber,
|
||||
nerEntities);
|
||||
|
||||
Entities entities = findEntities(reanalysisSection.getSearchableText(),
|
||||
Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
|
||||
if (!addedFileAttributes.isEmpty()) {
|
||||
//Section.Builder provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
|
||||
mergedFileAttributes.addAll(addedFileAttributes);
|
||||
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
|
||||
}
|
||||
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
|
||||
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
|
||||
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
|
||||
sectionSearchableTextPair.getSearchableText(),
|
||||
dictionary,
|
||||
sectionSearchableTextPair.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
|
||||
if (!local) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
|
||||
}
|
||||
|
||||
|
||||
private List<SectionSearchableTextPair> extractSearchableTextPairs(List<SectionText> reanalysisSections,
|
||||
Dictionary dictionary,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
NerEntities nerEntities) {
|
||||
|
||||
return reanalysisSections.stream().map(reanalysisSection -> {
|
||||
|
||||
Entities entities = entityFinder.findEntities(reanalysisSection.getSearchableText(),
|
||||
reanalysisSection.getHeadline(),
|
||||
reanalysisSection.getSectionNumber(),
|
||||
dictionary,
|
||||
@ -136,73 +214,36 @@ public class EntityRedactionService {
|
||||
|
||||
log.debug("Section {}, Images: {}", reanalysisSection.getSectionNumber(), reanalysisSection.getImages());
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities()
|
||||
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
|
||||
.nerEntities(entities.getNerEntities())
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
.sectionNumber(reanalysisSection.getSectionNumber())
|
||||
.tabularData(reanalysisSection.getTabularData())
|
||||
.searchableText(reanalysisSection.getSearchableText())
|
||||
.dictionary(dictionary)
|
||||
.images(reanalysisSection.getImages())
|
||||
.sectionAreas(reanalysisSection.getSectionAreas())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.manualRedactions(analyzeRequest.getManualRedactions())
|
||||
.isInTable(reanalysisSection.isTable())
|
||||
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts()));
|
||||
|
||||
}
|
||||
return toSectionSearchableTextPair(dictionary, analyzeRequest, hintsPerSectionNumber, reanalysisSection, entities);
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
Set<FileAttribute> addedFileAttributes = new HashSet<>();
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
private SectionSearchableTextPair toSectionSearchableTextPair(Dictionary dictionary,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
SectionText reanalysisSection,
|
||||
Entities entities) {
|
||||
|
||||
if(!addedFileAttributes.isEmpty()) {
|
||||
//Section.Builder provides immutable list.
|
||||
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
|
||||
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
|
||||
mergedFileAttributes.addAll(addedFileAttributes);
|
||||
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
|
||||
}
|
||||
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
|
||||
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
|
||||
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
|
||||
sectionSearchableTextPair.getSearchableText(),
|
||||
dictionary,
|
||||
sectionSearchableTextPair.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
|
||||
if (!local) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
|
||||
return new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities().stream(),
|
||||
hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
|
||||
.nerEntities(entities.getNerEntities())
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
.sectionNumber(reanalysisSection.getSectionNumber())
|
||||
.tabularData(reanalysisSection.getTabularData())
|
||||
.searchableText(reanalysisSection.getSearchableText())
|
||||
.dictionary(dictionary)
|
||||
.images(reanalysisSection.getImages())
|
||||
.sectionAreas(reanalysisSection.getSectionAreas())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.manualRedactions(analyzeRequest.getManualRedactions())
|
||||
.isInTable(reanalysisSection.isTable())
|
||||
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts());
|
||||
}
|
||||
|
||||
|
||||
@ -244,7 +285,7 @@ public class EntityRedactionService {
|
||||
private Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||
entities.stream().forEach(entity -> {
|
||||
entities.forEach(entity -> {
|
||||
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
|
||||
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()).add(entity);
|
||||
}
|
||||
@ -269,93 +310,4 @@ public class EntityRedactionService {
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_findEntities")
|
||||
private Entities findEntities(SearchableText searchableText,
|
||||
String headline,
|
||||
int sectionNumber,
|
||||
Dictionary dictionary,
|
||||
boolean local,
|
||||
NerEntities nerEntities,
|
||||
List<Integer> cellStarts,
|
||||
ManualRedactions manualRedactions) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.asString();
|
||||
|
||||
if (StringUtils.isEmpty(searchableString)) {
|
||||
return new Entities(new HashSet<>(), new HashSet<>());
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
|
||||
var searchImplementation = local ? model.getLocalSearch() : model.getEntriesSearch();
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
|
||||
searchImplementation,
|
||||
model,
|
||||
new FindEntityDetails(model.getType(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
!local,
|
||||
model.isDossierDictionary(),
|
||||
local ? Engine.RULE : Engine.DICTIONARY,
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
|
||||
Set<Entity> nerFound = new HashSet<>();
|
||||
if (!local) {
|
||||
nerFound.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
|
||||
}
|
||||
|
||||
var cleared = EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
|
||||
return new Entities(cleared.stream().filter(e -> !e.isFalsePositive()).collect(Collectors.toSet()), nerFound);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getData().containsKey(sectionNumber)) {
|
||||
nerEntities.getData().get(sectionNumber).forEach(res -> {
|
||||
if (cellStarts == null || cellStarts.isEmpty()) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellStarts) {
|
||||
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
|
||||
intersectsCellStart = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
return entities;
|
||||
}
|
||||
|
||||
}
|
||||
@ -59,8 +59,8 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
|
||||
@ -81,8 +81,8 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
@ -76,8 +76,8 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user