Pull request #515: RED-6204

Merge in RED/redaction-service from RED-6204 to master

* commit '3fad6381ce71f36083d6f545e1b9c47cecda3ef1':
  RED-6204: Removed redundant parenthesis
  RED-6204: Removed redundant variable assignment (sonar issue) & simplified code
  RED-6204: Remove unused import (sonar issue)
  RED-6204: Moved code to its own class for metrics.
  RED-6204: Moved code to its own class for metrics.
  RED-6204: Remove AspectJ mode setting, since it would require a couple of AspectJ dependencies for a very limited use case
  RED-6204: Switched to AspectJ to enable proxies on private methods
This commit is contained in:
Viktor Seifert 2023-02-24 10:00:01 +01:00
commit f7ec180710
11 changed files with 410 additions and 308 deletions

View File

@ -1,9 +1,5 @@
package com.iqser.red.service.redaction.v1.server;
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@ -13,6 +9,10 @@ import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import io.micrometer.core.aop.TimedAspect;
import io.micrometer.core.instrument.MeterRegistry;

View File

@ -3,8 +3,6 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;

View File

@ -25,16 +25,13 @@ public final class RulingTextDirAdjustUtil {
private Point2D convertPoint(float x, float y, float dir, float pageWidth, float pageHeight) {
var xAdj = getXRot(x, y, dir, pageWidth, pageHeight);
var yAdj = 0f;
if (dir == 0 || dir == 180) {
yAdj = pageHeight - getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
} else {
yAdj = pageWidth - getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
}
var yLowerLeftRot = getYLowerLeftRot(x, y, dir, pageWidth, pageHeight);
var yAdj = dir == 0 || dir == 180 ? pageHeight - yLowerLeftRot : pageWidth - yLowerLeftRot;
return new Point2D.Float(xAdj, yAdj);
}
@SuppressWarnings("SuspiciousNameCombination")
private float getXRot(float x, float y, float dir, float pageWidth, float pageHeight) {
if (dir == 0) {

View File

@ -14,8 +14,8 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;

View File

@ -1,33 +1,24 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
package com.iqser.red.service.redaction.v1.server.redaction.service.analyze;
import static com.iqser.red.service.redaction.v1.server.redaction.service.ImportedRedactionService.IMPORTED_REDACTION_TYPE;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionLogLegalBasis;
@ -44,42 +35,49 @@ import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ImportedRedactionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionChangeLogService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
import com.iqser.red.service.redaction.v1.server.redaction.service.SectionGridCreatorService;
import com.iqser.red.service.redaction.v1.server.redaction.service.SectionTextBuilderService;
import com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction.EntityRedactionService;
import com.iqser.red.service.redaction.v1.server.segmentation.ImageService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import io.micrometer.core.annotation.Timed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class AnalyzeService {
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
private final EntityRedactionService entityRedactionService;
private final RedactionLogCreatorService redactionLogCreatorService;
private final RedactionStorageService redactionStorageService;
private final PdfSegmentationService pdfSegmentationService;
private final RedactionChangeLogService redactionChangeLogService;
private final LegalBasisClient legalBasisClient;
private final RedactionServiceSettings redactionServiceSettings;
private final SectionTextBuilderService sectionTextBuilderService;
private final SectionGridCreatorService sectionGridCreatorService;
private final ImageService imageService;
private final ImportedRedactionService importedRedactionService;
DictionaryService dictionaryService;
DroolsExecutionService droolsExecutionService;
EntityRedactionService entityRedactionService;
RedactionLogCreatorService redactionLogCreatorService;
RedactionStorageService redactionStorageService;
PdfSegmentationService pdfSegmentationService;
RedactionChangeLogService redactionChangeLogService;
LegalBasisClient legalBasisClient;
RedactionServiceSettings redactionServiceSettings;
SectionTextBuilderService sectionTextBuilderService;
SectionGridCreatorService sectionGridCreatorService;
ImageService imageService;
ImportedRedactionService importedRedactionService;
SectionFinder sectionFinder;
@Timed("redactmanager_analyzeDocumentStructure")
@ -153,32 +151,25 @@ public class AnalyzeService {
new DictionaryVersion(redactionLog.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()),
analyzeRequest.getDossierId());
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse().isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(
dictionaryIncrement,
redactionLog,
text,
analyzeRequest);
Set<Integer> sectionsToReanalyse = analyzeRequest.getSectionsToReanalyse().isEmpty() //
? sectionFinder.findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest) //
: analyzeRequest.getSectionsToReanalyse();
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
if (sectionsToReanalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true, new HashSet<>());
}
NerEntities nerEntities;
if (redactionServiceSettings.isNerServiceEnabled()) {
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
} else {
nerEntities = new NerEntities();
}
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
List<SectionText> reanalysisSections = text.getSectionTexts()
.stream()
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
.collect(Collectors.toList());
NerEntities nerEntities = redactionServiceSettings.isNerServiceEnabled() //
? redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()) //
: new NerEntities();
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest, nerEntities);
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
@ -238,48 +229,13 @@ public class AnalyzeService {
}
@Timed("redactmanager_findSectionsToReanalyse")
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) {
long start = System.currentTimeMillis();
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
Set<Integer> sectionsToReanalyse = new HashSet<>();
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
if (entry.isLocalManualRedaction() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
sectionsToReanalyse.add(entry.getSectionNumber());
}
if (entry.isImage()) {
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
}
}
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList()),
true);
for (SectionText sectionText : text.getSectionTexts()) {
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrementsSearch)) {
sectionsToReanalyse.add(sectionText.getSectionNumber());
}
}
log.info("Should reanalyze {} sections for request: {}, took: {}", sectionsToReanalyse.size(), analyzeRequest, System.currentTimeMillis() - start);
return sectionsToReanalyse;
}
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
long startTime,
RedactionLog redactionLog,
Text text,
DictionaryVersion dictionaryVersion,
boolean isReanalysis,
Set<FileAttribute> addedFileAttributes
) {
Set<FileAttribute> addedFileAttributes) {
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
@ -313,41 +269,12 @@ public class AnalyzeService {
}
private Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
if (manualRedactions == null) {
return new HashSet<>();
}
return Stream.concat(manualRedactions.getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId),
Stream.concat(manualRedactions.getLegalBasisChanges().stream().map(ManualLegalBasisChange::getAnnotationId),
Stream.concat(manualRedactions.getImageRecategorization().stream().map(ManualImageRecategorization::getAnnotationId),
Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getAnnotationId),
manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
}
public List<RedactionLogLegalBasis> convert(List<LegalBasis> legalBasis) {
return legalBasis.stream().map(l -> new RedactionLogLegalBasis(l.getName(), l.getDescription(), l.getReason())).collect(Collectors.toList());
}
public Image convert(RedactionLogEntry entry) {
Rectangle position = entry.getPositions().get(0);
return Image.builder()
.type(entry.getType())
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft().getY(), position.getWidth(), position.getHeight()))
.sectionNumber(entry.getSectionNumber())
.section(entry.getSection())
.page(position.getPage())
.hasTransparency(entry.isImageHasTransparency())
.build();
}
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
if (excludedPages != null && !excludedPages.isEmpty()) {

View File

@ -0,0 +1,101 @@
package com.iqser.red.service.redaction.v1.server.redaction.service.analyze;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Component;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
import io.micrometer.core.annotation.Timed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
/**
 * Determines which sections of a previously analysed document have to be analysed again.
 */
@Slf4j
@Component
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
class SectionFinder {

    /**
     * Collects the numbers of all sections that need to be re-analysed.
     * <p>
     * A section is selected when
     * <ul>
     * <li>the redaction log holds an entry for it that is a local manual redaction, or whose id is referenced by one of
     * the manual modifications of the request (see {@link #getRelevantManuallyModifiedAnnotationIds}), or</li>
     * <li>its text matches at least one value of the dictionary increment.</li>
     * </ul>
     *
     * @param dictionaryIncrement dictionary values that are searched for in every section text
     * @param redactionLog        redaction log whose entries are inspected
     * @param text                document text providing the sections to search
     * @param analyzeRequest      request providing the manual redactions
     * @return a mutable set with the section numbers to re-analyse; empty if nothing matched
     */
    @Timed("redactmanager_findSectionsToReanalyse")
    public Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) {

        long start = System.currentTimeMillis();
        Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());

        Set<Integer> sectionsToReanalyse = new HashSet<>();
        for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
            if (entry.isLocalManualRedaction() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
                sectionsToReanalyse.add(entry.getSectionNumber());
            }
        }

        var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList()),
                true);

        for (SectionText sectionText : text.getSectionTexts()) {
            if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrementsSearch)) {
                sectionsToReanalyse.add(sectionText.getSectionNumber());
            }
        }

        log.info("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);

        return sectionsToReanalyse;
    }


    /**
     * Gathers the annotation ids touched by resize redactions, legal basis changes, image recategorizations,
     * id removals and force redactions.
     *
     * @param manualRedactions manual redactions of the request, may be {@code null}
     * @return the set of annotation ids; empty if {@code manualRedactions} is {@code null}
     */
    private static Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {

        if (manualRedactions == null) {
            return new HashSet<>();
        }

        return Stream.concat(manualRedactions.getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId),
                Stream.concat(manualRedactions.getLegalBasisChanges().stream().map(ManualLegalBasisChange::getAnnotationId),
                        Stream.concat(manualRedactions.getImageRecategorization().stream().map(ManualImageRecategorization::getAnnotationId),
                                Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getAnnotationId),
                                        manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
    }

}

View File

@ -0,0 +1,127 @@
package com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import io.micrometer.core.annotation.Timed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
/**
 * Finds dictionary and NER based entities in the text of a single section.
 */
@Slf4j
@Component
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
class EntityFinder {

    RedactionServiceSettings redactionServiceSettings;


    /**
     * Searches the section text with every dictionary model and, for non-local searches, adds the NER results
     * of the section.
     *
     * @param searchableText   text of the section
     * @param headline         headline passed on to the created entities
     * @param sectionNumber    number of the section, also used as key into the NER data
     * @param dictionary       dictionary whose models are searched
     * @param local            {@code true} to use each model's local search (results are marked as
     *                         {@link Engine#RULE} / {@link EntityType#RECOMMENDATION}), {@code false} to use the
     *                         entries search ({@link Engine#DICTIONARY} / {@link EntityType#ENTITY}) and include NER values
     * @param nerEntities      NER results, only read when {@code local} is {@code false}
     * @param cellStarts       cell start offsets of the section, may be {@code null} or empty
     * @param manualRedactions manual redactions forwarded to the position lookup
     * @return the found entities without false positives plus the NER entities; both sets are empty when the
     * section text is empty
     */
    @Timed("redactmanager_findEntities")
    public Entities findEntities(SearchableText searchableText,
                                 String headline,
                                 int sectionNumber,
                                 Dictionary dictionary,
                                 boolean local,
                                 NerEntities nerEntities,
                                 List<Integer> cellStarts,
                                 ManualRedactions manualRedactions) {

        String originalText = searchableText.asString();
        if (StringUtils.isEmpty(originalText)) {
            return new Entities(new HashSet<>(), new HashSet<>());
        }

        // Lower-cased once up front; only used for case-insensitive models.
        String lowerCasedText = originalText.toLowerCase();

        Set<Entity> dictionaryMatches = new HashSet<>();
        for (DictionaryModel model : dictionary.getDictionaryModels()) {
            var details = new FindEntityDetails(model.getType(),
                    headline,
                    sectionNumber,
                    !local,
                    model.isDossierDictionary(),
                    local ? Engine.RULE : Engine.DICTIONARY,
                    local ? EntityType.RECOMMENDATION : EntityType.ENTITY);
            var textToSearch = model.isCaseInsensitive() ? lowerCasedText : originalText;
            var search = local ? model.getLocalSearch() : model.getEntriesSearch();

            EntitySearchUtils.addOrAddEngine(dictionaryMatches, EntitySearchUtils.findEntities(textToSearch, search, model, details));
        }

        Set<Entity> nerMatches = local ? new HashSet<>() : getNerValues(sectionNumber, nerEntities, cellStarts, headline);

        var positioned = EntitySearchUtils.clearAndFindPositions(dictionaryMatches, searchableText, dictionary, manualRedactions);
        Set<Entity> withoutFalsePositives = positioned.stream().filter(entity -> !entity.isFalsePositive()).collect(Collectors.toSet());

        return new Entities(withoutFalsePositives, nerMatches);
    }


    /**
     * Converts the NER results of a section into recommendation entities.
     * <p>
     * Nothing is returned when the NER service is disabled in the settings or there is no NER data for the section.
     * When cell starts are given, a result is skipped if any cell start lies strictly between its start and end offset.
     *
     * @param sectionNumber number of the section to look up in the NER data
     * @param nerEntities   NER results keyed by section number
     * @param cellStarts    cell start offsets, may be {@code null} or empty
     * @param headline      headline passed on to the created entities
     * @return a new mutable set with the created entities
     */
    private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {

        Set<Entity> nerValues = new HashSet<>();
        if (!redactionServiceSettings.isNerServiceEnabled() || !nerEntities.getData().containsKey(sectionNumber)) {
            return nerValues;
        }

        boolean noCellStarts = cellStarts == null || cellStarts.isEmpty();
        nerEntities.getData().get(sectionNumber).forEach(res -> {
            boolean keep = noCellStarts || cellStarts.stream().noneMatch(cellStart -> res.getStartOffset() < cellStart && cellStart < res.getEndOffset());
            if (keep) {
                nerValues.add(new Entity(res.getValue(),
                        res.getType(),
                        res.getStartOffset(),
                        res.getEndOffset(),
                        headline,
                        sectionNumber,
                        false,
                        false,
                        Engine.NER,
                        EntityType.RECOMMENDATION));
            }
        });
        return nerValues;
    }

}

View File

@ -1,41 +1,52 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
package com.iqser.red.service.redaction.v1.server.redaction.service.entityredaction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.FindEntitiesResult;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.SurroundingWordsService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import io.micrometer.core.annotation.Timed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class EntityRedactionService {
private final RedactionServiceSettings redactionServiceSettings;
private final DroolsExecutionService droolsExecutionService;
private final SurroundingWordsService surroundingWordsService;
DroolsExecutionService droolsExecutionService;
SurroundingWordsService surroundingWordsService;
EntityFinder entityFinder;
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
@ -45,7 +56,7 @@ public class EntityRedactionService {
if (dictionary.hasLocalEntries() || !findEntitiesResult.getAddedFileAttributes().isEmpty()) {
if(!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
if (!findEntitiesResult.getAddedFileAttributes().isEmpty()) {
//AnalyzeRequest provides immutable list.
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
mergedFileAttributes.addAll(analyzeRequest.getFileAttributes());
@ -54,7 +65,14 @@ public class EntityRedactionService {
}
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(findEntitiesResult.getEntities(), dictionary);
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
FindEntitiesResult foundByLocalEntitiesResult = findEntities(sectionTexts,
dictionary,
kieContainer,
analyzeRequest,
true,
hintsPerSectionNumber,
imagesPerPage,
nerEntities);
EntitySearchUtils.addEntitiesWithHigherRank(findEntitiesResult.getEntities(), foundByLocalEntitiesResult.getEntities(), dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(findEntitiesResult.getEntities());
}
@ -67,18 +85,78 @@ public class EntityRedactionService {
public FindEntitiesResult findEntities(List<SectionText> reanalysisSections,
Dictionary dictionary,
KieContainer kieContainer,
AnalyzeRequest analyzeRequest,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
Map<Integer, Set<Image>> imagesPerPage,
NerEntities nerEntities) {
Dictionary dictionary,
KieContainer kieContainer,
AnalyzeRequest analyzeRequest,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
Map<Integer, Set<Image>> imagesPerPage,
NerEntities nerEntities) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = extractSearchableTextPairs(reanalysisSections,
dictionary,
analyzeRequest,
local,
hintsPerSectionNumber,
nerEntities);
Entities entities = findEntities(reanalysisSection.getSearchableText(),
Set<FileAttribute> addedFileAttributes = new HashSet<>();
Set<Entity> entities = new HashSet<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
if (!addedFileAttributes.isEmpty()) {
//Section.Builder provides immutable list.
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
mergedFileAttributes.addAll(addedFileAttributes);
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
}
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
var entriesWithoutSurroundingText = analysedSection.getEntities()
.stream()
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
.collect(Collectors.toSet());
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
sectionSearchableTextPair.getSearchableText(),
dictionary,
sectionSearchableTextPair.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
}
entities.addAll(analysedSection.getEntities());
if (!local) {
for (Image image : analysedSection.getImages()) {
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
addLocalValuesToDictionary(analysedSection, dictionary);
}
});
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
}
private List<SectionSearchableTextPair> extractSearchableTextPairs(List<SectionText> reanalysisSections,
Dictionary dictionary,
AnalyzeRequest analyzeRequest,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
NerEntities nerEntities) {
return reanalysisSections.stream().map(reanalysisSection -> {
Entities entities = entityFinder.findEntities(reanalysisSection.getSearchableText(),
reanalysisSection.getHeadline(),
reanalysisSection.getSectionNumber(),
dictionary,
@ -136,73 +214,36 @@ public class EntityRedactionService {
log.debug("Section {}, Images: {}", reanalysisSection.getSectionNumber(), reanalysisSection.getImages());
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities()
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
.nerEntities(entities.getNerEntities())
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
.searchText(reanalysisSection.getSearchableText().toString())
.headline(reanalysisSection.getHeadline())
.sectionNumber(reanalysisSection.getSectionNumber())
.tabularData(reanalysisSection.getTabularData())
.searchableText(reanalysisSection.getSearchableText())
.dictionary(dictionary)
.images(reanalysisSection.getImages())
.sectionAreas(reanalysisSection.getSectionAreas())
.fileAttributes(analyzeRequest.getFileAttributes())
.manualRedactions(analyzeRequest.getManualRedactions())
.isInTable(reanalysisSection.isTable())
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts()));
}
return toSectionSearchableTextPair(dictionary, analyzeRequest, hintsPerSectionNumber, reanalysisSection, entities);
}).collect(Collectors.toList());
}
Set<FileAttribute> addedFileAttributes = new HashSet<>();
Set<Entity> entities = new HashSet<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
private SectionSearchableTextPair toSectionSearchableTextPair(Dictionary dictionary,
AnalyzeRequest analyzeRequest,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
SectionText reanalysisSection,
Entities entities) {
if(!addedFileAttributes.isEmpty()) {
//Section.Builder provides immutable list.
List<FileAttribute> mergedFileAttributes = new ArrayList<>();
mergedFileAttributes.addAll(sectionSearchableTextPair.getSection().getAddedFileAttributes());
mergedFileAttributes.addAll(addedFileAttributes);
sectionSearchableTextPair.getSection().setFileAttributes(mergedFileAttributes);
}
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
addedFileAttributes.addAll(analysedSection.getAddedFileAttributes());
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
var entriesWithoutSurroundingText = analysedSection.getEntities()
.stream()
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
.collect(Collectors.toSet());
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
sectionSearchableTextPair.getSearchableText(),
dictionary,
sectionSearchableTextPair.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
}
entities.addAll(analysedSection.getEntities());
if (!local) {
for (Image image : analysedSection.getImages()) {
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
addLocalValuesToDictionary(analysedSection, dictionary);
}
});
return FindEntitiesResult.builder().entities(entities).addedFileAttributes(addedFileAttributes).build();
return new SectionSearchableTextPair(Section.builder()
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities().stream(),
hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
.nerEntities(entities.getNerEntities())
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
.searchText(reanalysisSection.getSearchableText().toString())
.headline(reanalysisSection.getHeadline())
.sectionNumber(reanalysisSection.getSectionNumber())
.tabularData(reanalysisSection.getTabularData())
.searchableText(reanalysisSection.getSearchableText())
.dictionary(dictionary)
.images(reanalysisSection.getImages())
.sectionAreas(reanalysisSection.getSectionAreas())
.fileAttributes(analyzeRequest.getFileAttributes())
.manualRedactions(analyzeRequest.getManualRedactions())
.isInTable(reanalysisSection.isTable())
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts());
}
@ -244,7 +285,7 @@ public class EntityRedactionService {
private Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
entities.stream().forEach(entity -> {
entities.forEach(entity -> {
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()).add(entity);
}
@ -269,93 +310,4 @@ public class EntityRedactionService {
}));
}
/**
 * Searches one section's text for dictionary matches and, for non-local runs, NER values.
 *
 * <p>Every dictionary model is searched separately: case-insensitive models are matched against
 * a lower-cased copy of the text, all others against the text as-is. A local run uses the model's
 * local search and tags hits with {@code Engine.RULE} / {@code EntityType.RECOMMENDATION}; otherwise
 * the model's entries search is used and hits are tagged {@code Engine.DICTIONARY} /
 * {@code EntityType.ENTITY}.
 *
 * <p>NOTE(review): this method is private; a proxy-based {@code TimedAspect} may not intercept
 * it, in which case the {@code @Timed} annotation records nothing. Confirm against the AOP setup.
 *
 * @param searchableText   text of the section that is searched
 * @param headline         headline passed on to every created entity
 * @param sectionNumber    section number passed on to every created entity
 * @param dictionary       dictionary whose models are searched
 * @param local            whether the local search of each model is used; also disables NER lookup
 * @param nerEntities      NER results, only consulted when {@code local} is {@code false}
 * @param cellStarts       cell start offsets forwarded to the NER lookup
 * @param manualRedactions manual redactions forwarded to the position lookup
 * @return the dictionary matches that are not false positives together with the NER matches;
 *         both sets are empty when the section has no text
 */
@Timed("redactmanager_findEntities")
private Entities findEntities(SearchableText searchableText,
        String headline,
        int sectionNumber,
        Dictionary dictionary,
        boolean local,
        NerEntities nerEntities,
        List<Integer> cellStarts,
        ManualRedactions manualRedactions) {

    String text = searchableText.asString();
    if (StringUtils.isEmpty(text)) {
        // Nothing to search: hand back two fresh, empty sets.
        return new Entities(new HashSet<>(), new HashSet<>());
    }

    // Both depend on the run mode only, so they are resolved once instead of per model.
    Engine engine = local ? Engine.RULE : Engine.DICTIONARY;
    EntityType entityType = local ? EntityType.RECOMMENDATION : EntityType.ENTITY;

    String loweredText = text.toLowerCase();
    Set<Entity> dictionaryHits = new HashSet<>();
    for (DictionaryModel dictionaryModel : dictionary.getDictionaryModels()) {
        var search = local ? dictionaryModel.getLocalSearch() : dictionaryModel.getEntriesSearch();
        String haystack = dictionaryModel.isCaseInsensitive() ? loweredText : text;
        var details = new FindEntityDetails(dictionaryModel.getType(),
                headline,
                sectionNumber,
                !local,
                dictionaryModel.isDossierDictionary(),
                engine,
                entityType);
        EntitySearchUtils.addOrAddEngine(dictionaryHits, EntitySearchUtils.findEntities(haystack, search, dictionaryModel, details));
    }

    // NER values are only collected for non-local runs.
    Set<Entity> nerHits = new HashSet<>();
    if (!local) {
        nerHits.addAll(getNerValues(sectionNumber, nerEntities, cellStarts, headline));
    }

    var positioned = EntitySearchUtils.clearAndFindPositions(dictionaryHits, searchableText, dictionary, manualRedactions);
    Set<Entity> withoutFalsePositives = positioned.stream()
            .filter(candidate -> !candidate.isFalsePositive())
            .collect(Collectors.toSet());
    return new Entities(withoutFalsePositives, nerHits);
}
/**
 * Converts the NER results recorded for one section into recommendation entities.
 *
 * <p>Nothing is returned unless the NER service is enabled in the settings and {@code nerEntities}
 * holds data for {@code sectionNumber}. When cell starts are given, a result is dropped if any
 * cell start lies strictly between its start and end offset.
 *
 * @param sectionNumber section whose NER results are read
 * @param nerEntities   NER results keyed by section number
 * @param cellStarts    cell start offsets; may be {@code null} or empty, in which case no result is dropped
 * @param headline      headline passed on to every created entity
 * @return a new mutable set of entities created with {@code Engine.NER} and {@code EntityType.RECOMMENDATION}
 */
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {

    Set<Entity> nerValues = new HashSet<>();
    if (!redactionServiceSettings.isNerServiceEnabled() || !nerEntities.getData().containsKey(sectionNumber)) {
        return nerValues;
    }

    nerEntities.getData().get(sectionNumber).forEach(hit -> {
        // A null or empty list of cell starts can never intersect a hit.
        boolean spansCellStart = cellStarts != null
                && cellStarts.stream().anyMatch(cellStart -> hit.getStartOffset() < cellStart && cellStart < hit.getEndOffset());
        if (spansCellStart) {
            return;
        }
        nerValues.add(new Entity(hit.getValue(),
                hit.getType(),
                hit.getStartOffset(),
                hit.getEndOffset(),
                headline,
                sectionNumber,
                false,
                false,
                Engine.NER,
                EntityType.RECOMMENDATION));
    });
    return nerValues;
}
}

View File

@ -59,8 +59,8 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;

View File

@ -81,8 +81,8 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;

View File

@ -76,8 +76,8 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.service.analyze.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;