Pull request #219: RED-1970: Call entity-redaction-service once per document
Merge in RED/redaction-service from RED-1970-2 to master * commit 'f1b3d129ee32d40a78fad038a69027a24d8ccdd8': RED-1970: Call entity-redaction-service once per document
This commit is contained in:
commit
a388a28cec
@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>file-management-service-api-v1</artifactId>
|
||||
<version>2.25.0</version>
|
||||
<version>2.96.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
|
||||
@ -9,10 +9,11 @@ import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
|
||||
@FeignClient(name = "EntityRecognitionClient", url = "${entity-recognition-service.url}")
|
||||
public interface EntityRecognitionClient {
|
||||
|
||||
@PostMapping(value = "/find_authors", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
Map<String, Map<String, List<EntityRecogintionEntity>>> findAuthors(EntityRecognitionRequest entityRecognitionRequest);
|
||||
NerEntities findAuthors(EntityRecognitionRequest entityRecognitionRequest);
|
||||
}
|
||||
|
||||
@ -13,9 +13,9 @@ import lombok.NoArgsConstructor;
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class EntityRecognitionResponse {
|
||||
public class NerEntities {
|
||||
|
||||
@Builder.Default
|
||||
private Map<String, List<EntityRecogintionEntity>> result = new HashMap<>();
|
||||
private Map<Integer, List<EntityRecogintionEntity>> result = new HashMap<>();
|
||||
|
||||
}
|
||||
@ -7,6 +7,7 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -26,6 +27,7 @@ public class RedactionMessageReceiver {
|
||||
private final ObjectMapper objectMapper;
|
||||
private final AnalyzeService analyzeService;
|
||||
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
|
||||
private final NerAnalyserService nerAnalyserService;
|
||||
|
||||
|
||||
@RabbitHandler
|
||||
@ -43,6 +45,9 @@ public class RedactionMessageReceiver {
|
||||
// TODO Separate structure analysis by other queue
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
|
||||
// TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a separate queue after structure analysis
|
||||
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
result = analyzeService.analyze(analyzeRequest);
|
||||
log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result
|
||||
.getDuration());
|
||||
|
||||
@ -60,6 +60,7 @@ public class AnalyzeService {
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final SectionTextBuilderService sectionTextBuilderService;
|
||||
private final SectionGridCreatorService sectionGridCreatorService;
|
||||
private final NerAnalyserService nerAnalyserService;
|
||||
|
||||
|
||||
public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
|
||||
@ -81,7 +82,8 @@ public class AnalyzeService {
|
||||
List<SectionText> sectionTexts = sectionTextBuilderService.buildSectionText(classifiedDoc);
|
||||
sectionGridCreatorService.createSectionGrid(classifiedDoc, pageCount);
|
||||
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, sectionTexts));
|
||||
Text text = new Text(pageCount, sectionTexts);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
|
||||
.getSectionGrid());
|
||||
|
||||
@ -94,6 +96,11 @@ public class AnalyzeService {
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
|
||||
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
|
||||
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
@ -101,7 +108,7 @@ public class AnalyzeService {
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest);
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
|
||||
|
||||
dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId());
|
||||
|
||||
@ -141,6 +148,12 @@ public class AnalyzeService {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
}
|
||||
|
||||
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
|
||||
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
|
||||
List<SectionText> reanalysisSections = text.getSectionTexts()
|
||||
.stream()
|
||||
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
|
||||
@ -151,7 +164,7 @@ public class AnalyzeService {
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest);
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest, nerEntities);
|
||||
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest
|
||||
.getDossierTemplateId());
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -18,9 +19,8 @@ import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
@ -50,14 +50,14 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest) {
|
||||
AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage);
|
||||
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(entities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage);
|
||||
Set<Entity> foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
@ -70,13 +70,13 @@ public class EntityRedactionService {
|
||||
public Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
|
||||
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage) {
|
||||
Map<Integer, Set<Image>> imagesPerPage, NerEntities nerEntities) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
|
||||
Set<Entity> entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection
|
||||
.getSectionNumber(), dictionary, local);
|
||||
.getSectionNumber(), dictionary, local, nerEntities);
|
||||
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
|
||||
.getCellStarts());
|
||||
@ -124,7 +124,7 @@ public class EntityRedactionService {
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
|
||||
if(!local) {
|
||||
if (!local) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
@ -198,7 +198,7 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
Dictionary dictionary, boolean local) {
|
||||
Dictionary dictionary, boolean local, NerEntities nerEntities) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.toString();
|
||||
@ -217,35 +217,22 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
if (redactionServiceSettings.isEnableEntityRecognition() && !local) {
|
||||
found.addAll(getAiEntities(sectionNumber, searchableString, headline));
|
||||
if (!local) {
|
||||
addNerEntities(found, sectionNumber, headline, nerEntities);
|
||||
}
|
||||
|
||||
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getAiEntities(int sectionNumber, String searchableString, String headline) {
|
||||
private void addNerEntities(Set<Entity> found, int sectionNumber, String headline, NerEntities nerEntities) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
Map<String, Map<String, List<EntityRecogintionEntity>>> response = entityRecognitionClient.findAuthors(EntityRecognitionRequest
|
||||
.builder()
|
||||
.data(List.of(EntityRecognitionSection.builder()
|
||||
.sectionNumber(sectionNumber)
|
||||
.text(searchableString)
|
||||
.build()))
|
||||
.build());
|
||||
|
||||
EntityRecognitionResponse entityRecognitionResponse = new EntityRecognitionResponse(response.get("result:"));
|
||||
|
||||
if (entityRecognitionResponse.getResult() != null && entityRecognitionResponse.getResult()
|
||||
.containsKey(String.valueOf(sectionNumber))) {
|
||||
entityRecognitionResponse.getResult().get(String.valueOf(sectionNumber)).forEach(res -> {
|
||||
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
|
||||
.containsKey(sectionNumber)) {
|
||||
nerEntities.getResult().get(sectionNumber).forEach(res -> {
|
||||
found.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false));
|
||||
});
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class NerAnalyserService {
|
||||
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
private final EntityRecognitionClient entityRecognitionClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
public void computeNerEntities(String dossierId, String fileId) {
|
||||
|
||||
if (redactionServiceSettings.isEnableEntityRecognition()) {
|
||||
var text = redactionStorageService.getText(dossierId, fileId);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
var nerRequest = EntityRecognitionRequest.builder()
|
||||
.data(text.getSectionTexts()
|
||||
.stream()
|
||||
.map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), sectionText
|
||||
.getText()))
|
||||
.collect(Collectors.toList()))
|
||||
.build();
|
||||
|
||||
var nerResponse = entityRecognitionClient.findAuthors(nerRequest);
|
||||
|
||||
log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId);
|
||||
|
||||
redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -5,6 +5,7 @@ import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import lombok.Getter;
|
||||
@ -73,6 +74,25 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
public NerEntities getNerEntities(String dossierId, String fileId) {
|
||||
|
||||
InputStreamResource inputStreamResource;
|
||||
try {
|
||||
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("NER Entities not available.");
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Could not convert NerEntities", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public SectionGrid getSectionGrid(String dossierId, String fileId) {
|
||||
|
||||
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID));
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
configuration-service.url: "http://configuration-service-v1:8080"
|
||||
image-service.url: "http://image-service-v1:8080"
|
||||
file-management-service.url: "http://file-management-service-v1:8080"
|
||||
# Keep this in the same http://<service>-v1:8080 form as the other service URLs in this file.
# NOTE(review): a scheme-less "localhost:8080" value looks like a local debugging override and should not be committed - confirm.
entity-recognition-service.url: "http://entity-recognition-service-v1:8080"
|
||||
|
||||
ribbon:
|
||||
ConnectTimeout: 600000
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user