diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index 2bf40817..67e97d07 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -24,7 +24,7 @@ com.iqser.red.service file-management-service-api-v1 - 2.25.0 + 2.96.0 com.iqser.red.service diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java index 589e6dc3..336b64f8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java @@ -9,10 +9,11 @@ import org.springframework.web.bind.annotation.PostMapping; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; +import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; @FeignClient(name = "EntityRecognitionClient", url = "${entity-recognition-service.url}") public interface EntityRecognitionClient { @PostMapping(value = "/find_authors", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) - Map>> findAuthors(EntityRecognitionRequest entityRecognitionRequest); + NerEntities findAuthors(EntityRecognitionRequest entityRecognitionRequest); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResponse.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/NerEntities.java similarity index 72% rename from redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResponse.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/NerEntities.java index af0d6beb..f45a55a5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResponse.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/NerEntities.java @@ -13,9 +13,9 @@ import lombok.NoArgsConstructor; @Builder @AllArgsConstructor @NoArgsConstructor -public class EntityRecognitionResponse { +public class NerEntities { @Builder.Default - private Map> result = new HashMap<>(); + private Map> result = new HashMap<>(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java index b4aed110..adfb6323 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/queue/RedactionMessageReceiver.java @@ -7,6 +7,7 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeResult; import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest; import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient; import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService; +import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -26,6 +27,7 @@ public class RedactionMessageReceiver { private final ObjectMapper objectMapper; private final AnalyzeService analyzeService; private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient; + private final NerAnalyserService nerAnalyserService; @RabbitHandler @@ -43,6 +45,9 @@ public class RedactionMessageReceiver { // TODO Seperate stucture analysis by other queue analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId())); + // TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a seperate queue after structure analysis + nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + result = analyzeService.analyze(analyzeRequest); log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result .getDuration()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java index 1e257ac6..c9dc03c8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeService.java @@ -60,6 +60,7 @@ public class AnalyzeService { private final RedactionServiceSettings redactionServiceSettings; private final SectionTextBuilderService sectionTextBuilderService; private final SectionGridCreatorService sectionGridCreatorService; + private final NerAnalyserService nerAnalyserService; public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) { @@ -81,7 +82,8 @@ public class AnalyzeService { List sectionTexts = sectionTextBuilderService.buildSectionText(classifiedDoc); sectionGridCreatorService.createSectionGrid(classifiedDoc, pageCount); - redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, sectionTexts)); + Text text = new Text(pageCount, sectionTexts); + redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text); redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc .getSectionGrid()); @@ -94,6 +96,11 @@ public class AnalyzeService { long startTime = System.currentTimeMillis(); var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){ + nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + } dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId()); KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId()); @@ -101,7 +108,7 @@ public class AnalyzeService { Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest .getDossierId()); - PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest); + PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities); dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId()); @@ -141,6 +148,12 @@ public class AnalyzeService { return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true); } + var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){ + nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId()); + } + List reanalysisSections = text.getSectionTexts() .stream() .filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber())) @@ -151,7 +164,7 @@ public class AnalyzeService { Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest .getDossierId()); - PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest); + PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest, nerEntities); var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest .getDossierTemplateId()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 0c302dcc..7a2f95f7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -18,9 +19,8 @@ import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization; import com.iqser.red.service.redaction.v1.model.Status; import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; -import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionResponse; +import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; @@ -50,14 +50,14 @@ public class EntityRedactionService { public PageEntities findEntities(Dictionary dictionary, List sectionTexts, KieContainer kieContainer, - AnalyzeRequest analyzeRequest) { + AnalyzeRequest analyzeRequest, NerEntities nerEntities) { Map> imagesPerPage = new HashMap<>(); - Set entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage); + Set entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities); if (dictionary.hasLocalEntries()) { Map> hintsPerSectionNumber = getHintsPerSection(entities, dictionary); - Set foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage); + Set foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities); EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary); EntitySearchUtils.removeEntitiesContainedInLarger(entities); } @@ -70,13 +70,13 @@ public class EntityRedactionService { public Set findEntities(List reanalysisSections, Dictionary dictionary, KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local, Map> hintsPerSectionNumber, - Map> imagesPerPage) { + Map> imagesPerPage, NerEntities nerEntities) { List sectionSearchableTextPairs = new ArrayList<>(); for (SectionText reanalysisSection : reanalysisSections) { Set entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection - .getSectionNumber(), dictionary, local); + .getSectionNumber(), dictionary, local, nerEntities); if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) { surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection .getCellStarts()); @@ -124,7 +124,7 @@ public class EntityRedactionService { EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities()); entities.addAll(analysedSection.getEntities()); - if(!local) { + if (!local) { for (Image image : analysedSection.getImages()) { imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image); } @@ -198,7 +198,7 @@ public class EntityRedactionService { private Set findEntities(SearchableText searchableText, String headline, int sectionNumber, - Dictionary dictionary, boolean local) { + Dictionary dictionary, boolean local, NerEntities nerEntities) { Set found = new HashSet<>(); String searchableString = searchableText.toString(); @@ -217,35 +217,22 @@ public class EntityRedactionService { } } - if (redactionServiceSettings.isEnableEntityRecognition() && !local) { - found.addAll(getAiEntities(sectionNumber, searchableString, headline)); + if (!local) { + addNerEntities(found, sectionNumber, headline, nerEntities); } return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary); } - private Set getAiEntities(int sectionNumber, String searchableString, String headline) { + private void addNerEntities(Set found, int sectionNumber, String headline, NerEntities nerEntities) { - Set found = new HashSet<>(); - - Map>> response = entityRecognitionClient.findAuthors(EntityRecognitionRequest - .builder() - .data(List.of(EntityRecognitionSection.builder() - .sectionNumber(sectionNumber) - .text(searchableString) - .build())) - .build()); - - EntityRecognitionResponse entityRecognitionResponse = new EntityRecognitionResponse(response.get("result:")); - - if (entityRecognitionResponse.getResult() != null && entityRecognitionResponse.getResult() - .containsKey(String.valueOf(sectionNumber))) { - entityRecognitionResponse.getResult().get(String.valueOf(sectionNumber)).forEach(res -> { + if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult() + .containsKey(sectionNumber)) { + nerEntities.getResult().get(sectionNumber).forEach(res -> { found.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false)); }); } - return found; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java new file mode 100644 index 00000000..df717214 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/NerAnalyserService.java @@ -0,0 +1,49 @@ +package com.iqser.red.service.redaction.v1.server.redaction.service; + +import java.util.stream.Collectors; + +import org.springframework.stereotype.Service; + +import com.iqser.red.service.file.management.v1.api.model.FileType; +import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection; +import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class NerAnalyserService { + + private final RedactionStorageService redactionStorageService; + private final EntityRecognitionClient entityRecognitionClient; + private final RedactionServiceSettings redactionServiceSettings; + + public void computeNerEntities(String dossierId, String fileId) { + + if (redactionServiceSettings.isEnableEntityRecognition()) { + var text = redactionStorageService.getText(dossierId, fileId); + + long start = System.currentTimeMillis(); + + var nerRequest = EntityRecognitionRequest.builder() + .data(text.getSectionTexts() + .stream() + .map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), sectionText + .getText())) + .collect(Collectors.toList())) + .build(); + + var nerResponse = entityRecognitionClient.findAuthors(nerRequest); + + log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId); + + redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse); + } + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java index 021beeff..503b3a84 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -5,6 +5,7 @@ import com.iqser.red.service.file.management.v1.api.model.FileType; import com.iqser.red.service.redaction.v1.model.RedactionLog; import com.iqser.red.service.redaction.v1.model.SectionGrid; import com.iqser.red.service.redaction.v1.server.classification.model.Text; +import com.iqser.red.service.redaction.v1.server.client.model.NerEntities; import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist; import com.iqser.red.storage.commons.service.StorageService; import lombok.Getter; @@ -73,6 +74,25 @@ public class RedactionStorageService { } + public NerEntities getNerEntities(String dossierId, String fileId) { + + InputStreamResource inputStreamResource; + try { + inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES)); + } catch (StorageObjectDoesNotExist e) { + log.debug("NER Entities not available."); + return null; + } + + try { + return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class); + } catch (IOException e) { + throw new RuntimeException("Could not convert NerEntities", e); + } + } + + + public SectionGrid getSectionGrid(String dossierId, String fileId) { var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml index 72e05696..d8543d66 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml @@ -1,7 +1,7 @@ configuration-service.url: "http://configuration-service-v1:8080" image-service.url: "http://image-service-v1:8080" file-management-service.url: "http://file-management-service-v1:8080" -entity-recognition-service.url: "http://entity-recognition-service-v1:8080" +entity-recognition-service.url: "localhost:8080" ribbon: ConnectTimeout: 600000