diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java new file mode 100644 index 00000000..589e6dc3 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/EntityRecognitionClient.java @@ -0,0 +1,18 @@ +package com.iqser.red.service.redaction.v1.server.client; + +import java.util.List; +import java.util.Map; + +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.PostMapping; + +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; + +@FeignClient(name = "EntityRecognitionClient", url = "${entity-recognition-service.url}") +public interface EntityRecognitionClient { + + @PostMapping(value = "/find_authors", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) + Map>> findAuthors(EntityRecognitionRequest entityRecognitionRequest); +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecogintionEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecogintionEntity.java new file mode 100644 index 00000000..b86a1b66 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecogintionEntity.java @@ -0,0 +1,19 @@ +package com.iqser.red.service.redaction.v1.server.client.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import 
lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class EntityRecogintionEntity { // One entity reported by the recognition service. NOTE(review): "Recogintion" is a misspelling of "Recognition"; a rename would also have to touch the file name and every import.
+
+    private String value; // handed to the Entity constructor in EntityRedactionService.getAiEntities
+    private int startOffset; // NOTE(review): presumably an offset into the section text that was sent - confirm
+    private int endOffset; // NOTE(review): whether this bound is inclusive or exclusive is not visible here - confirm
+    private String type; // handed to the Entity constructor unchanged
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionRequest.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionRequest.java
new file mode 100644
index 00000000..7e15bc69
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionRequest.java
@@ -0,0 +1,18 @@
+package com.iqser.red.service.redaction.v1.server.client.model;
+
+import java.util.List;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class EntityRecognitionRequest { // Argument type of EntityRecognitionClient.findAuthors.
+
+    private List data; // EntityRedactionService fills this with EntityRecognitionSection objects. NOTE(review): the element type argument seems to have been lost in this extract - confirm
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResponse.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResponse.java
new file mode 100644
index 00000000..af0d6beb
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResponse.java
@@ -0,0 +1,21 @@
+package com.iqser.red.service.redaction.v1.server.client.model;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class EntityRecognitionResponse { // Holds the recognised entities of one service response, grouped in a map.
+
+    @Builder.Default
+    private Map> result = new HashMap<>(); // NOTE(review): judging by EntityRedactionService the keys are section numbers as strings and the values are lists of EntityRecogintionEntity; the type arguments seem to have been stripped in this extract - confirm
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResult.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResult.java
new file mode 100644
index 00000000..cb894b5d
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionResult.java
@@ -0,0 +1,20 @@
+package com.iqser.red.service.redaction.v1.server.client.model;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class EntityRecognitionResult { // NOTE(review): not referenced anywhere else in this diff and shaped like EntityRecognitionResponse - possibly a leftover; confirm before relying on it.
+
+    @Builder.Default
+    private Map> entities = new HashMap<>(); // initialised to an empty HashMap. NOTE(review): the type arguments cannot be recovered from this extract.
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionSection.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionSection.java
new file mode 100644
index 00000000..3172aa52
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/model/EntityRecognitionSection.java
@@ -0,0 +1,16 @@
+package com.iqser.red.service.redaction.v1.server.client.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class EntityRecognitionSection { // One section as placed into EntityRecognitionRequest.data by EntityRedactionService.
+
+    private int sectionNumber; // section number; EntityRedactionService uses its string form as the key when reading the response
+    private String text; // section text sent to the service
+}
diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index c1dc3f11..67e86ca0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -2,14 +2,22 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; import com.iqser.red.service.redaction.v1.model.*; import com.iqser.red.service.redaction.v1.server.classification.model.*; +import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionResponse; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.*; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; +import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; + import org.apache.commons.collections4.CollectionUtils; import 
org.apache.commons.lang3.StringUtils; import org.kie.api.runtime.KieContainer; @@ -28,6 +36,8 @@ public class EntityRedactionService { private final DictionaryService dictionaryService; private final DroolsExecutionService droolsExecutionService; private final SurroundingWordsService surroundingWordsService; + private final EntityRecognitionClient entityRecognitionClient; + private final RedactionServiceSettings redactionServiceSettings; public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions, @@ -58,7 +68,8 @@ public class EntityRedactionService { } - public Map> convertToEnititesPerPage(Set entities){ + public Map> convertToEnititesPerPage(Set entities) { + Map> entitiesPerPage = new HashMap<>(); for (Entity entity : entities) { Map> sequenceOnPage = new HashMap<>(); @@ -68,8 +79,7 @@ public class EntityRedactionService { } for (Map.Entry> entry : sequenceOnPage.entrySet()) { - entitiesPerPage - .computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>()) + entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>()) .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity .getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity @@ -80,17 +90,18 @@ public class EntityRedactionService { } - public Map> getHintsPerSection(Set entities, Dictionary dictionary){ + public Map> getHintsPerSection(Set entities, Dictionary dictionary) { + Map> hintsPerSectionNumber = new HashMap<>(); entities.stream().forEach(entity -> { if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) { - hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()) - .add(entity); + hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()).add(entity); } }); return 
hintsPerSectionNumber; } + private Set findEntities(Document classifiedDoc, KieContainer kieContainer, ManualRedactions manualRedactions, Dictionary dictionary, boolean local, Map> hintsPerSectionNumber, @@ -148,7 +159,8 @@ public class EntityRedactionService { } - public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary){ + public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) { + analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> { if (dictionary.isRecommendation(key)) { analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> { @@ -393,11 +405,16 @@ public class EntityRedactionService { } } + if (redactionServiceSettings.isEnableEntityRecognition() && !local) { + found.addAll(getAiEntities(sectionNumber, searchableString, headline)); + } + return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary); } - private Image convertAndRecategorize(PdfImage pdfImage, int sectionNumber, String headline, ManualRedactions manualRedactions) { + private Image convertAndRecategorize(PdfImage pdfImage, int sectionNumber, String headline, + ManualRedactions manualRedactions) { Image image = Image.builder() .type(pdfImage.getImageType().equals(ImageType.OTHER) ? 
"image" : pdfImage.getImageType() @@ -423,4 +440,28 @@ public class EntityRedactionService { return image; } + + private Set getAiEntities(int sectionNumber, String searchableString, String headline) { + + Set found = new HashSet<>(); + + Map>> response = entityRecognitionClient.findAuthors(EntityRecognitionRequest + .builder() + .data(List.of(EntityRecognitionSection.builder() + .sectionNumber(sectionNumber) + .text(searchableString) + .build())) + .build()); + + EntityRecognitionResponse entityRecognitionResponse = new EntityRecognitionResponse(response.get("result:")); + + if (entityRecognitionResponse.getResult() != null && entityRecognitionResponse.getResult() + .containsKey(String.valueOf(sectionNumber))) { + entityRecognitionResponse.getResult().get(String.valueOf(sectionNumber)).forEach(res -> { + found.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false)); + }); + } + return found; + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java index dfc34079..901d8042 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java @@ -17,4 +17,6 @@ public class RedactionServiceSettings { private int analysisVersion = 1; + private boolean enableEntityRecognition = true; + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml index 4b511179..72e05696 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml @@ -1,6 +1,7 @@ configuration-service.url: "http://configuration-service-v1:8080" image-service.url: "http://image-service-v1:8080" file-management-service.url: "http://file-management-service-v1:8080" +entity-recognition-service.url: "http://entity-recognition-service-v1:8080" ribbon: ConnectTimeout: 600000 @@ -17,3 +18,4 @@ platform.multi-tenancy: redaction-service: enable-image-classification: false + enable-entity-recognition: false