RED-1920: Integrated entity-recognition-service

This commit is contained in:
Dominique Eifländer 2021-08-26 11:53:02 +02:00
parent e3992f169a
commit 76bf6773db
9 changed files with 165 additions and 8 deletions

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.client;
import java.util.List;
import java.util.Map;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
/**
 * Declarative OpenFeign client for the entity-recognition service.
 * <p>
 * The base URL is resolved from the {@code entity-recognition-service.url} property.
 */
@FeignClient(name = "EntityRecognitionClient", url = "${entity-recognition-service.url}")
public interface EntityRecognitionClient {
/**
 * Sends the given request as JSON via POST to {@code /find_authors} and returns the decoded JSON response.
 * <p>
 * The response is exposed as a raw nested map rather than a typed DTO. The caller in
 * {@code EntityRedactionService} reads the outer entry under the key {@code "result:"} and then looks up
 * the inner entry by the section number rendered as a string.
 * NOTE(review): whether the service emits further outer keys cannot be told from this interface - confirm
 * against the service contract.
 *
 * @param entityRecognitionRequest request payload; no parameter annotation is present here
 *                                 (presumably sent as the request body by Feign's Spring contract - confirm)
 * @return nested map of outer key to (inner key to recognised entities)
 */
@PostMapping(value = "/find_authors", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
Map<String, Map<String, List<EntityRecogintionEntity>>> findAuthors(EntityRecognitionRequest entityRecognitionRequest);
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One entity hit as delivered by {@code EntityRecognitionClient#findAuthors}.
 * <p>
 * Lombok generates getters/setters, equals/hashCode/toString, a builder, an all-args constructor
 * (parameters in field declaration order) and a no-args constructor.
 * <p>
 * NOTE(review): the class name misspells "Recognition" ("Recogintion"). It is referenced under this
 * name by the client interface and the redaction service, so renaming has to be done across all usages.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecogintionEntity {
// Text of the recognised entity.
private String value;
// Start position of the hit (presumably a character offset into the section text that was sent - confirm).
private int startOffset;
// End position of the hit (whether it is inclusive or exclusive is not visible here - confirm).
private int endOffset;
// Entity type label assigned by the service.
private String type;
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Request payload accepted by {@code EntityRecognitionClient#findAuthors}.
 * <p>
 * Lombok generates accessors, a builder, an all-args constructor and a no-args constructor.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionRequest {
// Sections (number plus text) to be analysed in this call.
private List<EntityRecognitionSection> data;
}

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Holder for the per-section entity hits extracted from the entity-recognition response.
 * <p>
 * Lombok generates accessors, a builder, an all-args constructor and a no-args constructor.
 * The all-args constructor stores exactly what it is given, so {@code result} may be {@code null}
 * when the instance is created that way; callers should null-check {@link #getResult()}.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionResponse {
// Hits keyed by a string; the redaction service looks entries up by the section number rendered as a string.
// When built via the builder without setting this field, the empty-map default below is used.
@Builder.Default
private Map<String, List<EntityRecogintionEntity>> result = new HashMap<>();
}

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Container for recognised entities grouped under an integer key.
 * <p>
 * Lombok generates accessors, a builder, an all-args constructor and a no-args constructor.
 * NOTE(review): no usage of this class is visible alongside its declaration - confirm it is needed.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionResult {
// Hits per integer key (presumably the section number - confirm against the producer of this object).
// When built via the builder without setting this field, the empty-map default below is used.
@Builder.Default
private Map<Integer, List<EntityRecogintionEntity>> entities = new HashMap<>();
}

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One section of a document as submitted to the entity-recognition service inside an
 * {@code EntityRecognitionRequest}.
 * <p>
 * Lombok generates accessors, a builder, an all-args constructor and a no-args constructor.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionSection {
// Number identifying the section; the redaction service uses it (as a string) to find this section's hits in the response.
private int sectionNumber;
// Text of the section that the service should analyse.
private String text;
}

View File

@ -2,14 +2,22 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.*;
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionResponse;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
@ -28,6 +36,8 @@ public class EntityRedactionService {
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
private final SurroundingWordsService surroundingWordsService;
private final EntityRecognitionClient entityRecognitionClient;
private final RedactionServiceSettings redactionServiceSettings;
public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions,
@ -58,7 +68,8 @@ public class EntityRedactionService {
}
public Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities){
public Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities) {
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
@ -68,8 +79,7 @@ public class EntityRedactionService {
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
entitiesPerPage
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
@ -80,17 +90,18 @@ public class EntityRedactionService {
}
public Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary){
public Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
entities.stream().forEach(entity -> {
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
.add(entity);
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()).add(entity);
}
});
return hintsPerSectionNumber;
}
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
@ -148,7 +159,8 @@ public class EntityRedactionService {
}
public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary){
public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)) {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
@ -393,11 +405,16 @@ public class EntityRedactionService {
}
}
if (redactionServiceSettings.isEnableEntityRecognition() && !local) {
found.addAll(getAiEntities(sectionNumber, searchableString, headline));
}
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
}
private Image convertAndRecategorize(PdfImage pdfImage, int sectionNumber, String headline, ManualRedactions manualRedactions) {
private Image convertAndRecategorize(PdfImage pdfImage, int sectionNumber, String headline,
ManualRedactions manualRedactions) {
Image image = Image.builder()
.type(pdfImage.getImageType().equals(ImageType.OTHER) ? "image" : pdfImage.getImageType()
@ -423,4 +440,28 @@ public class EntityRedactionService {
return image;
}
/**
 * Sends the text of one section to the entity-recognition service and converts the hits reported
 * for that section into {@link Entity} instances.
 *
 * @param sectionNumber    number of the section; sent with the request and, rendered as a string,
 *                         used to look up this section's hits in the response
 * @param searchableString section text that is sent to the service
 * @param headline         headline handed to every created entity
 * @return a new mutable set with the entities reported for the section; empty (never {@code null})
 *         when the response is missing, carries no result map, or has no hits for the section
 */
private Set<Entity> getAiEntities(int sectionNumber, String searchableString, String headline) {

    Set<Entity> found = new HashSet<>();

    EntityRecognitionRequest request = EntityRecognitionRequest.builder()
            .data(List.of(EntityRecognitionSection.builder()
                    .sectionNumber(sectionNumber)
                    .text(searchableString)
                    .build()))
            .build();

    Map<String, Map<String, List<EntityRecogintionEntity>>> response = entityRecognitionClient.findAuthors(request);
    if (response == null) {
        // A null body would otherwise fail with a NullPointerException on the lookup below.
        return found;
    }

    // NOTE(review): the key really is "result:" including the trailing colon. Kept unchanged because the
    // service contract is not visible here - confirm against the entity-recognition service.
    EntityRecognitionResponse entityRecognitionResponse = new EntityRecognitionResponse(response.get("result:"));
    Map<String, List<EntityRecogintionEntity>> resultsBySection = entityRecognitionResponse.getResult();
    if (resultsBySection == null) {
        return found;
    }

    // Single get() + null check instead of containsKey() + get(): also covers a key mapped to null.
    List<EntityRecogintionEntity> sectionResults = resultsBySection.get(String.valueOf(sectionNumber));
    if (sectionResults == null) {
        return found;
    }

    for (EntityRecogintionEntity res : sectionResults) {
        // The two trailing boolean flags are passed as false exactly as before; their meaning is defined by Entity.
        found.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false));
    }
    return found;
}
}

View File

@ -17,4 +17,6 @@ public class RedactionServiceSettings {
private int analysisVersion = 1;
private boolean enableEntityRecognition = true;
}

View File

@ -1,6 +1,7 @@
configuration-service.url: "http://configuration-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
file-management-service.url: "http://file-management-service-v1:8080"
entity-recognition-service.url: "http://entity-recognition-service-v1:8080"
ribbon:
ConnectTimeout: 600000
@ -17,3 +18,4 @@ platform.multi-tenancy:
redaction-service:
enable-image-classification: false
enable-entity-recognition: false