Merge branch 'master' of ssh://git.iqser.com:2222/red/redaction-service into Test
This commit is contained in:
commit
dd81ac79e4
@ -0,0 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class Argument {
|
||||
|
||||
private String name;
|
||||
private ArgumentType type;
|
||||
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public enum ArgumentType {
|
||||
|
||||
INTEGER, BOOLEAN, STRING, FILE_ATTRIBUTE, REGEX, TYPE, RULE_NUMBER, LEGAL_BASIS, REFERENCE_TYPE
|
||||
|
||||
}
|
||||
@ -0,0 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public enum Engine {
|
||||
DICTIONARY, NER, RULE
|
||||
}
|
||||
@ -7,9 +7,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@ -62,4 +62,10 @@ public class RedactionLogEntry {
|
||||
@Builder.Default
|
||||
private List<Change> changes = new ArrayList<>();
|
||||
|
||||
private Set<Engine> engines= new HashSet<>();
|
||||
|
||||
private Set<String> reference = new HashSet<>();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class RuleBuilderModel {
|
||||
|
||||
private List<RuleElement> whenClauses = new ArrayList<>();
|
||||
private List<RuleElement> thenConditions = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RuleElement {
|
||||
|
||||
private String conditionName;
|
||||
private List<Argument> arguments = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class StructureAnalyzeRequest {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.resources;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
public interface RuleBuilderResource {
|
||||
|
||||
@PostMapping(value = "/rule-builder-model", produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
RuleBuilderModel getRuleBuilderModel();
|
||||
|
||||
}
|
||||
@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>file-management-service-api-v1</artifactId>
|
||||
<version>2.25.0</version>
|
||||
<version>2.96.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
|
||||
@ -1,19 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class Document {
|
||||
@ -23,20 +18,14 @@ public class Document {
|
||||
private List<Header> headers = new ArrayList<>();
|
||||
private List<Footer> footers = new ArrayList<>();
|
||||
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
|
||||
private Map<Integer, List<Entity>> entities = new HashMap<>();
|
||||
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
|
||||
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
private boolean headlines;
|
||||
|
||||
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
private SectionGrid sectionGrid = new SectionGrid();
|
||||
private DictionaryVersion dictionaryVersion;
|
||||
private long rulesVersion;
|
||||
|
||||
private List<SectionText> sectionText = new ArrayList<>();
|
||||
|
||||
private Map<Integer, Set<Image>> images = new HashMap<>();
|
||||
|
||||
}
|
||||
|
||||
@ -9,10 +9,11 @@ import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
|
||||
@FeignClient(name = "EntityRecognitionClient", url = "${entity-recognition-service.url}")
|
||||
public interface EntityRecognitionClient {
|
||||
|
||||
@PostMapping(value = "/find_authors", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
Map<String, Map<String, List<EntityRecogintionEntity>>> findAuthors(EntityRecognitionRequest entityRecognitionRequest);
|
||||
NerEntities findAuthors(EntityRecognitionRequest entityRecognitionRequest);
|
||||
}
|
||||
|
||||
@ -13,9 +13,9 @@ import lombok.NoArgsConstructor;
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class EntityRecognitionResponse {
|
||||
public class NerEntities {
|
||||
|
||||
@Builder.Default
|
||||
private Map<String, List<EntityRecogintionEntity>> result = new HashMap<>();
|
||||
private Map<Integer, List<EntityRecogintionEntity>> result = new HashMap<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
package com.iqser.red.service.redaction.v1.server.controller;
|
||||
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
import com.iqser.red.service.redaction.v1.resources.RuleBuilderResource;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.rulebuilder.RuleBuilderModelService;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
@RestController
|
||||
@RequiredArgsConstructor
|
||||
public class RuleBuilderController implements RuleBuilderResource {
|
||||
|
||||
private final RuleBuilderModelService ruleBuilderModelService;
|
||||
|
||||
@Override
|
||||
public RuleBuilderModel getRuleBuilderModel() {
|
||||
return ruleBuilderModelService.getRuleBuilderModel();
|
||||
}
|
||||
|
||||
}
|
||||
@ -4,10 +4,14 @@ import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
|
||||
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
||||
import org.springframework.stereotype.Service;
|
||||
@ -21,8 +25,10 @@ import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfigura
|
||||
public class RedactionMessageReceiver {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ReanalyzeService reanalyzeService;
|
||||
private final AnalyzeService analyzeService;
|
||||
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
|
||||
private final NerAnalyserService nerAnalyserService;
|
||||
|
||||
|
||||
@RabbitHandler
|
||||
@RabbitListener(queues = REDACTION_QUEUE)
|
||||
@ -32,15 +38,25 @@ public class RedactionMessageReceiver {
|
||||
log.info("Processing analyze request: {}", analyzeRequest);
|
||||
AnalyzeResult result;
|
||||
if (analyzeRequest.isReanalyseOnlyIfPossible()) {
|
||||
result = reanalyzeService.reanalyze(analyzeRequest);
|
||||
result = analyzeService.reanalyze(analyzeRequest);
|
||||
log.info("Successfully reanalyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest
|
||||
.getFileId(), result.getDuration());
|
||||
} else {
|
||||
result = reanalyzeService.analyze(analyzeRequest);
|
||||
// TODO Seperate stucture analysis by other queue
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
|
||||
// TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a seperate queue after structure analysis
|
||||
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
result = analyzeService.analyze(analyzeRequest);
|
||||
log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result
|
||||
.getDuration());
|
||||
}
|
||||
log.info("Successfully analyzed {}", analyzeRequest);
|
||||
|
||||
fileStatusProcessingUpdateClient.analysisSuccessful(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result);
|
||||
}
|
||||
|
||||
|
||||
@RabbitHandler
|
||||
@RabbitListener(queues = REDACTION_DQL)
|
||||
public void receiveAnalyzeRequestDQL(String in) throws JsonProcessingException {
|
||||
|
||||
@ -1,17 +1,20 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
public class Entity implements ReasonHolder {
|
||||
|
||||
|
||||
private final String word;
|
||||
private final String type;
|
||||
private boolean redaction;
|
||||
@ -39,8 +42,15 @@ public class Entity implements ReasonHolder {
|
||||
|
||||
private boolean isDossierDictionaryEntry;
|
||||
|
||||
private Set<Engine> engines = new HashSet<>();
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, Integer end, boolean isDossierDictionaryEntry) {
|
||||
private Set<Entity> references = new HashSet<>();
|
||||
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason,
|
||||
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
|
||||
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
|
||||
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines, Set<Entity> references) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -57,10 +67,13 @@ public class Entity implements ReasonHolder {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
|
||||
this.engines = engines;
|
||||
this.references = references;
|
||||
}
|
||||
|
||||
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionaryEntry) {
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
|
||||
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -70,6 +83,8 @@ public class Entity implements ReasonHolder {
|
||||
this.sectionNumber = sectionNumber;
|
||||
this.isDictionaryEntry = isDictionaryEntry;
|
||||
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
|
||||
this.engines.add(engine);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,23 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class PageEntities {
|
||||
|
||||
@Builder.Default
|
||||
private Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
private Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
|
||||
|
||||
}
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ArgumentType;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
@ -9,13 +11,11 @@ import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
@ -59,33 +59,45 @@ public class Section {
|
||||
private List<FileAttribute> fileAttributes = new ArrayList<>();
|
||||
|
||||
|
||||
public boolean fileAttributeByIdEquals(String id, String value){
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
return fileAttributes != null && fileAttributes.stream().filter(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue())).findFirst().isPresent();
|
||||
}
|
||||
|
||||
public boolean fileAttributeByPlaceholderEquals(String placeholder, String value){
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
return fileAttributes != null && fileAttributes.stream().filter(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue())).findFirst().isPresent();
|
||||
}
|
||||
|
||||
public boolean fileAttributeByLabelEquals(String label, String value){
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
return fileAttributes != null && fileAttributes.stream().filter(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue())).findFirst().isPresent();
|
||||
}
|
||||
|
||||
|
||||
public boolean fileAttributeByIdEqualsIgnoreCase(String id, String value){
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
return fileAttributes != null && fileAttributes.stream().filter(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue())).findFirst().isPresent();
|
||||
}
|
||||
|
||||
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(String placeholder, String value){
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
return fileAttributes != null && fileAttributes.stream().filter(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equalsIgnoreCase(attribute.getValue())).findFirst().isPresent();
|
||||
}
|
||||
|
||||
public boolean fileAttributeByLabelEqualsIgnoreCase(String label, String value){
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
return fileAttributes != null && fileAttributes.stream().filter(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue())).findFirst().isPresent();
|
||||
}
|
||||
|
||||
|
||||
public boolean rowEquals(String headerName, String value) {
|
||||
@WhenCondition
|
||||
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
|
||||
@ -94,33 +106,36 @@ public class Section {
|
||||
.equals(value);
|
||||
}
|
||||
|
||||
|
||||
public boolean hasTableHeader(String headerName) {
|
||||
@WhenCondition
|
||||
public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) {
|
||||
|
||||
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName);
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesType(String type) {
|
||||
@WhenCondition
|
||||
public boolean matchesType(@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
return entities.stream().anyMatch(entity -> entity.getType().equals(type));
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesImageType(String type) {
|
||||
@WhenCondition
|
||||
public boolean matchesImageType(@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
return images.stream().anyMatch(image -> image.getType().equals(type));
|
||||
}
|
||||
|
||||
|
||||
public boolean headlineContainsWord(String word) {
|
||||
@WhenCondition
|
||||
public boolean headlineContainsWord(@Argument(ArgumentType.STRING) String word) {
|
||||
|
||||
return StringUtils.containsIgnoreCase(headline, word);
|
||||
}
|
||||
|
||||
|
||||
public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group) {
|
||||
@ThenAction
|
||||
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -147,8 +162,11 @@ public class Section {
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
|
||||
|
||||
public void redactImage(String type, int ruleNumber, String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactImage(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
images.forEach(image -> {
|
||||
if (image.getType().equals(type)) {
|
||||
@ -160,8 +178,11 @@ public class Section {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redact(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
@ -176,8 +197,10 @@ public class Section {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void redactNotImage(String type, int ruleNumber, String reason) {
|
||||
@ThenAction
|
||||
public void redactNotImage(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
images.forEach(image -> {
|
||||
if (image.getType().equals(type)) {
|
||||
@ -188,8 +211,10 @@ public class Section {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void redactNot(String type, int ruleNumber, String reason) {
|
||||
@ThenAction
|
||||
public void redactNot(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
@ -204,8 +229,35 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void expandToHintAnnotationByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group,
|
||||
String asType) {
|
||||
@ThenAction
|
||||
public void redactNotAndReference(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.REFERENCE_TYPE) String referenceType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
Set<Entity> references = entities.stream().filter(entity -> entity.getType().equals(referenceType)).collect(Collectors.toSet());
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType()
|
||||
.equals(RECOMMENDATION_PREFIX + type)) {
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
entity.setReferences(references);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ThenAction
|
||||
public void expandToHintAnnotationByRegEx(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.STRING) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -230,8 +282,11 @@ public class Section {
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
|
||||
|
||||
public void addHintAnnotationByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType) {
|
||||
@ThenAction
|
||||
public void addHintAnnotationByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -246,8 +301,12 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactIfPrecededBy(@Argument(ArgumentType.STRING) String prefix,
|
||||
@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type) && searchText.indexOf(prefix + entity.getWord()) != 1) {
|
||||
@ -259,23 +318,32 @@ public class Section {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void addHintAnnotation(String value, String asType) {
|
||||
@ThenAction
|
||||
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null);
|
||||
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
|
||||
}
|
||||
|
||||
|
||||
public void addRedaction(String value, String asType, int ruleNumber, String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void addRedaction(@Argument(ArgumentType.STRING) String value,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, true, true, ruleNumber, reason, legalBasis);
|
||||
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
|
||||
}
|
||||
|
||||
|
||||
public void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
|
||||
String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactLineAfter(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, "\n");
|
||||
|
||||
@ -293,8 +361,9 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void recommendLineAfter(String start, String asType) {
|
||||
@ThenAction
|
||||
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, "\n");
|
||||
|
||||
@ -317,9 +386,14 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
|
||||
String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -334,8 +408,11 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void addRecommendationByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType) {
|
||||
@ThenAction
|
||||
public void addRecommendationByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -349,9 +426,14 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType,
|
||||
int ruleNumber, String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactAndRecommendByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -366,9 +448,14 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
|
||||
String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(searchText, start, stop);
|
||||
|
||||
@ -387,9 +474,14 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
|
||||
String reason, String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactLinesBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, stop);
|
||||
|
||||
@ -416,34 +508,48 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void highlightCell(String cellHeader, int ruleNumber, String type) {
|
||||
@ThenAction
|
||||
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
annotateCell(cellHeader, ruleNumber, type, false, false, null, null);
|
||||
}
|
||||
|
||||
|
||||
public void redactCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations, String reason,
|
||||
String legalBasis) {
|
||||
@ThenAction
|
||||
public void redactCell(@Argument(ArgumentType.STRING) String cellHeader,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
annotateCell(cellHeader, ruleNumber, type, true, addAsRecommendations, reason, legalBasis);
|
||||
}
|
||||
|
||||
|
||||
public void redactNotCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations,
|
||||
String reason) {
|
||||
@ThenAction
|
||||
public void redactNotCell(@Argument(ArgumentType.STRING) String cellHeader,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
annotateCell(cellHeader, ruleNumber, type, false, addAsRecommendations, reason, null);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted,
|
||||
int ruleNumber, String reason, String legalBasis) {
|
||||
private Set<Entity> findEntities(@Argument(ArgumentType.STRING) String value,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean caseInsensitive,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redacted,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
|
||||
String searchValue = caseInsensitive ? value.toLowerCase() : value;
|
||||
|
||||
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true, false);
|
||||
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, Engine.RULE);
|
||||
|
||||
found.forEach(entity -> {
|
||||
if (redacted) {
|
||||
@ -469,7 +575,7 @@ public class Section {
|
||||
} else {
|
||||
String word = value.toString();
|
||||
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false);
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE);
|
||||
entity.setRedaction(redact);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
@ -507,6 +613,25 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.METHOD)
|
||||
public @interface WhenCondition {
|
||||
|
||||
}
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.METHOD)
|
||||
public @interface ThenAction {
|
||||
|
||||
}
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.PARAMETER)
|
||||
public @interface Argument {
|
||||
|
||||
ArgumentType value() default ArgumentType.STRING;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.rulebuilder;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Argument;
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
import com.iqser.red.service.redaction.v1.model.RuleElement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service
|
||||
public class RuleBuilderModelService {
|
||||
|
||||
public RuleBuilderModel getRuleBuilderModel() {
|
||||
|
||||
var whenConditions = Arrays.stream(Section.class.getDeclaredMethods()).filter(m -> m.isAnnotationPresent(Section.WhenCondition.class)).collect(Collectors.toList());
|
||||
var thenActions = Arrays.stream(Section.class.getDeclaredMethods()).filter(m -> m.isAnnotationPresent(Section.ThenAction.class)).collect(Collectors.toList());
|
||||
|
||||
RuleBuilderModel ruleBuilderModel = new RuleBuilderModel();
|
||||
|
||||
|
||||
ruleBuilderModel.setWhenClauses(whenConditions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
|
||||
ruleBuilderModel.setThenConditions(thenActions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
|
||||
|
||||
return ruleBuilderModel;
|
||||
}
|
||||
|
||||
private List<Argument> toArguments(Method c) {
|
||||
return Arrays.stream(c.getParameters())
|
||||
.map(parameter -> new Argument(parameter.getName(), parameter.getAnnotation(Section.Argument.class).value()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
@ -1,35 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class AnalyzeResponseService {
|
||||
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
public AnalyzeResult createAnalyzeResponse(String dossierId, String fileId, long duration, int pageCount,
|
||||
RedactionLog redactionLog, boolean hasUpdates) {
|
||||
|
||||
|
||||
return AnalyzeResult.builder()
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.duration(duration)
|
||||
.numberOfPages(pageCount)
|
||||
.hasUpdates(hasUpdates)
|
||||
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
|
||||
.rulesVersion(redactionLog.getRulesVersion())
|
||||
.dictionaryVersion(redactionLog.getDictionaryVersion())
|
||||
.legalBasisVersion(redactionLog.getLegalBasisVersion())
|
||||
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
|
||||
.build();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,283 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
|
||||
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class AnalyzeService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final RedactionChangeLogService redactionChangeLogService;
|
||||
private final LegalBasisClient legalBasisClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final SectionTextBuilderService sectionTextBuilderService;
|
||||
private final SectionGridCreatorService sectionGridCreatorService;
|
||||
private final NerAnalyserService nerAnalyserService;
|
||||
|
||||
|
||||
public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var pageCount = 0;
|
||||
Document classifiedDoc;
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
|
||||
.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
pageCount = classifiedDoc.getPages().size();
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
|
||||
List<SectionText> sectionTexts = sectionTextBuilderService.buildSectionText(classifiedDoc);
|
||||
sectionGridCreatorService.createSectionGrid(classifiedDoc, pageCount);
|
||||
|
||||
Text text = new Text(pageCount, sectionTexts);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
|
||||
.getSectionGrid());
|
||||
|
||||
log.info("Document structure analysis successful, took: {}", System.currentTimeMillis() - startTime);
|
||||
}
|
||||
|
||||
|
||||
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
|
||||
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
|
||||
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
|
||||
|
||||
dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId());
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest
|
||||
.getDossierTemplateId());
|
||||
|
||||
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
|
||||
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), redactionLogEntries, legalBasis, dictionary
|
||||
.getVersion()
|
||||
.getDossierTemplateVersion(), dictionary.getVersion()
|
||||
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
// not yet ready for reanalysis
|
||||
if (redactionLog == null || text == null || text.getNumberOfPages() == 0) {
|
||||
return analyze(analyzeRequest);
|
||||
}
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog
|
||||
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse()
|
||||
.isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
}
|
||||
|
||||
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
|
||||
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
|
||||
List<SectionText> reanalysisSections = text.getSectionTexts()
|
||||
.stream()
|
||||
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
|
||||
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest, nerEntities);
|
||||
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest
|
||||
.getDossierTemplateId());
|
||||
|
||||
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
|
||||
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
}
|
||||
|
||||
|
||||
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
|
||||
Text text, AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
|
||||
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||
if (entry.isManual() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
|
||||
sectionsToReanalyse.add(entry.getSectionNumber());
|
||||
}
|
||||
if (entry.isImage() || entry.getType().equals("image")) {
|
||||
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
|
||||
}
|
||||
}
|
||||
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
|
||||
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
|
||||
sectionsToReanalyse.add(sectionText.getSectionNumber());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.info("Should reanalyze {} sections for request: {}, took: {}", sectionsToReanalyse.size(), analyzeRequest, System.currentTimeMillis() - start);
|
||||
|
||||
return sectionsToReanalyse;
|
||||
}
|
||||
|
||||
|
||||
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
|
||||
RedactionLog redactionLog, Text text, DictionaryVersion dictionaryVersion,
|
||||
boolean isReanalysis) {
|
||||
|
||||
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
|
||||
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
|
||||
|
||||
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
|
||||
|
||||
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange
|
||||
.getRedactionLog());
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
|
||||
return AnalyzeResult.builder()
|
||||
.dossierId(analyzeRequest.getDossierId())
|
||||
.fileId(analyzeRequest.getFileId())
|
||||
.duration(duration)
|
||||
.numberOfPages(text.getNumberOfPages())
|
||||
.hasUpdates(redactionLogChange.isHasChanges())
|
||||
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
|
||||
.rulesVersion(redactionLog.getRulesVersion())
|
||||
.dictionaryVersion(redactionLog.getDictionaryVersion())
|
||||
.legalBasisVersion(redactionLog.getLegalBasisVersion())
|
||||
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
|
||||
.wasReanalyzed(isReanalysis)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
return Stream.concat(manualRedactions.getManualLegalBasisChanges()
|
||||
.stream()
|
||||
.map(ManualLegalBasisChange::getId), Stream.concat(manualRedactions.getImageRecategorizations()
|
||||
.stream()
|
||||
.map(ManualImageRecategorization::getId), Stream.concat(manualRedactions.getIdsToRemove()
|
||||
.stream()
|
||||
.map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))))
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
public Image convert(RedactionLogEntry entry) {
|
||||
|
||||
Rectangle position = entry.getPositions().get(0);
|
||||
|
||||
return Image.builder()
|
||||
.type(entry.getType())
|
||||
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft()
|
||||
.getY(), position.getWidth(), position.getHeight()))
|
||||
.sectionNumber(entry.getSectionNumber())
|
||||
.section(entry.getSection())
|
||||
.page(position.getPage())
|
||||
.hasTransparency(entry.isImageHasTransparency())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
|
||||
|
||||
if (excludedPages != null && !excludedPages.isEmpty()) {
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> entry.getPositions().forEach(pos -> {
|
||||
if (excludedPages.contains(pos.getPage())) {
|
||||
entry.setExcluded(true);
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -33,7 +33,7 @@ public class DictionaryService {
|
||||
|
||||
public DictionaryVersion updateDictionary(String dossierTemplateId, String dossierId) {
|
||||
|
||||
log.info("Updating dictionary data for: {} / {}", dossierTemplateId, dossierId);
|
||||
log.info("Updating dictionary data for dossierTemplate {} and dossier {}", dossierTemplateId, dossierId);
|
||||
long dossierTemplateDictionaryVersion = dictionaryClient.getVersion(dossierTemplateId, GLOBAL_DOSSIER);
|
||||
var dossierTemplateDictionary = dictionariesByDossierTemplate.get(dossierTemplateId);
|
||||
if (dossierTemplateDictionary == null || dossierTemplateDictionaryVersion > dossierTemplateDictionary.getDictionaryVersion()) {
|
||||
@ -164,7 +164,6 @@ public class DictionaryService {
|
||||
|
||||
public float[] getColor(String type, String dossierTemplateId) {
|
||||
|
||||
log.info("requested : {} / {}",type,dossierTemplateId);
|
||||
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.getColor();
|
||||
|
||||
@ -1,74 +1,140 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecogintionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class EntityRedactionService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
private final EntityRecognitionClient entityRecognitionClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions,
|
||||
String dossierId, List<FileAttribute> fileAttributes) {
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
|
||||
dictionaryService.updateDictionary(dossierTemplateId, dossierId);
|
||||
KieContainer container = droolsExecutionService.updateRules(dossierTemplateId);
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion(dossierTemplateId);
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(dossierTemplateId, dossierId);
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null, fileAttributes));
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(documentEntities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber, fileAttributes);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(entities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
|
||||
classifiedDoc.setEntities(convertToEnititesPerPage(documentEntities));
|
||||
|
||||
dictionaryService.updateExternalDictionary(dictionary, dossierTemplateId);
|
||||
|
||||
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
|
||||
classifiedDoc.setRulesVersion(rulesVersion);
|
||||
Map<Integer, List<Entity>> entitiesPerPage = convertToEnititesPerPage(entities);
|
||||
return new PageEntities(entitiesPerPage, imagesPerPage);
|
||||
}
|
||||
|
||||
|
||||
public Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities) {
|
||||
public Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
|
||||
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage, NerEntities nerEntities) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
|
||||
Set<Entity> entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection
|
||||
.getSectionNumber(), dictionary, local, nerEntities, reanalysisSection.getCellStarts());
|
||||
|
||||
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
|
||||
.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
if (!local && reanalysisSection.getImages() != null && !reanalysisSection.getImages()
|
||||
.isEmpty() && analyzeRequest.getManualRedactions() != null && analyzeRequest.getManualRedactions()
|
||||
.getImageRecategorizations() != null) {
|
||||
for (Image image : reanalysisSection.getImages()) {
|
||||
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
|
||||
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
|
||||
.getImageRecategorizations()) {
|
||||
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId()
|
||||
.equals(imageId)) {
|
||||
image.setType(imageRecategorization.getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream
|
||||
.concat(entities.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber())
|
||||
.stream())
|
||||
.collect(Collectors.toSet()) : entities)
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
.sectionNumber(reanalysisSection.getSectionNumber())
|
||||
.tabularData(reanalysisSection.getTabularData())
|
||||
.searchableText(reanalysisSection.getSearchableText())
|
||||
.dictionary(dictionary)
|
||||
.images(reanalysisSection.getImages())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.build(), reanalysisSection.getSearchableText()));
|
||||
}
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
entities.addAll(analysedSection.getEntities());
|
||||
|
||||
if (!local) {
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
}
|
||||
});
|
||||
|
||||
return entities;
|
||||
}
|
||||
|
||||
|
||||
private Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities) {
|
||||
|
||||
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
|
||||
for (Entity entity : entities) {
|
||||
@ -83,14 +149,14 @@ public class EntityRedactionService {
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
|
||||
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines(), entity.getReferences()));
|
||||
}
|
||||
}
|
||||
return entitiesPerPage;
|
||||
}
|
||||
|
||||
|
||||
public Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
|
||||
private Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||
entities.stream().forEach(entity -> {
|
||||
@ -102,64 +168,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
|
||||
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
List<FileAttribute> fileAttributes) {
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
|
||||
AtomicInteger sectionNumber = new AtomicInteger(1);
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
List<Table> tables = paragraph.getTables();
|
||||
for (Table table : tables) {
|
||||
if (table.getColCount() == 2) {
|
||||
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
|
||||
} else {
|
||||
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
|
||||
}
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
sectionSearchableTextPairs.add(processText(classifiedDoc, paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph
|
||||
.getHeadline(), manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, paragraph
|
||||
.getImages(), fileAttributes));
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
for (Header header : classifiedDoc.getHeaders()) {
|
||||
sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes));
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
for (Footer footer : classifiedDoc.getFooters()) {
|
||||
sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes));
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
|
||||
sectionSearchableTextPairs.add(processText(classifiedDoc, unclassifiedText.getSearchableText(), unclassifiedText
|
||||
.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes));
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(analysedSection.getEntities());
|
||||
|
||||
for (Image image : analysedSection.getImages()) {
|
||||
classifiedDoc.getImages().computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
|
||||
}
|
||||
|
||||
addLocalValuesToDictionary(analysedSection, dictionary);
|
||||
});
|
||||
|
||||
return documentEntities;
|
||||
}
|
||||
|
||||
|
||||
public void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {
|
||||
private void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {
|
||||
|
||||
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
if (dictionary.isRecommendation(key)) {
|
||||
@ -186,207 +195,9 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private List<SectionSearchableTextPair> processTablePerRow(Document classifiedDoc, Table table,
|
||||
AtomicInteger sectionNumber, Dictionary dictionary,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
List<FileAttribute> fileAttributes) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
Map<String, CellValue> tabularData = new HashMap<>();
|
||||
int start = 0;
|
||||
List<Integer> cellStarts = new ArrayList<>();
|
||||
SectionText sectionText = new SectionText();
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
|
||||
.get(0)
|
||||
.getSequences()
|
||||
.get(0)
|
||||
.getPage());
|
||||
sectionText.getSectionAreas().add(sectionArea);
|
||||
sectionText.getTextBlocks().addAll(cell.getTextBlocks());
|
||||
|
||||
int cellStart = start;
|
||||
|
||||
if (!cell.isHeaderCell()) {
|
||||
cell.getHeaderCells().forEach(headerCell -> {
|
||||
StringBuilder headerBuilder = new StringBuilder();
|
||||
headerCell.getTextBlocks().forEach(textBlock -> headerBuilder.append(textBlock.getText()));
|
||||
String headerName = headerBuilder.toString()
|
||||
.replaceAll("\n", "")
|
||||
.replaceAll(" ", "")
|
||||
.replaceAll("-", "");
|
||||
sectionArea.setHeader(headerName);
|
||||
tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
|
||||
});
|
||||
}
|
||||
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
// TODO avoid cell overlap merging.
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
cellStarts.add(cellStart);
|
||||
start = start + cell.toString().trim().length() + 1;
|
||||
|
||||
}
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(rowEntities, searchableRow, dictionary, cellStarts);
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
|
||||
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
|
||||
.collect(Collectors.toSet()) : rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
.searchText(searchableRow.toString())
|
||||
.headline(table.getHeadline())
|
||||
.sectionNumber(sectionNumber.intValue())
|
||||
.tabularData(tabularData)
|
||||
.searchableText(searchableRow)
|
||||
.dictionary(dictionary)
|
||||
.fileAttributes(fileAttributes)
|
||||
.build(), searchableRow));
|
||||
|
||||
if (!local) {
|
||||
sectionText.setText(searchableRow.toString());
|
||||
sectionText.setHeadline(table.getHeadline());
|
||||
sectionText.setSectionNumber(sectionNumber.intValue());
|
||||
sectionText.setTable(true);
|
||||
sectionText.setTabularData(tabularData);
|
||||
sectionText.setCellStarts(cellStarts);
|
||||
classifiedDoc.getSectionText().add(sectionText);
|
||||
}
|
||||
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
return sectionSearchableTextPairs;
|
||||
}
|
||||
|
||||
|
||||
private List<SectionSearchableTextPair> processTableAsOneText(Document classifiedDoc, Table table,
|
||||
AtomicInteger sectionNumber, Dictionary dictionary,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
List<FileAttribute> fileAttributes) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
SearchableText entireTableText = new SearchableText();
|
||||
SectionText sectionText = new SectionText();
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!local) {
|
||||
SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
|
||||
.get(0)
|
||||
.getSequences()
|
||||
.get(0)
|
||||
.getPage());
|
||||
sectionText.getTextBlocks().addAll(cell.getTextBlocks());
|
||||
sectionText.getSectionAreas().add(sectionArea);
|
||||
}
|
||||
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
entireTableText.addAll(textBlock.getSequences());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Set<Entity> rowEntities = findEntities(entireTableText, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(rowEntities, entireTableText, dictionary);
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
|
||||
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
|
||||
.collect(Collectors.toSet()) : rowEntities)
|
||||
.text(entireTableText.getAsStringWithLinebreaks())
|
||||
.searchText(entireTableText.toString())
|
||||
.headline(table.getHeadline())
|
||||
.sectionNumber(sectionNumber.intValue())
|
||||
.searchableText(entireTableText)
|
||||
.dictionary(dictionary)
|
||||
.fileAttributes(fileAttributes)
|
||||
.build(), entireTableText));
|
||||
|
||||
if (!local) {
|
||||
sectionText.setText(entireTableText.toString());
|
||||
sectionText.setHeadline(table.getHeadline());
|
||||
sectionText.setSectionNumber(sectionNumber.intValue());
|
||||
sectionText.setTable(true);
|
||||
classifiedDoc.getSectionText().add(sectionText);
|
||||
}
|
||||
|
||||
return sectionSearchableTextPairs;
|
||||
}
|
||||
|
||||
|
||||
private SectionSearchableTextPair processText(Document classifiedDoc, SearchableText searchableText,
|
||||
List<TextBlock> paragraphTextBlocks, String headline,
|
||||
ManualRedactions manualRedactions, AtomicInteger sectionNumber,
|
||||
Dictionary dictionary, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
List<PdfImage> images, List<FileAttribute> fileAttributes) {
|
||||
|
||||
if (!local) {
|
||||
SectionText sectionText = new SectionText();
|
||||
for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
|
||||
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock
|
||||
.getWidth(), paragraphTextBlock.getHeight(), paragraphTextBlock.getPage());
|
||||
sectionText.getSectionAreas().add(sectionArea);
|
||||
}
|
||||
|
||||
sectionText.setText(searchableText.toString());
|
||||
sectionText.setHeadline(headline);
|
||||
sectionText.setSectionNumber(sectionNumber.intValue());
|
||||
sectionText.setTable(false);
|
||||
sectionText.setImages(images.stream()
|
||||
.map(image -> convertAndRecategorize(image, sectionNumber.intValue(), headline, manualRedactions))
|
||||
.collect(Collectors.toSet()));
|
||||
sectionText.setTextBlocks(paragraphTextBlocks);
|
||||
classifiedDoc.getSectionText().add(sectionText);
|
||||
}
|
||||
|
||||
Set<Entity> entities = findEntities(searchableText, headline, sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(entities, searchableText, dictionary);
|
||||
|
||||
return new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
|
||||
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
|
||||
.collect(Collectors.toSet()) : entities)
|
||||
.text(searchableText.getAsStringWithLinebreaks())
|
||||
.searchText(searchableText.toString())
|
||||
.headline(headline)
|
||||
.sectionNumber(sectionNumber.intValue())
|
||||
.searchableText(searchableText)
|
||||
.dictionary(dictionary)
|
||||
.images(images.stream()
|
||||
.map(image -> convertAndRecategorize(image, sectionNumber.intValue(), headline, manualRedactions))
|
||||
.collect(Collectors.toSet()))
|
||||
.fileAttributes(fileAttributes)
|
||||
.build(), searchableText);
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
Dictionary dictionary, boolean local) {
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
Dictionary dictionary, boolean local, NerEntities nerEntities,
|
||||
List<Integer> cellstarts) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.toString();
|
||||
@ -397,71 +208,52 @@ public class EntityRedactionService {
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local, model
|
||||
.isDossierDictionary()));
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model
|
||||
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
|
||||
} else {
|
||||
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local, model
|
||||
.isDossierDictionary()));
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model
|
||||
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
|
||||
}
|
||||
}
|
||||
|
||||
if (redactionServiceSettings.isEnableEntityRecognition() && !local) {
|
||||
found.addAll(getAiEntities(sectionNumber, searchableString, headline));
|
||||
if (!local) {
|
||||
Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts);
|
||||
nerValuesPerType.entrySet().forEach(entry -> {
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, entry.getValue(), entry
|
||||
.getKey(), headline, sectionNumber, false, false, Engine.NER));
|
||||
});
|
||||
}
|
||||
|
||||
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
|
||||
}
|
||||
|
||||
|
||||
private Image convertAndRecategorize(PdfImage pdfImage, int sectionNumber, String headline,
|
||||
ManualRedactions manualRedactions) {
|
||||
private Map<String, Set<String>> getNerValues(int sectionNumber, NerEntities nerEntities,
|
||||
List<Integer> cellstarts) {
|
||||
|
||||
Image image = Image.builder()
|
||||
.type(pdfImage.getImageType().equals(ImageType.OTHER) ? "image" : pdfImage.getImageType()
|
||||
.name()
|
||||
.toLowerCase(Locale.ROOT))
|
||||
.position(pdfImage.getPosition())
|
||||
.sectionNumber(sectionNumber)
|
||||
.section(headline)
|
||||
.page(pdfImage.getPage())
|
||||
.hasTransparency(pdfImage.isHasTransparency())
|
||||
.build();
|
||||
Map<String, Set<String>> nerValuesPerType = new HashMap<>();
|
||||
|
||||
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
|
||||
if (manualRedactions != null && manualRedactions.getImageRecategorizations() != null) {
|
||||
for (ManualImageRecategorization imageRecategorization : manualRedactions.getImageRecategorizations()) {
|
||||
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId()
|
||||
.equals(imageId)) {
|
||||
image.setType(imageRecategorization.getType());
|
||||
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
|
||||
.containsKey(sectionNumber)) {
|
||||
nerEntities.getResult().get(sectionNumber).forEach(res -> {
|
||||
if (cellstarts == null || cellstarts.isEmpty()) {
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
|
||||
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellstarts) {
|
||||
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
|
||||
intersectsCellStart = true;
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
|
||||
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getAiEntities(int sectionNumber, String searchableString, String headline) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
Map<String, Map<String, List<EntityRecogintionEntity>>> response = entityRecognitionClient.findAuthors(EntityRecognitionRequest
|
||||
.builder()
|
||||
.data(List.of(EntityRecognitionSection.builder()
|
||||
.sectionNumber(sectionNumber)
|
||||
.text(searchableString)
|
||||
.build()))
|
||||
.build());
|
||||
|
||||
EntityRecognitionResponse entityRecognitionResponse = new EntityRecognitionResponse(response.get("result:"));
|
||||
|
||||
if (entityRecognitionResponse.getResult() != null && entityRecognitionResponse.getResult()
|
||||
.containsKey(String.valueOf(sectionNumber))) {
|
||||
entityRecognitionResponse.getResult().get(String.valueOf(sectionNumber)).forEach(res -> {
|
||||
found.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false));
|
||||
});
|
||||
}
|
||||
return found;
|
||||
return nerValuesPerType;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class NerAnalyserService {
|
||||
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
private final EntityRecognitionClient entityRecognitionClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
public void computeNerEntities(String dossierId, String fileId) {
|
||||
|
||||
if (redactionServiceSettings.isEnableEntityRecognition()) {
|
||||
var text = redactionStorageService.getText(dossierId, fileId);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
var nerRequest = EntityRecognitionRequest.builder()
|
||||
.data(text.getSectionTexts()
|
||||
.stream()
|
||||
.map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), new String(Base64
|
||||
.encodeBase64(sectionText
|
||||
.getText().getBytes()))))
|
||||
.collect(Collectors.toList()))
|
||||
.build();
|
||||
|
||||
var nerResponse = entityRecognitionClient.findAuthors(nerRequest);
|
||||
|
||||
log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId);
|
||||
|
||||
redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,321 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||
import com.iqser.red.service.redaction.v1.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ReanalyzeService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final RedactionChangeLogService redactionChangeLogService;
|
||||
private final AnalyzeResponseService analyzeResponseService;
|
||||
private final LegalBasisClient legalBasisClient;
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
|
||||
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var pageCount = 0;
|
||||
Document classifiedDoc;
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
|
||||
.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
pageCount = classifiedDoc.getPages().size();
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
log.info("Document structure analysis successful, starting redaction analysis...");
|
||||
|
||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getDossierTemplateId(), analyzeRequest.getManualRedactions(), analyzeRequest
|
||||
.getDossierId(), analyzeRequest.getFileAttributes());
|
||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getDossierTemplateId());
|
||||
|
||||
log.info("Redaction analysis successful...");
|
||||
|
||||
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
|
||||
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), classifiedDoc.getRedactionLogEntities(), legalBasis, classifiedDoc.getDictionaryVersion()
|
||||
.getDossierTemplateVersion(), classifiedDoc.getDictionaryVersion()
|
||||
.getDossierVersion(), classifiedDoc.getRulesVersion(), legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
|
||||
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
|
||||
|
||||
log.info("Analyzed with rules {} and dictionary {} for dossierTemplate: {}", classifiedDoc.getRulesVersion(), classifiedDoc
|
||||
.getDictionaryVersion(), analyzeRequest.getDossierTemplateId());
|
||||
|
||||
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
|
||||
redactionLog = redactionLogChange.getRedactionLog();
|
||||
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc
|
||||
.getSectionText()));
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
|
||||
.getSectionGrid());
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, redactionLogChange.isHasChanges());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
// not yet ready for reanalysis
|
||||
if (redactionLog == null || text == null || text.getNumberOfPages() == 0) {
|
||||
return analyze(analyzeRequest);
|
||||
}
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog
|
||||
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse().isEmpty() ? analyzeRequest.getSectionsToReanalyse() :
|
||||
findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
}
|
||||
|
||||
List<SectionText> reanalysisSections = text.getSectionTexts()
|
||||
.stream()
|
||||
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
|
||||
.getDossierId());
|
||||
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
Set<Entity> entities = findEntities(reanalysisSections, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage);
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = entityRedactionService.getHintsPerSection(entities, dictionary);
|
||||
Set<Entity> foundByLocal = findEntities(reanalysisSections, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
|
||||
Map<Integer, List<Entity>> entitiesPerPage = entityRedactionService.convertToEnititesPerPage(entities);
|
||||
|
||||
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
|
||||
for (int page = 1; page <= text.getNumberOfPages(); page++) {
|
||||
if (entitiesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, page, analyzeRequest
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
if (imagesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, page, analyzeRequest
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
|
||||
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||
AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
analyzeResult.setWasReanalyzed(true);
|
||||
return analyzeResult;
|
||||
}
|
||||
|
||||
|
||||
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
|
||||
Text text, AnalyzeRequest analyzeRequest) {
|
||||
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
|
||||
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||
if (entry.isManual() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
|
||||
sectionsToReanalyse.add(entry.getSectionNumber());
|
||||
}
|
||||
if (entry.isImage() || entry.getType().equals("image")) {
|
||||
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
|
||||
}
|
||||
}
|
||||
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
|
||||
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
|
||||
sectionsToReanalyse.add(sectionText.getSectionNumber());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
|
||||
|
||||
return sectionsToReanalyse;
|
||||
}
|
||||
|
||||
|
||||
	/**
	 * Finds all entities for the given sections by running dictionary search,
	 * surrounding-word enrichment, manual image recategorization, and finally the
	 * Drools rule engine over each section.
	 *
	 * Side effects: fills {@code imagesPerPage} with the images of every analysed
	 * section, and adds local values discovered by the rules to {@code dictionary}.
	 *
	 * @param reanalysisSections    sections to analyse
	 * @param dictionary            dictionary used for entity lookup (mutated: local values are added)
	 * @param kieContainer          Drools container used to execute the redaction rules
	 * @param analyzeRequest        request carrying manual redactions and file attributes
	 * @param local                 whether this is a "local" analysis run; when true, manual
	 *                              image recategorizations are skipped
	 * @param hintsPerSectionNumber optional pre-computed hint entities per section (may be null)
	 * @param imagesPerPage         output map, filled with images keyed by page number
	 * @return all entities found across the given sections
	 */
	private Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
	                                 KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
	                                 Map<Integer, Set<Entity>> hintsPerSectionNumber,
	                                 Map<Integer, Set<Image>> imagesPerPage) {

		// Phase 1: build one Section (plus its searchable text) per input section.
		List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
		for (SectionText reanalysisSection : reanalysisSections) {

			// Dictionary-based entity lookup for this section.
			Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
					.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, local);
			// Table sections carry cell start offsets; pass them so surrounding text
			// does not cross cell boundaries.
			if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
				surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
						.getCellStarts());
			} else {
				surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
			}

			// Apply approved manual image recategorizations (only for non-local runs,
			// and only when the request actually carries recategorizations).
			if (!local && reanalysisSection.getImages() != null && !reanalysisSection.getImages()
					.isEmpty() && analyzeRequest.getManualRedactions() != null && analyzeRequest.getManualRedactions()
					.getImageRecategorizations() != null) {
				for (Image image : reanalysisSection.getImages()) {
					String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
					for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
							.getImageRecategorizations()) {
						if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId()
								.equals(imageId)) {
							image.setType(imageRecategorization.getType());
						}
					}
				}
			}

			// Assemble the Section handed to the rule engine. Hint entities (if any
			// exist for this section number) are merged into the dictionary hits.
			sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
					.isLocal(false)
					.dictionaryTypes(dictionary.getTypes())
					.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream
							.concat(entities.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber())
									.stream())
							.collect(Collectors.toSet()) : entities)
					.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
					.searchText(reanalysisSection.getSearchableText().toString())
					.headline(reanalysisSection.getHeadline())
					.sectionNumber(reanalysisSection.getSectionNumber())
					.tabularData(reanalysisSection.getTabularData())
					.searchableText(reanalysisSection.getSearchableText())
					.dictionary(dictionary)
					.images(reanalysisSection.getImages())
					.fileAttributes(analyzeRequest.getFileAttributes())
					.build(), reanalysisSection.getSearchableText()));
		}

		// Phase 2: run the rules per section and accumulate the results.
		Set<Entity> entities = new HashSet<>();
		sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
			Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
			entities.addAll(analysedSection.getEntities());
			// Prune entities fully contained in a larger one. Called inside the loop,
			// i.e. after every section — the final call covers the complete set.
			EntitySearchUtils.removeEntitiesContainedInLarger(entities);

			for (Image image : analysedSection.getImages()) {
				imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
			}

			// Rules may discover "local" dictionary values; feed them back.
			entityRedactionService.addLocalValuesToDictionary(analysedSection, dictionary);
		});

		return entities;
	}
|
||||
|
||||
|
||||
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
|
||||
RedactionLog redactionLog, Text text,
|
||||
DictionaryIncrement dictionaryIncrement) {
|
||||
|
||||
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierTemplateVersion());
|
||||
redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion());
|
||||
|
||||
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
|
||||
|
||||
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange.getRedactionLog());
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
|
||||
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), duration, text
|
||||
.getNumberOfPages(), redactionLogChange.getRedactionLog(), redactionLogChange.isHasChanges());
|
||||
}
|
||||
|
||||
|
||||
private Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
return Stream.concat(manualRedactions.getManualLegalBasisChanges()
|
||||
.stream()
|
||||
.map(ManualLegalBasisChange::getId), Stream.concat(manualRedactions.getImageRecategorizations()
|
||||
.stream()
|
||||
.map(ManualImageRecategorization::getId), Stream.concat(manualRedactions.getIdsToRemove()
|
||||
.stream()
|
||||
.map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))))
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
public Image convert(RedactionLogEntry entry) {
|
||||
|
||||
Rectangle position = entry.getPositions().get(0);
|
||||
|
||||
return Image.builder()
|
||||
.type(entry.getType())
|
||||
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft()
|
||||
.getY(), position.getWidth(), position.getHeight()))
|
||||
.sectionNumber(entry.getSectionNumber())
|
||||
.section(entry.getSection())
|
||||
.page(position.getPage())
|
||||
.hasTransparency(entry.isImageHasTransparency())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
|
||||
|
||||
if(excludedPages != null && !excludedPages.isEmpty()) {
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> entry.getPositions().forEach(pos -> { if (excludedPages.contains(pos.getPage())) {
|
||||
entry.setExcluded(true);
|
||||
}}));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -31,6 +31,8 @@ public class RedactionChangeLogService {
|
||||
|
||||
public RedactionLogChanges computeChanges(String dossierId, String fileId, RedactionLog currentRedactionLog) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(dossierId, fileId);
|
||||
|
||||
if (previousRedactionLog == null) {
|
||||
@ -98,6 +100,7 @@ public class RedactionChangeLogService {
|
||||
|
||||
currentRedactionLog.setRedactionLogEntry(newRedactionLogEntries);
|
||||
|
||||
log.info("Change computation took: {}", System.currentTimeMillis() - start);
|
||||
return new RedactionLogChanges(currentRedactionLog, !addedIds.isEmpty() || !removedIds.isEmpty());
|
||||
}
|
||||
|
||||
|
||||
@ -1,26 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
@ -28,6 +7,22 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class RedactionLogCreatorService {
|
||||
@ -35,26 +30,27 @@ public class RedactionLogCreatorService {
|
||||
private final DictionaryService dictionaryService;
|
||||
|
||||
|
||||
public void createRedactionLog(Document classifiedDoc, int numberOfPages, String dossierTemplateId) {
|
||||
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages,
|
||||
String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> entries = new ArrayList<>();
|
||||
|
||||
for (int page = 1; page <= numberOfPages; page++) {
|
||||
|
||||
addSectionGrid(classifiedDoc, page);
|
||||
|
||||
if (classifiedDoc.getEntities().get(page) != null) {
|
||||
classifiedDoc.getRedactionLogEntities()
|
||||
.addAll(addEntries(classifiedDoc.getEntities(), page, dossierTemplateId));
|
||||
if (pageEntities.getEntitiesPerPage().get(page) != null) {
|
||||
entries.addAll(addEntries(pageEntities.getEntitiesPerPage(), page, dossierTemplateId));
|
||||
}
|
||||
|
||||
if (classifiedDoc.getImages().get(page) != null && !classifiedDoc.getImages().get(page).isEmpty()) {
|
||||
classifiedDoc.getRedactionLogEntities()
|
||||
.addAll(addImageEntries(classifiedDoc.getImages(), page, dossierTemplateId));
|
||||
if (pageEntities.getImagesPerPage().get(page) != null) {
|
||||
entries.addAll(addImageEntries(pageEntities.getImagesPerPage(), page, dossierTemplateId));
|
||||
}
|
||||
}
|
||||
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber, String dossierTemplateId) {
|
||||
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber,
|
||||
String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
@ -83,7 +79,6 @@ public class RedactionLogCreatorService {
|
||||
.imageHasTransparency(image.isHasTransparency())
|
||||
.build();
|
||||
|
||||
|
||||
redactionLogEntities.add(redactionLogEntry);
|
||||
}
|
||||
|
||||
@ -101,7 +96,6 @@ public class RedactionLogCreatorService {
|
||||
entityLoop:
|
||||
for (Entity entity : entities.get(page)) {
|
||||
|
||||
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, dossierTemplateId);
|
||||
@ -121,12 +115,10 @@ public class RedactionLogCreatorService {
|
||||
.flatMap(seq -> seq.getTextPositions().stream())
|
||||
.collect(Collectors.toList()), page);
|
||||
|
||||
|
||||
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// FIXME ids should never be null. Figure out why this happens.
|
||||
if (redactionLogEntry.getId() != null) {
|
||||
redactionLogEntities.add(redactionLogEntry);
|
||||
@ -167,6 +159,9 @@ public class RedactionLogCreatorService {
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
entity.getReferences().forEach(ref -> ref.getPositionSequences().forEach(pos -> referenceIds.add(pos.getId())));
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.color(getColor(entity.getType(), dossierTemplateId, entity.isRedaction()))
|
||||
.reason(entity.getRedactionReason())
|
||||
@ -185,54 +180,12 @@ public class RedactionLogCreatorService {
|
||||
.startOffset(entity.getStart())
|
||||
.endOffset(entity.getEnd())
|
||||
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
|
||||
.engines(entity.getEngines())
|
||||
.reference(referenceIds)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private void addSectionGrid(Document classifiedDoc, int page) {
|
||||
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
||||
|
||||
AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
|
||||
|
||||
if (textBlock.getPage() != page) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (textBlock instanceof TextBlock) {
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
|
||||
|
||||
} else if (textBlock instanceof Table) {
|
||||
|
||||
List<CellRectangle> cellRectangles = new ArrayList<>();
|
||||
for (List<Cell> row : ((Table) textBlock).getRows()) {
|
||||
for (Cell cell : row) {
|
||||
if (cell != null) {
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(String type, String dossierTemplateId, boolean isRedaction) {
|
||||
|
||||
if (!isRedaction && !isHint(type, dossierTemplateId)) {
|
||||
@ -253,5 +206,4 @@ public class RedactionLogCreatorService {
|
||||
return dictionaryService.isRecommendation(type, dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,76 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class SectionGridCreatorService {
|
||||
|
||||
|
||||
public void createSectionGrid(Document classifiedDoc, int numberOfPages) {
|
||||
|
||||
for (int page = 1; page <= numberOfPages; page++) {
|
||||
addSectionGrid(classifiedDoc, page);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void addSectionGrid(Document classifiedDoc, int page) {
|
||||
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
||||
|
||||
AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
|
||||
|
||||
if (textBlock.getPage() != page) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (textBlock instanceof TextBlock) {
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
|
||||
|
||||
} else if (textBlock instanceof Table) {
|
||||
|
||||
List<CellRectangle> cellRectangles = new ArrayList<>();
|
||||
for (List<Cell> row : ((Table) textBlock).getRows()) {
|
||||
for (Cell cell : row) {
|
||||
if (cell != null) {
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,210 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
@Service
@RequiredArgsConstructor
public class SectionTextBuilderService {

	/**
	 * Splits a classified document into {@link SectionText} units, assigning each a
	 * monotonically increasing section number.
	 *
	 * Order of processing: paragraph tables (two-column tables become one section,
	 * wider tables one section per row), then the paragraph text itself, then
	 * headers, footers, and unclassified text.
	 *
	 * @param classifiedDoc the classified document to split
	 * @return the ordered list of section texts
	 */
	public List<SectionText> buildSectionText(Document classifiedDoc) {

		List<SectionText> sectionTexts = new ArrayList<>();
		AtomicInteger sectionNumber = new AtomicInteger(1);
		for (Paragraph paragraph : classifiedDoc.getParagraphs()) {

			List<Table> tables = paragraph.getTables();
			for (Table table : tables) {
				// Two-column tables are treated as one continuous text (typically
				// key/value layouts); anything wider is processed row by row.
				if (table.getColCount() == 2) {
					sectionTexts.add(processTableAsOneText(table, sectionNumber));
				} else {
					sectionTexts.addAll(processTablePerRow(table, sectionNumber));
				}
				sectionNumber.incrementAndGet();
			}
			sectionTexts.add(processText(paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph.getHeadline(), sectionNumber, paragraph
					.getImages()));
			sectionNumber.incrementAndGet();
		}

		for (Header header : classifiedDoc.getHeaders()) {
			sectionTexts.add(processText(header.getSearchableText(), header.getTextBlocks(), "Header", sectionNumber, new ArrayList<>()));
			sectionNumber.incrementAndGet();
		}

		for (Footer footer : classifiedDoc.getFooters()) {
			sectionTexts.add(processText(footer.getSearchableText(), footer.getTextBlocks(), "Footer", sectionNumber, new ArrayList<>()));
			sectionNumber.incrementAndGet();
		}

		for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
			sectionTexts.add(processText(unclassifiedText.getSearchableText(), unclassifiedText.getTextBlocks(), "", sectionNumber, new ArrayList<>()));
			sectionNumber.incrementAndGet();
		}

		return sectionTexts;
	}


	/**
	 * Builds one {@link SectionText} per table row. Tracks the character offset of
	 * each cell within the row text ({@code cellStarts}) and maps header names to
	 * cell values ({@code tabularData}) for non-header cells.
	 */
	private List<SectionText> processTablePerRow(Table table, AtomicInteger sectionNumber) {

		List<SectionText> sectionTexts = new ArrayList<>();
		for (List<Cell> row : table.getRows()) {
			SearchableText searchableRow = new SearchableText();
			Map<String, CellValue> tabularData = new HashMap<>();
			// Running character offset of the current cell within the row text.
			int start = 0;
			List<Integer> cellStarts = new ArrayList<>();
			SectionText sectionText = new SectionText();
			for (Cell cell : row) {

				if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
					continue;
				}

				// The page is taken from the first sequence of the first text block.
				SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
						.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
						.get(0)
						.getSequences()
						.get(0)
						.getPage());
				sectionText.getSectionAreas().add(sectionArea);
				sectionText.getTextBlocks().addAll(cell.getTextBlocks());

				int cellStart = start;

				// Data cells are associated with their header cells; the header name is
				// normalized by stripping newlines, spaces and hyphens.
				if (!cell.isHeaderCell()) {
					cell.getHeaderCells().forEach(headerCell -> {
						StringBuilder headerBuilder = new StringBuilder();
						headerCell.getTextBlocks().forEach(textBlock -> headerBuilder.append(textBlock.getText()));
						String headerName = headerBuilder.toString()
								.replaceAll("\n", "")
								.replaceAll(" ", "")
								.replaceAll("-", "");
						sectionArea.setHeader(headerName);
						tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
					});
				}

				for (TextBlock textBlock : cell.getTextBlocks()) {
					// TODO avoid cell overlap merging.
					searchableRow.addAll(textBlock.getSequences());
				}
				cellStarts.add(cellStart);
				// +1 accounts for the separator between cells in the row text.
				start = start + cell.toString().trim().length() + 1;

			}

			sectionText.setText(searchableRow.toString());
			sectionText.setHeadline(table.getHeadline());
			sectionText.setSectionNumber(sectionNumber.intValue());
			sectionText.setTable(true);
			sectionText.setTabularData(tabularData);
			sectionText.setCellStarts(cellStarts);
			sectionTexts.add(sectionText);

			sectionNumber.incrementAndGet();
		}

		return sectionTexts;
	}


	/**
	 * Builds a single {@link SectionText} covering the entire table: all cell
	 * texts are concatenated and each cell contributes a {@link SectionArea}.
	 * Used for two-column tables (see {@link #buildSectionText}).
	 */
	private SectionText processTableAsOneText(Table table, AtomicInteger sectionNumber) {

		SearchableText entireTableText = new SearchableText();
		SectionText sectionText = new SectionText();
		for (List<Cell> row : table.getRows()) {
			for (Cell cell : row) {
				if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
					continue;
				}

				// The page is taken from the first sequence of the first text block.
				SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
						.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
						.get(0)
						.getSequences()
						.get(0)
						.getPage());
				sectionText.getTextBlocks().addAll(cell.getTextBlocks());
				sectionText.getSectionAreas().add(sectionArea);

				for (TextBlock textBlock : cell.getTextBlocks()) {
					entireTableText.addAll(textBlock.getSequences());
				}
			}
		}

		sectionText.setText(entireTableText.toString());
		sectionText.setHeadline(table.getHeadline());
		sectionText.setSectionNumber(sectionNumber.intValue());
		sectionText.setTable(true);
		return sectionText;
	}


	/**
	 * Builds a non-table {@link SectionText} from free-flowing text blocks.
	 * Each text block contributes a {@link SectionArea}; the paragraph's images
	 * are converted and attached to the section.
	 */
	private SectionText processText(SearchableText searchableText, List<TextBlock> paragraphTextBlocks, String headline,
	                                AtomicInteger sectionNumber, List<PdfImage> images) {

		SectionText sectionText = new SectionText();
		for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
			SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock
					.getWidth(), paragraphTextBlock.getHeight(), paragraphTextBlock.getPage());
			sectionText.getSectionAreas().add(sectionArea);
		}

		sectionText.setText(searchableText.toString());
		sectionText.setHeadline(headline);
		sectionText.setSectionNumber(sectionNumber.intValue());
		sectionText.setTable(false);
		sectionText.setImages(images.stream()
				.map(image -> convertImage(image, sectionNumber.intValue(), headline))
				.collect(Collectors.toSet()));
		sectionText.setTextBlocks(paragraphTextBlocks);
		return sectionText;
	}


	/**
	 * Converts a {@link PdfImage} to the redaction {@link Image} model. The type
	 * string is the lower-cased image type name, with {@code ImageType.OTHER}
	 * mapped to the generic "image".
	 */
	private Image convertImage(PdfImage pdfImage, int sectionNumber, String headline) {

		return Image.builder()
				.type(pdfImage.getImageType().equals(ImageType.OTHER) ? "image" : pdfImage.getImageType()
						.name()
						.toLowerCase(Locale.ROOT))
				.position(pdfImage.getPosition())
				.sectionNumber(sectionNumber)
				.section(headline)
				.page(pdfImage.getPage())
				.hasTransparency(pdfImage.isHasTransparency())
				.build();
	}

}
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
@ -47,7 +48,7 @@ public class EntitySearchUtils {
|
||||
|
||||
|
||||
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
|
||||
boolean local, boolean isDossierDictionary) {
|
||||
boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
@ -67,7 +68,7 @@ public class EntitySearchUtils {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local, isDossierDictionary));
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
@ -142,9 +143,13 @@ public class EntitySearchUtils {
|
||||
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
|
||||
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
|
||||
entities.remove(found);
|
||||
entities.add(found);
|
||||
} else {
|
||||
existing.getEngines().addAll(found.getEngines());
|
||||
}
|
||||
} else {
|
||||
entities.add(found);
|
||||
}
|
||||
entities.add(found);
|
||||
}
|
||||
|
||||
|
||||
@ -154,4 +159,17 @@ public class EntitySearchUtils {
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
|
||||
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded){
|
||||
|
||||
for(Entity toAdd: toBeAdded){
|
||||
if (existing.contains(toAdd)) {
|
||||
Entity existingEntity = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst().get();
|
||||
existingEntity.getEngines().addAll(toAdd.getEngines());
|
||||
} else {
|
||||
existing.add(toAdd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,6 +5,7 @@ import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import lombok.Getter;
|
||||
@ -73,6 +74,25 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
public NerEntities getNerEntities(String dossierId, String fileId) {
|
||||
|
||||
InputStreamResource inputStreamResource;
|
||||
try {
|
||||
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("NER Entities not available.");
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Could not convert NerEntities", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public SectionGrid getSectionGrid(String dossierId, String fileId) {
|
||||
|
||||
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID));
|
||||
|
||||
@ -13,7 +13,7 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
@ -90,7 +90,7 @@ public class RedactionIntegrationTest {
|
||||
private RedactionController redactionController;
|
||||
|
||||
@Autowired
|
||||
private ReanalyzeService reanalyzeService;
|
||||
private AnalyzeService analyzeService;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
@ -525,7 +525,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
|
||||
MemoryStats.printMemoryStats();
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
assertThat(result).isNotNull();
|
||||
}
|
||||
|
||||
@ -536,7 +537,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
|
||||
MemoryStats.printMemoryStats();
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
assertThat(result).isNotNull();
|
||||
}
|
||||
|
||||
@ -548,7 +550,8 @@ public class RedactionIntegrationTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||
|
||||
@ -574,7 +577,7 @@ public class RedactionIntegrationTest {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long rstart = System.currentTimeMillis();
|
||||
reanalyzeService.reanalyze(request);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
long rend = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||
@ -603,7 +606,11 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
|
||||
System.out.println("Redacting file : " + path.getName());
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
|
||||
long fstart = System.currentTimeMillis();
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
System.out.println("analysis analysis duration: " + (System.currentTimeMillis() - fstart));
|
||||
|
||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||
|
||||
@ -621,7 +628,7 @@ public class RedactionIntegrationTest {
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
|
||||
|
||||
long rstart = System.currentTimeMillis();
|
||||
reanalyzeService.reanalyze(request);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
long rend = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||
@ -668,7 +675,8 @@ public class RedactionIntegrationTest {
|
||||
.value("true")
|
||||
.build()));
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
@ -742,7 +750,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
request.setManualRedactions(manualRedactions);
|
||||
|
||||
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
|
||||
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
@ -767,7 +775,7 @@ public class RedactionIntegrationTest {
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER))
|
||||
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
|
||||
reanalyzeService.reanalyze(request);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
@ -782,7 +790,8 @@ public class RedactionIntegrationTest {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
@ -843,7 +852,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
request.setManualRedactions(manualRedactions);
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
|
||||
@ -856,7 +866,7 @@ public class RedactionIntegrationTest {
|
||||
.status(Status.APPROVED)
|
||||
.build()));
|
||||
|
||||
reanalyzeService.reanalyze(request);
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
@ -970,7 +980,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
@ -1018,7 +1029,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.rulebuilder;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
|
||||
|
||||
public class RuleBuilderModelServiceTest {
|
||||
|
||||
@Test
|
||||
public void testRuleBuilderModelProvider() {
|
||||
|
||||
RuleBuilderModel model = new RuleBuilderModelService().getRuleBuilderModel();
|
||||
|
||||
assertThat(model.getWhenClauses().size()).isGreaterThan(1);
|
||||
assertThat(model.getThenConditions().size()).isGreaterThan(1);
|
||||
}
|
||||
}
|
||||
@ -1,511 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.iqser.red.service.configuration.v1.api.model.*;
|
||||
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
|
||||
import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(EntityRedactionServiceTest.RedactionIntegrationTestConfiguration.class)
|
||||
public class EntityRedactionServiceTest {
|
||||
|
||||
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
|
||||
private static final String AUTHOR_CODE = "author";
|
||||
private static final String ADDRESS_CODE = "address";
|
||||
private static final String SPONSOR_CODE = "sponsor";
|
||||
|
||||
private static final AtomicLong DICTIONARY_VERSION = new AtomicLong();
|
||||
private static final AtomicLong RULES_VERSION = new AtomicLong();
|
||||
|
||||
@MockBean
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
@MockBean
|
||||
private RulesClient rulesClient;
|
||||
|
||||
@Autowired
|
||||
private EntityRedactionService entityRedactionService;
|
||||
|
||||
@Autowired
|
||||
private PdfSegmentationService pdfSegmentationService;
|
||||
|
||||
@Autowired
|
||||
private DroolsExecutionService droolsExecutionService;
|
||||
|
||||
@MockBean
|
||||
private AmazonS3 amazonS3;
|
||||
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream(DEFAULT_RULES.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/test/resources/drools/rules.drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNestedEntitiesRemoval() {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false);
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false);
|
||||
entities.add(nested);
|
||||
entities.add(nesting);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
|
||||
assertThat(entities.size()).isEqualTo(1);
|
||||
assertThat(entities).contains(nesting);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedaction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNestedRedaction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTrueNegativesInTable() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
|
||||
" Supplement - Identity of the active substance - Reference list.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
|
||||
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
|
||||
"the plant protection product.pdf");
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFalsePositiveInWrongCell() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 9)
|
||||
.count()).isEqualTo(10);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testApplicantInTableRedaction() throws IOException {
|
||||
|
||||
String tableRules = "package drools\n" +
|
||||
"\n" +
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
"\n" +
|
||||
"global Section section\n" +
|
||||
"rule \"6: Redact contact information if applicant is found\"\n" +
|
||||
" when\n" +
|
||||
" eval(section.headlineContainsWord(\"applicant\") || section.getText().contains(\"Applicant\"));\n" +
|
||||
" then\n" +
|
||||
" section.redactLineAfter(\"Name:\", \"address\", 6,true, \"Applicant information was found\", \"Reg" +
|
||||
" (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactBetween(\"Address:\", \"Contact\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Contact point:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Phone:\", \"address\", 6,true, \"Applicant information was found\", " +
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Fax:\", \"address\", 6,true, \"Applicant information was found\", \"Reg " +
|
||||
"(EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg" +
|
||||
" (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Tel:\", \"address\", 6,true, \"Applicant information was found\", \"Reg " +
|
||||
"(EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"E-mail:\", \"address\", 6,true, \"Applicant information was found\", " +
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Email:\", \"address\", 6,true, \"Applicant information was found\", " +
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Contact:\", \"address\", 6,true, \"Applicant information was found\", " +
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Telephone number:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Fax number:\", \"address\", 6,true, \"Applicant information was found\"," +
|
||||
" \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactLineAfter(\"Telephone:\", \"address\", 6,true, \"Applicant information was found\", " +
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactBetween(\"No:\", \"Fax\", \"address\", 6,true, \"Applicant information was found\", " +
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_DOSSIER_TEMPLATE_ID);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 6)
|
||||
.count()).isEqualTo(13);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSponsorInCell() throws IOException {
|
||||
|
||||
String tableRules = "package drools\n" +
|
||||
"\n" +
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
"\n" +
|
||||
"global Section section\n" + "rule \"11: Redact sponsor company\"\n" + " when\n" + " " +
|
||||
"Section(searchText.toLowerCase().contains(\"batches produced at\"))\n" + " then\n" + " section" +
|
||||
".redactIfPrecededBy(\"batches produced at\", \"sponsor\", 11, \"Redacted because it represents a " +
|
||||
"sponsor company\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" + " end";
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_DOSSIER_TEMPLATE_ID);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse authorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authorResponse);
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 11)
|
||||
.count()).isEqualTo(1);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void headerPropagation() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
|
||||
assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
|
||||
|
||||
pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation2.pdf");
|
||||
|
||||
dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void testNGuideline() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
|
||||
}
|
||||
|
||||
|
||||
@Before
|
||||
public void stubRedaction() {
|
||||
String tableRules = "package drools\n" +
|
||||
"\n" +
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
"\n" +
|
||||
"global Section section\n" +
|
||||
"rule \"8: Not redacted because Vertebrate Study = N\"\n" +
|
||||
" when\n" +
|
||||
" Section(rowEquals(\"Vertebrate study Y/N\", \"N\") || rowEquals(\"Vertebrate study Y/N\", \"No\"))\n" +
|
||||
" then\n" +
|
||||
" section.redactNotCell(\"Author(s)\", 8, \"name\", false, \"Not redacted because row is not a vertebrate study\");\n" +
|
||||
" section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n" +
|
||||
" end\n" +
|
||||
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||
" when\n" +
|
||||
" Section(rowEquals(\"Vertebrate study Y/N\", \"Y\") || rowEquals(\"Vertebrate study Y/N\", " +
|
||||
"\"Yes\"))\n" +
|
||||
" then\n" +
|
||||
" section.redactCell(\"Author(s)\", 9, \"name\", false, \"Redacted because row is a vertebrate study\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate sgitudy\", \"Reg (EC) No" +
|
||||
" 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
TypeResponse typeResponse = TypeResponse.builder()
|
||||
.types(Arrays.asList(
|
||||
TypeResult.builder().dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID).type(AUTHOR_CODE).hexColor("#ffff00").build(),
|
||||
TypeResult.builder().dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
|
||||
TypeResult.builder().dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(typeResponse);
|
||||
|
||||
// Default empty return to prevent NPEs
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
|
||||
Colors colors = new Colors();
|
||||
colors.setDefaultColor("#acfc00");
|
||||
colors.setNotRedacted("#cccccc");
|
||||
colors.setRequestAdd("#04b093");
|
||||
colors.setRequestRemove("#04b093");
|
||||
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
private static String loadFromClassPath(String path) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
|
||||
}
|
||||
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String str;
|
||||
while ((str = br.readLine()) != null) {
|
||||
sb.append(str).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
|
||||
}
|
||||
}
|
||||
|
||||
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
|
||||
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
||||
entries.forEach(entry -> {
|
||||
dictionaryEntries.add(new DictionaryEntry(entry, 1L, false));
|
||||
});
|
||||
return dictionaryEntries;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,30 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
|
||||
public class EntitySearchUtilsTest {
|
||||
|
||||
@Test
|
||||
public void testNestedEntitiesRemoval() {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE);
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE);
|
||||
entities.add(nested);
|
||||
entities.add(nesting);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
|
||||
assertThat(entities.size()).isEqualTo(1);
|
||||
assertThat(entities).contains(nesting);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
configuration-service.url: "http://configuration-service-v1:8080"
|
||||
image-service.url: "http://image-service-v1:8080"
|
||||
file-management-service.url: "http://file-management-service-v1:8080"
|
||||
entity-recognition-service.url: "http://entity-recognition-service-v1:8080"
# NOTE(review): duplicate key below overrides the service URL above with a localhost
# value (and omits the scheme) — looks like a committed local-debug override; confirm
# which endpoint is intended and remove the other before release.
entity-recognition-service.url: "localhost:8080"
|
||||
|
||||
ribbon:
|
||||
ConnectTimeout: 600000
|
||||
|
||||
@ -56,8 +56,8 @@ rule "5: Do not redact Names and Addresses if no redaction Indicator is containe
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("published_information"))
|
||||
then
|
||||
section.redactNot("CBI_author", 5, "Vertebrate and Published Information found");
|
||||
section.redactNot("CBI_address", 5, "Vertebrate and Published Information found");
|
||||
section.redactNotAndReference("CBI_author","published_information", 5, "Vertebrate and Published Information found");
|
||||
section.redactNotAndReference("CBI_address","published_information", 5, "Vertebrate and Published Information found");
|
||||
end
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user