Made dictionaries thread-safe
This commit is contained in:
parent
3f69030b03
commit
44613ee117
@ -27,4 +27,6 @@ public class Document {
|
|||||||
|
|
||||||
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||||
private SectionGrid sectionGrid = new SectionGrid();
|
private SectionGrid sectionGrid = new SectionGrid();
|
||||||
|
private long dictionaryVersion;
|
||||||
|
private long rulesVersion;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,7 +18,6 @@ import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
|||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
||||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||||
@ -40,7 +39,6 @@ public class RedactionController implements RedactionResource {
|
|||||||
private final EntityRedactionService entityRedactionService;
|
private final EntityRedactionService entityRedactionService;
|
||||||
private final PdfFlattenService pdfFlattenService;
|
private final PdfFlattenService pdfFlattenService;
|
||||||
private final DroolsExecutionService droolsExecutionService;
|
private final DroolsExecutionService droolsExecutionService;
|
||||||
private final DictionaryService dictionaryService;
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -57,11 +55,11 @@ public class RedactionController implements RedactionResource {
|
|||||||
if (redactionRequest.isFlatRedaction()) {
|
if (redactionRequest.isFlatRedaction()) {
|
||||||
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
|
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
|
||||||
return convert(flatDocument, classifiedDoc.getPages()
|
return convert(flatDocument, classifiedDoc.getPages()
|
||||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
|
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
|
||||||
}
|
}
|
||||||
|
|
||||||
return convert(pdDocument, classifiedDoc.getPages()
|
return convert(pdDocument, classifiedDoc.getPages()
|
||||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
|
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
@ -142,21 +140,20 @@ public class RedactionController implements RedactionResource {
|
|||||||
|
|
||||||
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
|
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
|
||||||
|
|
||||||
return convert(document, numberOfPages, null, null);
|
return convert(document, numberOfPages, null, null, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private RedactionResult convert(PDDocument document, int numberOfPages,
|
private RedactionResult convert(PDDocument document, int numberOfPages,
|
||||||
List<RedactionLogEntry> redactionLogEntities,
|
List<RedactionLogEntry> redactionLogEntities,
|
||||||
SectionGrid sectionGrid) throws IOException {
|
SectionGrid sectionGrid, long dictionaryVersion, long rulesVersion) throws IOException {
|
||||||
|
|
||||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||||
document.save(byteArrayOutputStream);
|
document.save(byteArrayOutputStream);
|
||||||
return RedactionResult.builder()
|
return RedactionResult.builder()
|
||||||
.document(byteArrayOutputStream.toByteArray())
|
.document(byteArrayOutputStream.toByteArray())
|
||||||
.numberOfPages(numberOfPages)
|
.numberOfPages(numberOfPages)
|
||||||
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService
|
.redactionLog(new RedactionLog(redactionLogEntities,dictionaryVersion, rulesVersion))
|
||||||
.getRulesVersion()))
|
|
||||||
.sectionGrid(sectionGrid)
|
.sectionGrid(sectionGrid)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,88 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
/**
 * Holds a list of {@link DictionaryModel}s together with a lookup map keyed by
 * model type and a version number.
 *
 * All per-type queries ({@link #isRecommendation}, {@link #isHint},
 * {@link #isCaseInsensitiveDictionary}, {@link #containsValue}) go through the
 * type-keyed map and answer {@code false} for a type that is not present.
 */
@Data
|
||||||
|
public class Dictionary {
|
||||||
|
|
||||||
|
// Prefix that containsValue() prepends to a type to find its recommendation dictionary.
public static final String RECOMMENDATION_PREFIX = "recommendation_";
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
// The models exactly as passed to the constructor (the list is not copied).
private List<DictionaryModel> dictionaryModels;
|
||||||
|
// Index of dictionaryModels by DictionaryModel.getType(); filled once in the constructor.
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
// Version value handed to the constructor.
private long version;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Stores the given models and indexes each one by its type.
 *
 * If two models report the same type, the later one in the list replaces the
 * earlier one in the lookup map. A {@code null} list fails with a
 * {@link NullPointerException} at the {@code forEach} call.
 *
 * @param dictionaryModels models to hold; kept by reference
 * @param dictionaryVersion version to report from {@code getVersion()}
 */
public Dictionary(List<DictionaryModel> dictionaryModels, long dictionaryVersion){
|
||||||
|
this.dictionaryModels = dictionaryModels;
|
||||||
|
this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm));
|
||||||
|
this.version = dictionaryVersion;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @param type dictionary type to look up
 * @return the model's {@code isRecommendation()} flag, or {@code false} when no model has that type
 */
public boolean isRecommendation(String type) {
|
||||||
|
|
||||||
|
DictionaryModel model = localAccessMap.get(type);
|
||||||
|
if (model != null) {
|
||||||
|
return model.isRecommendation();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @return {@code true} if at least one held model has a non-empty {@code getLocalEntries()} collection
 */
public boolean hasLocalEntries() {
|
||||||
|
|
||||||
|
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @return the key set of the type lookup map; this is the map's own key-set view, not a copy
 */
public Set<String> getTypes() {
|
||||||
|
|
||||||
|
return localAccessMap.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Checks whether {@code value} is present in the entries or local entries of
 * the dictionary registered under {@code type}, or of the dictionary
 * registered under {@code RECOMMENDATION_PREFIX + type}.
 *
 * NOTE(review): the prefix is prepended unconditionally, so a {@code type}
 * that already starts with the prefix is looked up under a doubly-prefixed
 * key and its un-prefixed base dictionary is never consulted - confirm that
 * callers pass the form they intend.
 *
 * @param type  dictionary type to check
 * @param value entry to search for
 * @return {@code true} if any of the four collections contains {@code value}
 */
public boolean containsValue(String type, String value) {
|
||||||
|
|
||||||
|
// Each containsKey() guard keeps the following get() from returning null.
if (localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||||
|
.getEntries()
|
||||||
|
.contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||||
|
.getLocalEntries()
|
||||||
|
.contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type)
|
||||||
|
.getEntries()
|
||||||
|
.contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type)
|
||||||
|
.getLocalEntries()
|
||||||
|
.contains(value)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @param type dictionary type to look up
 * @return the model's {@code isHint()} flag, or {@code false} when no model has that type
 */
public boolean isHint(String type) {
|
||||||
|
|
||||||
|
DictionaryModel model = localAccessMap.get(type);
|
||||||
|
if (model != null) {
|
||||||
|
return model.isHint();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @param type dictionary type to look up
 * @return the model's {@code isCaseInsensitive()} flag, or {@code false} when no model has that type
 */
public boolean isCaseInsensitiveDictionary(String type) {
|
||||||
|
|
||||||
|
DictionaryModel dictionaryModel = localAccessMap.get(type);
|
||||||
|
if (dictionaryModel != null) {
|
||||||
|
return dictionaryModel.isCaseInsensitive();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -1,6 +1,7 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
@ -8,7 +9,7 @@ import lombok.Data;
|
|||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class DictionaryModel {
|
public class DictionaryModel implements Serializable {
|
||||||
|
|
||||||
private String type;
|
private String type;
|
||||||
private int rank;
|
private int rank;
|
||||||
|
|||||||
@ -1,8 +1,9 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.RECOMMENDATION_PREFIX;
|
import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@ -12,7 +13,6 @@ import java.util.regex.Pattern;
|
|||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||||
|
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
@ -24,7 +24,12 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@Builder
|
@Builder
|
||||||
public class Section {
|
public class Section {
|
||||||
|
|
||||||
private DictionaryService dictionaryService;
|
private boolean isLocal;
|
||||||
|
|
||||||
|
private Set<String> dictionaryTypes;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
private Map<String, Set<String>> localDictionaryAdds = new HashMap<>();
|
||||||
|
|
||||||
private Set<Entity> entities;
|
private Set<Entity> entities;
|
||||||
|
|
||||||
@ -66,7 +71,7 @@ public class Section {
|
|||||||
|
|
||||||
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
||||||
|
|
||||||
boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type);
|
boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||||
|
|
||||||
entities.forEach(entity -> {
|
entities.forEach(entity -> {
|
||||||
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
||||||
@ -82,7 +87,7 @@ public class Section {
|
|||||||
|
|
||||||
public void redactNot(String type, int ruleNumber, String reason) {
|
public void redactNot(String type, int ruleNumber, String reason) {
|
||||||
|
|
||||||
boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type);
|
boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||||
|
|
||||||
entities.forEach(entity -> {
|
entities.forEach(entity -> {
|
||||||
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
||||||
@ -156,8 +161,8 @@ public class Section {
|
|||||||
// HashSet keeps the older value, but we want the new only.
|
// HashSet keeps the older value, but we want the new only.
|
||||||
entities.removeAll(found);
|
entities.removeAll(found);
|
||||||
entities.addAll(found);
|
entities.addAll(found);
|
||||||
if (redactEverywhere) {
|
if (redactEverywhere && !isLocal()) {
|
||||||
dictionaryService.addToLocalDictionary(asType, value.trim());
|
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -190,8 +195,8 @@ public class Section {
|
|||||||
// HashSet keeps the older value, but we want the new only.
|
// HashSet keeps the older value, but we want the new only.
|
||||||
entities.removeAll(found);
|
entities.removeAll(found);
|
||||||
entities.addAll(found);
|
entities.addAll(found);
|
||||||
if (redactEverywhere) {
|
if (redactEverywhere && !isLocal()) {
|
||||||
dictionaryService.addToLocalDictionary(asType, line.trim());
|
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -300,7 +305,7 @@ public class Section {
|
|||||||
|
|
||||||
entities = removeEntitiesContainedInLarger(entities);
|
entities = removeEntitiesContainedInLarger(entities);
|
||||||
|
|
||||||
if (addAsRecommendations) {
|
if (addAsRecommendations && !isLocal()) {
|
||||||
String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
||||||
Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER;
|
Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER;
|
||||||
Matcher matcher = pattern.matcher(cleanedWord);
|
Matcher matcher = pattern.matcher(cleanedWord);
|
||||||
@ -308,13 +313,11 @@ public class Section {
|
|||||||
while (matcher.find()) {
|
while (matcher.find()) {
|
||||||
String match = matcher.group().trim();
|
String match = matcher.group().trim();
|
||||||
if (match.length() >= 3) {
|
if (match.length() >= 3) {
|
||||||
if(!dictionaryService.getDictionary(type).getEntries().contains(match) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(match)) {
|
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>())
|
||||||
dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, match);
|
.add(match);
|
||||||
}
|
|
||||||
String lastname = match.split(" ")[0];
|
String lastname = match.split(" ")[0];
|
||||||
if(!dictionaryService.getDictionary(type).getEntries().contains(lastname) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(lastname)) {
|
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>())
|
||||||
dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, lastname);
|
.add(lastname);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -322,3 +325,9 @@ public class Section {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,13 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
/**
 * Simple holder that keeps a {@link Section} together with a
 * {@link SearchableText}, so both can be passed around as one value.
 */
@Data
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class SectionSearchableTextPair {
|
||||||
|
|
||||||
|
// The section half of the pair.
private Section section;
|
||||||
|
// The searchable text half of the pair.
private SearchableText searchableText;
|
||||||
|
|
||||||
|
}
|
||||||
@ -11,12 +11,14 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
import org.apache.commons.lang3.SerializationUtils;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||||
|
|
||||||
import feign.FeignException;
|
import feign.FeignException;
|
||||||
@ -29,7 +31,6 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class DictionaryService {
|
public class DictionaryService {
|
||||||
|
|
||||||
public static final String RECOMMENDATION_PREFIX = "recommendation_";
|
|
||||||
|
|
||||||
private final DictionaryClient dictionaryClient;
|
private final DictionaryClient dictionaryClient;
|
||||||
|
|
||||||
@ -55,23 +56,6 @@ public class DictionaryService {
|
|||||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||||
|
|
||||||
|
|
||||||
public boolean hasLocalEntries() {
|
|
||||||
|
|
||||||
return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void addToLocalDictionary(String type, String value) {
|
|
||||||
|
|
||||||
localAccessMap.get(type).getLocalEntries().add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void clearLocalEntries() {
|
|
||||||
|
|
||||||
this.dictionary.forEach(dm -> dm.getLocalEntries().clear());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void updateDictionary() {
|
public void updateDictionary() {
|
||||||
|
|
||||||
@ -112,13 +96,13 @@ public class DictionaryService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void updateExternalDictionary(){
|
public void updateExternalDictionary(Dictionary dictionary){
|
||||||
dictionary.forEach(dm -> {
|
dictionary.getDictionaryModels().forEach(dm -> {
|
||||||
if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){
|
if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){
|
||||||
dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false);
|
dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false);
|
||||||
long externalVersion = dictionaryClient.getVersion();
|
long externalVersion = dictionaryClient.getVersion();
|
||||||
if(externalVersion == dictionaryVersion + 1){
|
if(externalVersion == dictionary.getVersion() + 1){
|
||||||
dictionaryVersion = externalVersion;
|
dictionary.setVersion(externalVersion);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -185,19 +169,14 @@ public class DictionaryService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean hasRecommendationDictionary(String type) {
|
public Dictionary getDeepCopyDictionary(){
|
||||||
|
List<DictionaryModel> copy = new ArrayList<>();
|
||||||
|
|
||||||
DictionaryModel model = localAccessMap.get(RECOMMENDATION_PREFIX + type);
|
dictionary.forEach(dm -> {
|
||||||
if (model != null) {
|
copy.add(SerializationUtils.clone(dm));
|
||||||
return true;
|
});
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
return new Dictionary(copy, dictionaryVersion);
|
||||||
public DictionaryModel getDictionary(String type) {
|
|
||||||
|
|
||||||
return localAccessMap.get(type);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -21,11 +21,13 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
|||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
|
||||||
@ -43,21 +45,23 @@ public class EntityRedactionService {
|
|||||||
|
|
||||||
dictionaryService.updateDictionary();
|
dictionaryService.updateDictionary();
|
||||||
droolsExecutionService.updateRules();
|
droolsExecutionService.updateRules();
|
||||||
dictionaryService.clearLocalEntries();
|
long rulesVersion = droolsExecutionService.getRulesVersion();
|
||||||
|
|
||||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false, null));
|
Dictionary dictionary = dictionaryService.getDeepCopyDictionary();
|
||||||
|
|
||||||
if (dictionaryService.hasLocalEntries()) {
|
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, dictionary, false, null));
|
||||||
|
|
||||||
|
if (dictionary.hasLocalEntries()) {
|
||||||
|
|
||||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||||
documentEntities.stream().forEach(entity -> {
|
documentEntities.stream().forEach(entity -> {
|
||||||
if (dictionaryService.isHint(entity.getType())) {
|
if (dictionary.isHint(entity.getType())) {
|
||||||
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
|
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
|
||||||
.add(entity);
|
.add(entity);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, true, hintsPerSectionNumber);
|
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionary, true, hintsPerSectionNumber);
|
||||||
// HashSet keeps the older value, but we want the new only.
|
// HashSet keeps the older value, but we want the new only.
|
||||||
documentEntities.removeAll(foundByLocal);
|
documentEntities.removeAll(foundByLocal);
|
||||||
documentEntities.addAll(foundByLocal);
|
documentEntities.addAll(foundByLocal);
|
||||||
@ -81,15 +85,20 @@ public class EntityRedactionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dictionaryService.updateExternalDictionary();
|
dictionaryService.updateExternalDictionary(dictionary);
|
||||||
|
|
||||||
|
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
|
||||||
|
classifiedDoc.setRulesVersion(rulesVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries,
|
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions,
|
||||||
|
Dictionary dictionary, boolean local,
|
||||||
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||||
|
|
||||||
Set<Entity> documentEntities = new HashSet<>();
|
Set<Entity> documentEntities = new HashSet<>();
|
||||||
int sectionNumber = 1;
|
int sectionNumber = 1;
|
||||||
|
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||||
|
|
||||||
SearchableText searchableText = paragraph.getSearchableText();
|
SearchableText searchableText = paragraph.getSearchableText();
|
||||||
@ -122,10 +131,11 @@ public class EntityRedactionService {
|
|||||||
searchableRow.addAll(textBlock.getSequences());
|
searchableRow.addAll(textBlock.getSequences());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries);
|
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
|
||||||
|
|
||||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||||
.dictionaryService(dictionaryService)
|
.isLocal(local)
|
||||||
|
.dictionaryTypes(dictionary.getTypes())
|
||||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
|
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
|
||||||
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
||||||
.collect(Collectors.toSet()) : rowEntities)
|
.collect(Collectors.toSet()) : rowEntities)
|
||||||
@ -134,18 +144,19 @@ public class EntityRedactionService {
|
|||||||
.headline(table.getHeadline())
|
.headline(table.getHeadline())
|
||||||
.sectionNumber(sectionNumber)
|
.sectionNumber(sectionNumber)
|
||||||
.tabularData(tabularData)
|
.tabularData(tabularData)
|
||||||
.build());
|
.build(), searchableRow));
|
||||||
|
|
||||||
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), searchableRow));
|
|
||||||
sectionNumber++;
|
sectionNumber++;
|
||||||
}
|
}
|
||||||
sectionNumber++;
|
sectionNumber++;
|
||||||
}
|
}
|
||||||
|
|
||||||
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
|
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
|
||||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries);
|
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
|
||||||
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
|
|
||||||
.dictionaryService(dictionaryService)
|
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||||
|
.isLocal(local)
|
||||||
|
.dictionaryTypes(dictionary.getTypes())
|
||||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
|
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
|
||||||
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
||||||
.collect(Collectors.toSet()) : entities)
|
.collect(Collectors.toSet()) : entities)
|
||||||
@ -153,22 +164,43 @@ public class EntityRedactionService {
|
|||||||
.searchText(searchableText.toString())
|
.searchText(searchableText.toString())
|
||||||
.headline(paragraph.getHeadline())
|
.headline(paragraph.getHeadline())
|
||||||
.sectionNumber(sectionNumber)
|
.sectionNumber(sectionNumber)
|
||||||
.build());
|
.build(), searchableText));
|
||||||
|
|
||||||
documentEntities.addAll(clearAndFindPositions(analysedSection.getEntities(), searchableText));
|
|
||||||
sectionNumber++;
|
sectionNumber++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||||
|
Section analysedRowSection = droolsExecutionService.executeRules(sectionSearchableTextPair.getSection());
|
||||||
|
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), sectionSearchableTextPair.getSearchableText(), dictionary));
|
||||||
|
|
||||||
|
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||||
|
if (dictionary.isRecommendation(key)){
|
||||||
|
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||||
|
if (!dictionary.containsValue(key, value)){
|
||||||
|
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> {
|
||||||
|
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
return documentEntities;
|
return documentEntities;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text) {
|
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text, Dictionary dictionary) {
|
||||||
|
|
||||||
removeEntitiesContainedInLarger(entities);
|
removeEntitiesContainedInLarger(entities);
|
||||||
|
|
||||||
for (Entity entity : entities) {
|
for (Entity entity : entities) {
|
||||||
if (entity.getPositionSequences().isEmpty()) {
|
if (entity.getPositionSequences().isEmpty()) {
|
||||||
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity
|
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionary.isCaseInsensitiveDictionary(entity
|
||||||
.getType()), entity.getTargetSequences()));
|
.getType()), entity.getTargetSequences()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -177,7 +209,8 @@ public class EntityRedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) {
|
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||||
|
List<DictionaryModel> dictionary, boolean local) {
|
||||||
|
|
||||||
Set<Entity> found = new HashSet<>();
|
Set<Entity> found = new HashSet<>();
|
||||||
String searchableString = searchableText.toString();
|
String searchableString = searchableText.toString();
|
||||||
@ -186,7 +219,7 @@ public class EntityRedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String lowercaseInputString = searchableString.toLowerCase();
|
String lowercaseInputString = searchableString.toLowerCase();
|
||||||
for (DictionaryModel model : dictionaryService.getDictionary()) {
|
for (DictionaryModel model : dictionary) {
|
||||||
if (model.isCaseInsensitive()) {
|
if (model.isCaseInsensitive()) {
|
||||||
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
|
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
|
||||||
} else {
|
} else {
|
||||||
@ -231,7 +264,8 @@ public class EntityRedactionService {
|
|||||||
for (Entity word : entities) {
|
for (Entity word : entities) {
|
||||||
for (Entity inner : entities) {
|
for (Entity inner : entities) {
|
||||||
if (inner.getWord().length() < word.getWord()
|
if (inner.getWord().length() < word.getWord()
|
||||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
|
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word
|
||||||
|
.getSectionNumber() == inner.getSectionNumber()) {
|
||||||
wordsToRemove.add(inner);
|
wordsToRemove.add(inner);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -382,7 +382,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
System.out.println("redactionTest");
|
System.out.println("redactionTest");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
||||||
|
|
||||||
RedactionRequest request = RedactionRequest.builder()
|
RedactionRequest request = RedactionRequest.builder()
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||||
|
|||||||
@ -1,2 +1,10 @@
|
|||||||
Long-term
|
Long-term
|
||||||
Brown liquid
|
Brown liquid
|
||||||
|
Brown solid
|
||||||
|
Hand-held
|
||||||
|
Manual-Hand held
|
||||||
|
Manual-Hand held
|
||||||
|
Weight:
|
||||||
|
Sprague
|
||||||
|
Weight and length
|
||||||
|
Aeration: Gentle
|
||||||
Loading…
x
Reference in New Issue
Block a user