Made dictionaries thread-safe
This commit is contained in:
parent
3f69030b03
commit
44613ee117
@ -27,4 +27,6 @@ public class Document {
|
||||
|
||||
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
private SectionGrid sectionGrid = new SectionGrid();
|
||||
private long dictionaryVersion;
|
||||
private long rulesVersion;
|
||||
}
|
||||
|
||||
@ -18,7 +18,6 @@ import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
@ -40,7 +39,6 @@ public class RedactionController implements RedactionResource {
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final PdfFlattenService pdfFlattenService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final DictionaryService dictionaryService;
|
||||
|
||||
|
||||
@Override
|
||||
@ -57,11 +55,11 @@ public class RedactionController implements RedactionResource {
|
||||
if (redactionRequest.isFlatRedaction()) {
|
||||
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
|
||||
return convert(flatDocument, classifiedDoc.getPages()
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
|
||||
}
|
||||
|
||||
return convert(pdDocument, classifiedDoc.getPages()
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid());
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RedactionException(e);
|
||||
@ -142,21 +140,20 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
|
||||
|
||||
return convert(document, numberOfPages, null, null);
|
||||
return convert(document, numberOfPages, null, null, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages,
|
||||
List<RedactionLogEntry> redactionLogEntities,
|
||||
SectionGrid sectionGrid) throws IOException {
|
||||
SectionGrid sectionGrid, long dictionaryVersion, long rulesVersion) throws IOException {
|
||||
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
document.save(byteArrayOutputStream);
|
||||
return RedactionResult.builder()
|
||||
.document(byteArrayOutputStream.toByteArray())
|
||||
.numberOfPages(numberOfPages)
|
||||
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService
|
||||
.getRulesVersion()))
|
||||
.redactionLog(new RedactionLog(redactionLogEntities,dictionaryVersion, rulesVersion))
|
||||
.sectionGrid(sectionGrid)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -0,0 +1,88 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
|
||||
@Data
|
||||
public class Dictionary {
|
||||
|
||||
public static final String RECOMMENDATION_PREFIX = "recommendation_";
|
||||
|
||||
@Getter
|
||||
private List<DictionaryModel> dictionaryModels;
|
||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||
|
||||
@Getter
|
||||
private long version;
|
||||
|
||||
|
||||
public Dictionary(List<DictionaryModel> dictionaryModels, long dictionaryVersion){
|
||||
this.dictionaryModels = dictionaryModels;
|
||||
this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm));
|
||||
this.version = dictionaryVersion;
|
||||
}
|
||||
|
||||
|
||||
public boolean isRecommendation(String type) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
if (model != null) {
|
||||
return model.isRecommendation();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public boolean hasLocalEntries() {
|
||||
|
||||
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
|
||||
}
|
||||
|
||||
|
||||
public Set<String> getTypes() {
|
||||
|
||||
return localAccessMap.keySet();
|
||||
}
|
||||
|
||||
|
||||
public boolean containsValue(String type, String value) {
|
||||
|
||||
if (localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||
.getEntries()
|
||||
.contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||
.getLocalEntries()
|
||||
.contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type)
|
||||
.getEntries()
|
||||
.contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type)
|
||||
.getLocalEntries()
|
||||
.contains(value)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public boolean isHint(String type) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
if (model != null) {
|
||||
return model.isHint();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isCaseInsensitiveDictionary(String type) {
|
||||
|
||||
DictionaryModel dictionaryModel = localAccessMap.get(type);
|
||||
if (dictionaryModel != null) {
|
||||
return dictionaryModel.isCaseInsensitive();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -8,7 +9,7 @@ import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class DictionaryModel {
|
||||
public class DictionaryModel implements Serializable {
|
||||
|
||||
private String type;
|
||||
private int rank;
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.RECOMMENDATION_PREFIX;
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -12,7 +13,6 @@ import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
|
||||
import lombok.Builder;
|
||||
@ -24,7 +24,12 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Builder
|
||||
public class Section {
|
||||
|
||||
private DictionaryService dictionaryService;
|
||||
private boolean isLocal;
|
||||
|
||||
private Set<String> dictionaryTypes;
|
||||
|
||||
@Builder.Default
|
||||
private Map<String, Set<String>> localDictionaryAdds = new HashMap<>();
|
||||
|
||||
private Set<Entity> entities;
|
||||
|
||||
@ -66,7 +71,7 @@ public class Section {
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
||||
|
||||
boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type);
|
||||
boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
||||
@ -82,7 +87,7 @@ public class Section {
|
||||
|
||||
public void redactNot(String type, int ruleNumber, String reason) {
|
||||
|
||||
boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type);
|
||||
boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
||||
@ -156,8 +161,8 @@ public class Section {
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.removeAll(found);
|
||||
entities.addAll(found);
|
||||
if (redactEverywhere) {
|
||||
dictionaryService.addToLocalDictionary(asType, value.trim());
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -190,8 +195,8 @@ public class Section {
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.removeAll(found);
|
||||
entities.addAll(found);
|
||||
if (redactEverywhere) {
|
||||
dictionaryService.addToLocalDictionary(asType, line.trim());
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -300,7 +305,7 @@ public class Section {
|
||||
|
||||
entities = removeEntitiesContainedInLarger(entities);
|
||||
|
||||
if (addAsRecommendations) {
|
||||
if (addAsRecommendations && !isLocal()) {
|
||||
String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
||||
Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER;
|
||||
Matcher matcher = pattern.matcher(cleanedWord);
|
||||
@ -308,13 +313,11 @@ public class Section {
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group().trim();
|
||||
if (match.length() >= 3) {
|
||||
if(!dictionaryService.getDictionary(type).getEntries().contains(match) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(match)) {
|
||||
dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, match);
|
||||
}
|
||||
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>())
|
||||
.add(match);
|
||||
String lastname = match.split(" ")[0];
|
||||
if(!dictionaryService.getDictionary(type).getEntries().contains(lastname) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(lastname)) {
|
||||
dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, lastname);
|
||||
}
|
||||
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>())
|
||||
.add(lastname);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -322,3 +325,9 @@ public class Section {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Holds a {@link Section} together with the {@link SearchableText} it belongs to, so that
 * the two can be passed around and processed as one unit.
 */
@Data
|
||||
@AllArgsConstructor
|
||||
public class SectionSearchableTextPair {
|
||||
|
||||
// The section half of the pair.
private Section section;
|
||||
// The searchable text half of the pair.
private SearchableText searchableText;
|
||||
|
||||
}
|
||||
@ -11,12 +11,14 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
|
||||
import feign.FeignException;
|
||||
@ -29,7 +31,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class DictionaryService {
|
||||
|
||||
public static final String RECOMMENDATION_PREFIX = "recommendation_";
|
||||
|
||||
private final DictionaryClient dictionaryClient;
|
||||
|
||||
@ -55,23 +56,6 @@ public class DictionaryService {
|
||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||
|
||||
|
||||
public boolean hasLocalEntries() {
|
||||
|
||||
return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
|
||||
}
|
||||
|
||||
|
||||
public void addToLocalDictionary(String type, String value) {
|
||||
|
||||
localAccessMap.get(type).getLocalEntries().add(value);
|
||||
}
|
||||
|
||||
|
||||
public void clearLocalEntries() {
|
||||
|
||||
this.dictionary.forEach(dm -> dm.getLocalEntries().clear());
|
||||
}
|
||||
|
||||
|
||||
public void updateDictionary() {
|
||||
|
||||
@ -112,13 +96,13 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public void updateExternalDictionary(){
|
||||
dictionary.forEach(dm -> {
|
||||
public void updateExternalDictionary(Dictionary dictionary){
|
||||
dictionary.getDictionaryModels().forEach(dm -> {
|
||||
if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){
|
||||
dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false);
|
||||
long externalVersion = dictionaryClient.getVersion();
|
||||
if(externalVersion == dictionaryVersion + 1){
|
||||
dictionaryVersion = externalVersion;
|
||||
if(externalVersion == dictionary.getVersion() + 1){
|
||||
dictionary.setVersion(externalVersion);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -185,19 +169,14 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public boolean hasRecommendationDictionary(String type) {
|
||||
public Dictionary getDeepCopyDictionary(){
|
||||
List<DictionaryModel> copy = new ArrayList<>();
|
||||
|
||||
DictionaryModel model = localAccessMap.get(RECOMMENDATION_PREFIX + type);
|
||||
if (model != null) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
dictionary.forEach(dm -> {
|
||||
copy.add(SerializationUtils.clone(dm));
|
||||
});
|
||||
|
||||
|
||||
public DictionaryModel getDictionary(String type) {
|
||||
|
||||
return localAccessMap.get(type);
|
||||
return new Dictionary(copy, dictionaryVersion);
|
||||
}
|
||||
|
||||
}
|
||||
@ -21,11 +21,13 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@ -43,21 +45,23 @@ public class EntityRedactionService {
|
||||
|
||||
dictionaryService.updateDictionary();
|
||||
droolsExecutionService.updateRules();
|
||||
dictionaryService.clearLocalEntries();
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion();
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false, null));
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary();
|
||||
|
||||
if (dictionaryService.hasLocalEntries()) {
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, dictionary, false, null));
|
||||
|
||||
if (dictionary.hasLocalEntries()) {
|
||||
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
|
||||
documentEntities.stream().forEach(entity -> {
|
||||
if (dictionaryService.isHint(entity.getType())) {
|
||||
if (dictionary.isHint(entity.getType())) {
|
||||
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
|
||||
.add(entity);
|
||||
}
|
||||
});
|
||||
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, true, hintsPerSectionNumber);
|
||||
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionary, true, hintsPerSectionNumber);
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
documentEntities.removeAll(foundByLocal);
|
||||
documentEntities.addAll(foundByLocal);
|
||||
@ -81,15 +85,20 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
dictionaryService.updateExternalDictionary();
|
||||
dictionaryService.updateExternalDictionary(dictionary);
|
||||
|
||||
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
|
||||
classifiedDoc.setRulesVersion(rulesVersion);
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries,
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions,
|
||||
Dictionary dictionary, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
int sectionNumber = 1;
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
SearchableText searchableText = paragraph.getSearchableText();
|
||||
@ -122,10 +131,11 @@ public class EntityRedactionService {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
}
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries);
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
|
||||
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.dictionaryService(dictionaryService)
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
|
||||
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
||||
.collect(Collectors.toSet()) : rowEntities)
|
||||
@ -134,18 +144,19 @@ public class EntityRedactionService {
|
||||
.headline(table.getHeadline())
|
||||
.sectionNumber(sectionNumber)
|
||||
.tabularData(tabularData)
|
||||
.build());
|
||||
.build(), searchableRow));
|
||||
|
||||
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), searchableRow));
|
||||
sectionNumber++;
|
||||
}
|
||||
sectionNumber++;
|
||||
}
|
||||
|
||||
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries);
|
||||
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.dictionaryService(dictionaryService)
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
|
||||
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
||||
.collect(Collectors.toSet()) : entities)
|
||||
@ -153,22 +164,43 @@ public class EntityRedactionService {
|
||||
.searchText(searchableText.toString())
|
||||
.headline(paragraph.getHeadline())
|
||||
.sectionNumber(sectionNumber)
|
||||
.build());
|
||||
.build(), searchableText));
|
||||
|
||||
documentEntities.addAll(clearAndFindPositions(analysedSection.getEntities(), searchableText));
|
||||
sectionNumber++;
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), sectionSearchableTextPair.getSearchableText(), dictionary));
|
||||
|
||||
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
if (dictionary.isRecommendation(key)){
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
if (!dictionary.containsValue(key, value)){
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> {
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
});
|
||||
|
||||
return documentEntities;
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text) {
|
||||
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text, Dictionary dictionary) {
|
||||
|
||||
removeEntitiesContainedInLarger(entities);
|
||||
|
||||
for (Entity entity : entities) {
|
||||
if(entity.getPositionSequences().isEmpty()) {
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity
|
||||
if (entity.getPositionSequences().isEmpty()) {
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionary.isCaseInsensitiveDictionary(entity
|
||||
.getType()), entity.getTargetSequences()));
|
||||
}
|
||||
}
|
||||
@ -177,7 +209,8 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) {
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
List<DictionaryModel> dictionary, boolean local) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.toString();
|
||||
@ -186,7 +219,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionaryService.getDictionary()) {
|
||||
for (DictionaryModel model : dictionary) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
|
||||
} else {
|
||||
@ -231,7 +264,8 @@ public class EntityRedactionService {
|
||||
for (Entity word : entities) {
|
||||
for (Entity inner : entities) {
|
||||
if (inner.getWord().length() < word.getWord()
|
||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
|
||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word
|
||||
.getSectionNumber() == inner.getSectionNumber()) {
|
||||
wordsToRemove.add(inner);
|
||||
}
|
||||
}
|
||||
|
||||
@ -382,7 +382,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
|
||||
@ -1,2 +1,10 @@
|
||||
Long-term
|
||||
Brown liquid
|
||||
Brown liquid
|
||||
Brown solid
|
||||
Hand-held
|
||||
Manual-Hand held
|
||||
Manual-Hand held
|
||||
Weight:
|
||||
Sprague
|
||||
Weight and length
|
||||
Aeration: Gentle
|
||||
Loading…
x
Reference in New Issue
Block a user