Made dictionaries thread-safe

This commit is contained in:
deiflaender 2020-12-09 13:14:04 +01:00
parent 3f69030b03
commit 44613ee117
10 changed files with 214 additions and 83 deletions

View File

@ -27,4 +27,6 @@ public class Document {
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>(); private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
private SectionGrid sectionGrid = new SectionGrid(); private SectionGrid sectionGrid = new SectionGrid();
private long dictionaryVersion;
private long rulesVersion;
} }

View File

@ -18,7 +18,6 @@ import com.iqser.red.service.redaction.v1.resources.RedactionResource;
import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException; import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService; import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
@ -40,7 +39,6 @@ public class RedactionController implements RedactionResource {
private final EntityRedactionService entityRedactionService; private final EntityRedactionService entityRedactionService;
private final PdfFlattenService pdfFlattenService; private final PdfFlattenService pdfFlattenService;
private final DroolsExecutionService droolsExecutionService; private final DroolsExecutionService droolsExecutionService;
private final DictionaryService dictionaryService;
@Override @Override
@ -57,11 +55,11 @@ public class RedactionController implements RedactionResource {
if (redactionRequest.isFlatRedaction()) { if (redactionRequest.isFlatRedaction()) {
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument); PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
return convert(flatDocument, classifiedDoc.getPages() return convert(flatDocument, classifiedDoc.getPages()
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid()); .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
} }
return convert(pdDocument, classifiedDoc.getPages() return convert(pdDocument, classifiedDoc.getPages()
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid()); .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
} catch (IOException e) { } catch (IOException e) {
throw new RedactionException(e); throw new RedactionException(e);
@ -142,21 +140,20 @@ public class RedactionController implements RedactionResource {
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException { private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
return convert(document, numberOfPages, null, null); return convert(document, numberOfPages, null, null, 0, 0);
} }
private RedactionResult convert(PDDocument document, int numberOfPages, private RedactionResult convert(PDDocument document, int numberOfPages,
List<RedactionLogEntry> redactionLogEntities, List<RedactionLogEntry> redactionLogEntities,
SectionGrid sectionGrid) throws IOException { SectionGrid sectionGrid, long dictionaryVersion, long rulesVersion) throws IOException {
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
document.save(byteArrayOutputStream); document.save(byteArrayOutputStream);
return RedactionResult.builder() return RedactionResult.builder()
.document(byteArrayOutputStream.toByteArray()) .document(byteArrayOutputStream.toByteArray())
.numberOfPages(numberOfPages) .numberOfPages(numberOfPages)
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService .redactionLog(new RedactionLog(redactionLogEntities,dictionaryVersion, rulesVersion))
.getRulesVersion()))
.sectionGrid(sectionGrid) .sectionGrid(sectionGrid)
.build(); .build();
} }

View File

@ -0,0 +1,88 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import lombok.Data;
import lombok.Getter;
/**
 * A set of {@link DictionaryModel}s together with the dictionary version they belong to.
 *
 * <p>Besides the list of models, the class keeps a map from {@link DictionaryModel#getType()}
 * to the model so that per-type questions can be answered without scanning the list.
 * Accessors and mutators for the fields are generated by Lombok's {@code @Data}.
 */
@Data
public class Dictionary {

    /** Prefix that distinguishes a recommendation dictionary's type from its base type. */
    public static final String RECOMMENDATION_PREFIX = "recommendation_";

    @Getter
    private List<DictionaryModel> dictionaryModels;

    /** Lookup of the models in {@link #dictionaryModels} keyed by their type. */
    private Map<String, DictionaryModel> localAccessMap = new HashMap<>();

    @Getter
    private long version;


    /**
     * Creates a dictionary over the given models and indexes them by type.
     *
     * @param dictionaryModels models making up this dictionary; must not be {@code null}
     * @param dictionaryVersion version number associated with the models
     */
    public Dictionary(List<DictionaryModel> dictionaryModels, long dictionaryVersion) {

        this.dictionaryModels = dictionaryModels;
        for (DictionaryModel model : this.dictionaryModels) {
            localAccessMap.put(model.getType(), model);
        }
        this.version = dictionaryVersion;
    }


    /**
     * @param type dictionary type to look up
     * @return {@code true} if a model of that type exists and is flagged as recommendation
     */
    public boolean isRecommendation(String type) {

        DictionaryModel model = localAccessMap.get(type);
        return model != null && model.isRecommendation();
    }


    /**
     * @return {@code true} if at least one model has a non-empty set of local entries
     */
    public boolean hasLocalEntries() {

        return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
    }


    /**
     * @return the types of all indexed models (a view of the lookup map's key set)
     */
    public Set<String> getTypes() {

        return localAccessMap.keySet();
    }


    /**
     * Checks whether {@code value} is already known for a type, either in the base dictionary
     * or in its recommendation dictionary, considering both regular and local entries.
     *
     * <p>The type may be given with or without {@link #RECOMMENDATION_PREFIX}. A prefixed type
     * is reduced to its base type first; otherwise the prefix would be applied twice and the
     * base dictionary would never be consulted.
     *
     * @param type  base or recommendation dictionary type
     * @param value entry to look for
     * @return {@code true} if any of the four entry sets contains the value
     */
    public boolean containsValue(String type, String value) {

        String baseType = type;
        if (type != null && type.startsWith(RECOMMENDATION_PREFIX)) {
            baseType = type.substring(RECOMMENDATION_PREFIX.length());
        }
        return modelContains(baseType, value) || modelContains(RECOMMENDATION_PREFIX + baseType, value);
    }


    /** Returns whether the model registered under {@code key} lists {@code value} as entry or local entry. */
    private boolean modelContains(String key, String value) {

        if (!localAccessMap.containsKey(key)) {
            return false;
        }
        DictionaryModel model = localAccessMap.get(key);
        return model.getEntries().contains(value) || model.getLocalEntries().contains(value);
    }


    /**
     * @param type dictionary type to look up
     * @return {@code true} if a model of that type exists and is flagged as hint
     */
    public boolean isHint(String type) {

        DictionaryModel model = localAccessMap.get(type);
        return model != null && model.isHint();
    }


    /**
     * @param type dictionary type to look up
     * @return {@code true} if a model of that type exists and is case-insensitive
     */
    public boolean isCaseInsensitiveDictionary(String type) {

        DictionaryModel model = localAccessMap.get(type);
        return model != null && model.isCaseInsensitive();
    }

}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model; package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.io.Serializable;
import java.util.Set; import java.util.Set;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
@ -8,7 +9,7 @@ import lombok.Data;
@Data @Data
@AllArgsConstructor @AllArgsConstructor
public class DictionaryModel { public class DictionaryModel implements Serializable {
private String type; private String type;
private int rank; private int rank;

View File

@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.redaction.model; package com.iqser.red.service.redaction.v1.server.redaction.model;
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.RECOMMENDATION_PREFIX; import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -12,7 +13,6 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
import lombok.Builder; import lombok.Builder;
@ -24,7 +24,12 @@ import lombok.extern.slf4j.Slf4j;
@Builder @Builder
public class Section { public class Section {
private DictionaryService dictionaryService; private boolean isLocal;
private Set<String> dictionaryTypes;
@Builder.Default
private Map<String, Set<String>> localDictionaryAdds = new HashMap<>();
private Set<Entity> entities; private Set<Entity> entities;
@ -66,7 +71,7 @@ public class Section {
public void redact(String type, int ruleNumber, String reason, String legalBasis) { public void redact(String type, int ruleNumber, String reason, String legalBasis) {
boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type); boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
entities.forEach(entity -> { entities.forEach(entity -> {
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType() if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
@ -82,7 +87,7 @@ public class Section {
public void redactNot(String type, int ruleNumber, String reason) { public void redactNot(String type, int ruleNumber, String reason) {
boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type); boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
entities.forEach(entity -> { entities.forEach(entity -> {
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType() if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
@ -156,8 +161,8 @@ public class Section {
// HashSet keeps the older value, but we want the new only. // HashSet keeps the older value, but we want the new only.
entities.removeAll(found); entities.removeAll(found);
entities.addAll(found); entities.addAll(found);
if (redactEverywhere) { if (redactEverywhere && !isLocal()) {
dictionaryService.addToLocalDictionary(asType, value.trim()); localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim());
} }
} }
} }
@ -190,8 +195,8 @@ public class Section {
// HashSet keeps the older value, but we want the new only. // HashSet keeps the older value, but we want the new only.
entities.removeAll(found); entities.removeAll(found);
entities.addAll(found); entities.addAll(found);
if (redactEverywhere) { if (redactEverywhere && !isLocal()) {
dictionaryService.addToLocalDictionary(asType, line.trim()); localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim());
} }
} }
} }
@ -300,7 +305,7 @@ public class Section {
entities = removeEntitiesContainedInLarger(entities); entities = removeEntitiesContainedInLarger(entities);
if (addAsRecommendations) { if (addAsRecommendations && !isLocal()) {
String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER; Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER;
Matcher matcher = pattern.matcher(cleanedWord); Matcher matcher = pattern.matcher(cleanedWord);
@ -308,13 +313,11 @@ public class Section {
while (matcher.find()) { while (matcher.find()) {
String match = matcher.group().trim(); String match = matcher.group().trim();
if (match.length() >= 3) { if (match.length() >= 3) {
if(!dictionaryService.getDictionary(type).getEntries().contains(match) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(match)) { localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>())
dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, match); .add(match);
}
String lastname = match.split(" ")[0]; String lastname = match.split(" ")[0];
if(!dictionaryService.getDictionary(type).getEntries().contains(lastname) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(lastname)) { localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>())
dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, lastname); .add(lastname);
}
} }
} }
} }
@ -322,3 +325,9 @@ public class Section {
} }
} }

View File

@ -0,0 +1,13 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Simple holder that keeps a {@link Section} together with the {@link SearchableText}
 * it is paired with. Accessors and the two-argument constructor are generated by Lombok.
 */
@Data
@AllArgsConstructor
public class SectionSearchableTextPair {
// The section half of the pair.
private Section section;
// The searchable text half of the pair.
private SearchableText searchableText;
}

View File

@ -11,12 +11,14 @@ import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.iqser.red.service.configuration.v1.api.model.Colors; import com.iqser.red.service.configuration.v1.api.model.Colors;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse; import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResult; import com.iqser.red.service.configuration.v1.api.model.TypeResult;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import feign.FeignException; import feign.FeignException;
@ -29,7 +31,6 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor @RequiredArgsConstructor
public class DictionaryService { public class DictionaryService {
public static final String RECOMMENDATION_PREFIX = "recommendation_";
private final DictionaryClient dictionaryClient; private final DictionaryClient dictionaryClient;
@ -55,23 +56,6 @@ public class DictionaryService {
private Map<String, DictionaryModel> localAccessMap = new HashMap<>(); private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
public boolean hasLocalEntries() {
return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
}
public void addToLocalDictionary(String type, String value) {
localAccessMap.get(type).getLocalEntries().add(value);
}
public void clearLocalEntries() {
this.dictionary.forEach(dm -> dm.getLocalEntries().clear());
}
public void updateDictionary() { public void updateDictionary() {
@ -112,13 +96,13 @@ public class DictionaryService {
} }
public void updateExternalDictionary(){ public void updateExternalDictionary(Dictionary dictionary){
dictionary.forEach(dm -> { dictionary.getDictionaryModels().forEach(dm -> {
if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){ if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){
dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false); dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false);
long externalVersion = dictionaryClient.getVersion(); long externalVersion = dictionaryClient.getVersion();
if(externalVersion == dictionaryVersion + 1){ if(externalVersion == dictionary.getVersion() + 1){
dictionaryVersion = externalVersion; dictionary.setVersion(externalVersion);
} }
} }
}); });
@ -185,19 +169,14 @@ public class DictionaryService {
} }
public boolean hasRecommendationDictionary(String type) { public Dictionary getDeepCopyDictionary(){
List<DictionaryModel> copy = new ArrayList<>();
DictionaryModel model = localAccessMap.get(RECOMMENDATION_PREFIX + type); dictionary.forEach(dm -> {
if (model != null) { copy.add(SerializationUtils.clone(dm));
return true; });
}
return false;
}
return new Dictionary(copy, dictionaryVersion);
public DictionaryModel getDictionary(String type) {
return localAccessMap.get(type);
} }
} }

View File

@ -21,11 +21,13 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section; import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@ -43,21 +45,23 @@ public class EntityRedactionService {
dictionaryService.updateDictionary(); dictionaryService.updateDictionary();
droolsExecutionService.updateRules(); droolsExecutionService.updateRules();
dictionaryService.clearLocalEntries(); long rulesVersion = droolsExecutionService.getRulesVersion();
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false, null)); Dictionary dictionary = dictionaryService.getDeepCopyDictionary();
if (dictionaryService.hasLocalEntries()) { Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, dictionary, false, null));
if (dictionary.hasLocalEntries()) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>(); Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
documentEntities.stream().forEach(entity -> { documentEntities.stream().forEach(entity -> {
if (dictionaryService.isHint(entity.getType())) { if (dictionary.isHint(entity.getType())) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()) hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
.add(entity); .add(entity);
} }
}); });
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, true, hintsPerSectionNumber); Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionary, true, hintsPerSectionNumber);
// HashSet keeps the older value, but we want the new only. // HashSet keeps the older value, but we want the new only.
documentEntities.removeAll(foundByLocal); documentEntities.removeAll(foundByLocal);
documentEntities.addAll(foundByLocal); documentEntities.addAll(foundByLocal);
@ -81,15 +85,20 @@ public class EntityRedactionService {
} }
} }
dictionaryService.updateExternalDictionary(); dictionaryService.updateExternalDictionary(dictionary);
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
classifiedDoc.setRulesVersion(rulesVersion);
} }
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries, private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions,
Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) { Map<Integer, Set<Entity>> hintsPerSectionNumber) {
Set<Entity> documentEntities = new HashSet<>(); Set<Entity> documentEntities = new HashSet<>();
int sectionNumber = 1; int sectionNumber = 1;
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (Paragraph paragraph : classifiedDoc.getParagraphs()) { for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
SearchableText searchableText = paragraph.getSearchableText(); SearchableText searchableText = paragraph.getSearchableText();
@ -122,10 +131,11 @@ public class EntityRedactionService {
searchableRow.addAll(textBlock.getSequences()); searchableRow.addAll(textBlock.getSequences());
} }
} }
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries); Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder() sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.dictionaryService(dictionaryService) .isLocal(local)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream .entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream()) .concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
.collect(Collectors.toSet()) : rowEntities) .collect(Collectors.toSet()) : rowEntities)
@ -134,18 +144,19 @@ public class EntityRedactionService {
.headline(table.getHeadline()) .headline(table.getHeadline())
.sectionNumber(sectionNumber) .sectionNumber(sectionNumber)
.tabularData(tabularData) .tabularData(tabularData)
.build()); .build(), searchableRow));
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), searchableRow));
sectionNumber++; sectionNumber++;
} }
sectionNumber++; sectionNumber++;
} }
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber); addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries); Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
.dictionaryService(dictionaryService) sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream .entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream()) .concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
.collect(Collectors.toSet()) : entities) .collect(Collectors.toSet()) : entities)
@ -153,22 +164,43 @@ public class EntityRedactionService {
.searchText(searchableText.toString()) .searchText(searchableText.toString())
.headline(paragraph.getHeadline()) .headline(paragraph.getHeadline())
.sectionNumber(sectionNumber) .sectionNumber(sectionNumber)
.build()); .build(), searchableText));
documentEntities.addAll(clearAndFindPositions(analysedSection.getEntities(), searchableText));
sectionNumber++; sectionNumber++;
} }
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(sectionSearchableTextPair.getSection());
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), sectionSearchableTextPair.getSearchableText(), dictionary));
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)){
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (!dictionary.containsValue(key, value)){
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
}
});
} else {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
});
});
return documentEntities; return documentEntities;
} }
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text) { private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text, Dictionary dictionary) {
removeEntitiesContainedInLarger(entities); removeEntitiesContainedInLarger(entities);
for (Entity entity : entities) { for (Entity entity : entities) {
if (entity.getPositionSequences().isEmpty()) { if (entity.getPositionSequences().isEmpty()) {
entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity entity.setPositionSequences(text.getSequences(entity.getWord(), dictionary.isCaseInsensitiveDictionary(entity
.getType()), entity.getTargetSequences())); .getType()), entity.getTargetSequences()));
} }
} }
@ -177,7 +209,8 @@ public class EntityRedactionService {
} }
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) { private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
List<DictionaryModel> dictionary, boolean local) {
Set<Entity> found = new HashSet<>(); Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString(); String searchableString = searchableText.toString();
@ -186,7 +219,7 @@ public class EntityRedactionService {
} }
String lowercaseInputString = searchableString.toLowerCase(); String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionaryService.getDictionary()) { for (DictionaryModel model : dictionary) {
if (model.isCaseInsensitive()) { if (model.isCaseInsensitive()) {
found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber)); found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber));
} else { } else {
@ -231,7 +264,8 @@ public class EntityRedactionService {
for (Entity word : entities) { for (Entity word : entities) {
for (Entity inner : entities) { for (Entity inner : entities) {
if (inner.getWord().length() < word.getWord() if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) { .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word
.getSectionNumber() == inner.getSectionNumber()) {
wordsToRemove.add(inner); wordsToRemove.add(inner);
} }
} }

View File

@ -382,7 +382,7 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest"); System.out.println("redactionTest");
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder() RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream())) .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))

View File

@ -1,2 +1,10 @@
Long-term Long-term
Brown liquid Brown liquid
Brown solid
Hand-held
Manual-Hand held
Manual-Hand held
Weight:
Sprague
Weight and length
Aeration: Gentle