From 44613ee1174da877ee87414903c126bd7c25c311 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Wed, 9 Dec 2020 13:14:04 +0100 Subject: [PATCH] Made dictionaries thread-safe --- .../server/classification/model/Document.java | 2 + .../controller/RedactionController.java | 13 ++- .../v1/server/redaction/model/Dictionary.java | 88 +++++++++++++++++++ .../redaction/model/DictionaryModel.java | 3 +- .../v1/server/redaction/model/Section.java | 41 +++++---- .../model/SectionSearchableTextPair.java | 13 +++ .../redaction/service/DictionaryService.java | 45 +++------- .../service/EntityRedactionService.java | 80 ++++++++++++----- .../v1/server/RedactionIntegrationTest.java | 2 +- .../resources/dictionaries/false_positive.txt | 10 ++- 10 files changed, 214 insertions(+), 83 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SectionSearchableTextPair.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java index 1749503d..e1cb08ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java @@ -27,4 +27,6 @@ public class Document { private List redactionLogEntities = new ArrayList<>(); private SectionGrid sectionGrid = new SectionGrid(); + private long dictionaryVersion; + private long rulesVersion; } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index 532b0d7d..d3b67e99 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -18,7 +18,6 @@ import com.iqser.red.service.redaction.v1.resources.RedactionResource; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.exception.RedactionException; -import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; @@ -40,7 +39,6 @@ public class RedactionController implements RedactionResource { private final EntityRedactionService entityRedactionService; private final PdfFlattenService pdfFlattenService; private final DroolsExecutionService droolsExecutionService; - private final DictionaryService dictionaryService; @Override @@ -57,11 +55,11 @@ public class RedactionController implements RedactionResource { if (redactionRequest.isFlatRedaction()) { PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument); return convert(flatDocument, classifiedDoc.getPages() - .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid()); + .size(), classifiedDoc.getRedactionLogEntities(), 
classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion()); } return convert(pdDocument, classifiedDoc.getPages() - .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid()); + .size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion()); } catch (IOException e) { throw new RedactionException(e); @@ -142,21 +140,20 @@ public class RedactionController implements RedactionResource { private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException { - return convert(document, numberOfPages, null, null); + return convert(document, numberOfPages, null, null, 0, 0); } private RedactionResult convert(PDDocument document, int numberOfPages, List redactionLogEntities, - SectionGrid sectionGrid) throws IOException { + SectionGrid sectionGrid, long dictionaryVersion, long rulesVersion) throws IOException { try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { document.save(byteArrayOutputStream); return RedactionResult.builder() .document(byteArrayOutputStream.toByteArray()) .numberOfPages(numberOfPages) - .redactionLog(new RedactionLog(redactionLogEntities, dictionaryService.getDictionaryVersion(), droolsExecutionService - .getRulesVersion())) + .redactionLog(new RedactionLog(redactionLogEntities,dictionaryVersion, rulesVersion)) .sectionGrid(sectionGrid) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java new file mode 100644 index 00000000..05ece6e3 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java @@ -0,0 +1,88 
@@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import lombok.Data; +import lombok.Getter; + +@Data +public class Dictionary { + + public static final String RECOMMENDATION_PREFIX = "recommendation_"; + + @Getter + private List dictionaryModels; + private Map localAccessMap = new HashMap<>(); + + @Getter + private long version; + + + public Dictionary(List dictionaryModels, long dictionaryVersion){ + this.dictionaryModels = dictionaryModels; + this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm)); + this.version = dictionaryVersion; + } + + + public boolean isRecommendation(String type) { + + DictionaryModel model = localAccessMap.get(type); + if (model != null) { + return model.isRecommendation(); + } + return false; + } + + + public boolean hasLocalEntries() { + + return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty()); + } + + + public Set getTypes() { + + return localAccessMap.keySet(); + } + + + public boolean containsValue(String type, String value) { + + if (localAccessMap.containsKey(type) && localAccessMap.get(type) + .getEntries() + .contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type) + .getLocalEntries() + .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) + .getEntries() + .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) + .getLocalEntries() + .contains(value)) { + return true; + } + return false; + } + + + public boolean isHint(String type) { + + DictionaryModel model = localAccessMap.get(type); + if (model != null) { + return model.isHint(); + } + return false; + } + + public boolean isCaseInsensitiveDictionary(String type) { + + DictionaryModel dictionaryModel = localAccessMap.get(type); + if 
(dictionaryModel != null) { + return dictionaryModel.isCaseInsensitive(); + } + return false; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java index a8b050a1..dd8aecc1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java @@ -1,6 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import java.io.Serializable; import java.util.Set; import lombok.AllArgsConstructor; @@ -8,7 +9,7 @@ import lombok.Data; @Data @AllArgsConstructor -public class DictionaryModel { +public class DictionaryModel implements Serializable { private String type; private int rank; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index fd91d244..f3ff6de6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1,8 +1,9 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.RECOMMENDATION_PREFIX; +import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX; import 
java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -12,7 +13,6 @@ import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; -import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; import lombok.Builder; @@ -24,7 +24,12 @@ import lombok.extern.slf4j.Slf4j; @Builder public class Section { - private DictionaryService dictionaryService; + private boolean isLocal; + + private Set dictionaryTypes; + + @Builder.Default + private Map> localDictionaryAdds = new HashMap<>(); private Set entities; @@ -66,7 +71,7 @@ public class Section { public void redact(String type, int ruleNumber, String reason, String legalBasis) { - boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type); + boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type); entities.forEach(entity -> { if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType() @@ -82,7 +87,7 @@ public class Section { public void redactNot(String type, int ruleNumber, String reason) { - boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type); + boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type); entities.forEach(entity -> { if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType() @@ -156,8 +161,8 @@ public class Section { // HashSet keeps the older value, but we want the new only. 
entities.removeAll(found); entities.addAll(found); - if (redactEverywhere) { - dictionaryService.addToLocalDictionary(asType, value.trim()); + if (redactEverywhere && !isLocal()) { + localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim()); } } } @@ -190,8 +195,8 @@ public class Section { // HashSet keeps the older value, but we want the new only. entities.removeAll(found); entities.addAll(found); - if (redactEverywhere) { - dictionaryService.addToLocalDictionary(asType, line.trim()); + if (redactEverywhere && !isLocal()) { + localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim()); } } } @@ -300,7 +305,7 @@ public class Section { entities = removeEntitiesContainedInLarger(entities); - if (addAsRecommendations) { + if (addAsRecommendations && !isLocal()) { String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER; Matcher matcher = pattern.matcher(cleanedWord); @@ -308,13 +313,11 @@ public class Section { while (matcher.find()) { String match = matcher.group().trim(); if (match.length() >= 3) { - if(!dictionaryService.getDictionary(type).getEntries().contains(match) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(match)) { - dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, match); - } + localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>()) + .add(match); String lastname = match.split(" ")[0]; - if(!dictionaryService.getDictionary(type).getEntries().contains(lastname) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(lastname)) { - dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, lastname); - } + localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>()) + .add(lastname); } } } @@ -322,3 +325,9 @@ public class Section { } } + + + + + + diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SectionSearchableTextPair.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SectionSearchableTextPair.java new file mode 100644 index 00000000..996eb50d --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SectionSearchableTextPair.java @@ -0,0 +1,13 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import lombok.AllArgsConstructor; +import lombok.Data; + +@Data +@AllArgsConstructor +public class SectionSearchableTextPair { + + private Section section; + private SearchableText searchableText; + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index df1f6ad2..9800499c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -11,12 +11,14 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.SerializationUtils; import org.springframework.stereotype.Service; import com.iqser.red.service.configuration.v1.api.model.Colors; import com.iqser.red.service.configuration.v1.api.model.TypeResponse; import com.iqser.red.service.configuration.v1.api.model.TypeResult; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import 
com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import feign.FeignException; @@ -29,7 +31,6 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class DictionaryService { - public static final String RECOMMENDATION_PREFIX = "recommendation_"; private final DictionaryClient dictionaryClient; @@ -55,23 +56,6 @@ public class DictionaryService { private Map localAccessMap = new HashMap<>(); - public boolean hasLocalEntries() { - - return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty()); - } - - - public void addToLocalDictionary(String type, String value) { - - localAccessMap.get(type).getLocalEntries().add(value); - } - - - public void clearLocalEntries() { - - this.dictionary.forEach(dm -> dm.getLocalEntries().clear()); - } - public void updateDictionary() { @@ -112,13 +96,13 @@ public class DictionaryService { } - public void updateExternalDictionary(){ - dictionary.forEach(dm -> { + public void updateExternalDictionary(Dictionary dictionary){ + dictionary.getDictionaryModels().forEach(dm -> { if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){ dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false); long externalVersion = dictionaryClient.getVersion(); - if(externalVersion == dictionaryVersion + 1){ - dictionaryVersion = externalVersion; + if(externalVersion == dictionary.getVersion() + 1){ + dictionary.setVersion(externalVersion); } } }); @@ -185,19 +169,14 @@ public class DictionaryService { } - public boolean hasRecommendationDictionary(String type) { + public Dictionary getDeepCopyDictionary(){ + List copy = new ArrayList<>(); - DictionaryModel model = localAccessMap.get(RECOMMENDATION_PREFIX + type); - if (model != null) { - return true; - } - return false; - } + dictionary.forEach(dm -> { + copy.add(SerializationUtils.clone(dm)); + }); - - public DictionaryModel 
getDictionary(String type) { - - return localAccessMap.get(type); + return new Dictionary(copy, dictionaryVersion); } } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 7622e8dc..c7b07398 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -21,11 +21,13 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; +import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; @@ -43,21 +45,23 @@ public class EntityRedactionService { dictionaryService.updateDictionary(); 
droolsExecutionService.updateRules(); - dictionaryService.clearLocalEntries(); + long rulesVersion = droolsExecutionService.getRulesVersion(); - Set documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false, null)); + Dictionary dictionary = dictionaryService.getDeepCopyDictionary(); - if (dictionaryService.hasLocalEntries()) { + Set documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, dictionary, false, null)); + + if (dictionary.hasLocalEntries()) { Map> hintsPerSectionNumber = new HashMap<>(); documentEntities.stream().forEach(entity -> { - if (dictionaryService.isHint(entity.getType())) { + if (dictionary.isHint(entity.getType())) { hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()) .add(entity); } }); - Set foundByLocal = findEntities(classifiedDoc, manualRedactions, true, hintsPerSectionNumber); + Set foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionary, true, hintsPerSectionNumber); // HashSet keeps the older value, but we want the new only. 
documentEntities.removeAll(foundByLocal); documentEntities.addAll(foundByLocal); @@ -81,15 +85,20 @@ public class EntityRedactionService { } } - dictionaryService.updateExternalDictionary(); + dictionaryService.updateExternalDictionary(dictionary); + + classifiedDoc.setDictionaryVersion(dictionary.getVersion()); + classifiedDoc.setRulesVersion(rulesVersion); } - private Set findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries, + private Set findEntities(Document classifiedDoc, ManualRedactions manualRedactions, + Dictionary dictionary, boolean local, Map> hintsPerSectionNumber) { Set documentEntities = new HashSet<>(); int sectionNumber = 1; + List sectionSearchableTextPairs = new ArrayList<>(); for (Paragraph paragraph : classifiedDoc.getParagraphs()) { SearchableText searchableText = paragraph.getSearchableText(); @@ -122,10 +131,11 @@ public class EntityRedactionService { searchableRow.addAll(textBlock.getSequences()); } } - Set rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, localEntries); + Set rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local); - Section analysedRowSection = droolsExecutionService.executeRules(Section.builder() - .dictionaryService(dictionaryService) + sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder() + .isLocal(local) + .dictionaryTypes(dictionary.getTypes()) .entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? 
Stream .concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream()) .collect(Collectors.toSet()) : rowEntities) @@ -134,18 +144,19 @@ public class EntityRedactionService { .headline(table.getHeadline()) .sectionNumber(sectionNumber) .tabularData(tabularData) - .build()); + .build(), searchableRow)); - documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), searchableRow)); sectionNumber++; } sectionNumber++; } addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber); - Set entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries); - Section analysedSection = droolsExecutionService.executeRules(Section.builder() - .dictionaryService(dictionaryService) + Set entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local); + + sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder() + .isLocal(local) + .dictionaryTypes(dictionary.getTypes()) .entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? 
Stream .concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream()) .collect(Collectors.toSet()) : entities) @@ -153,22 +164,43 @@ public class EntityRedactionService { .searchText(searchableText.toString()) .headline(paragraph.getHeadline()) .sectionNumber(sectionNumber) - .build()); + .build(), searchableText)); - documentEntities.addAll(clearAndFindPositions(analysedSection.getEntities(), searchableText)); sectionNumber++; } + + sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> { + Section analysedRowSection = droolsExecutionService.executeRules(sectionSearchableTextPair.getSection()); + documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), sectionSearchableTextPair.getSearchableText(), dictionary)); + + analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> { + if (dictionary.isRecommendation(key)){ + analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> { + if (!dictionary.containsValue(key, value)){ + dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value); + } + }); + } else { + analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> { + dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value); + }); + } + }); + + + }); + return documentEntities; } - private Set clearAndFindPositions(Set entities, SearchableText text) { + private Set clearAndFindPositions(Set entities, SearchableText text, Dictionary dictionary) { removeEntitiesContainedInLarger(entities); for (Entity entity : entities) { - if(entity.getPositionSequences().isEmpty()) { - entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity + if (entity.getPositionSequences().isEmpty()) { + entity.setPositionSequences(text.getSequences(entity.getWord(), dictionary.isCaseInsensitiveDictionary(entity .getType()), entity.getTargetSequences())); } } @@ -177,7 +209,8 @@ public class EntityRedactionService { } - private Set 
findEntities(SearchableText searchableText, String headline, int sectionNumber, boolean local) { + private Set findEntities(SearchableText searchableText, String headline, int sectionNumber, + List dictionary, boolean local) { Set found = new HashSet<>(); String searchableString = searchableText.toString(); @@ -186,7 +219,7 @@ public class EntityRedactionService { } String lowercaseInputString = searchableString.toLowerCase(); - for (DictionaryModel model : dictionaryService.getDictionary()) { + for (DictionaryModel model : dictionary) { if (model.isCaseInsensitive()) { found.addAll(find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber)); } else { @@ -231,7 +264,8 @@ public class EntityRedactionService { for (Entity word : entities) { for (Entity inner : entities) { if (inner.getWord().length() < word.getWord() - .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) { + .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word + .getSectionNumber() == inner.getSectionNumber()) { wordsToRemove.add(inner); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 9f07a1c9..f9fd5135 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -382,7 +382,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new 
ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt index 95da0f83..25e3e9e9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt @@ -1,2 +1,10 @@ Long-term -Brown liquid \ No newline at end of file +Brown liquid +Brown solid +Hand-held +Manual-Hand held +Manual-Hand held +Weight: +Sprague +Weight and length +Aeration: Gentle \ No newline at end of file