From 135a715e22e6c2536b268db29161552cfd7a6c1c Mon Sep 17 00:00:00 2001 From: deiflaender Date: Mon, 27 Jul 2020 13:39:31 +0200 Subject: [PATCH] Fixed style in EnityRedactionService --- .../service/EntityRedactionService.java | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 10f9b851..73c15013 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -27,6 +27,7 @@ public class EntityRedactionService { private final DictionaryService dictionaryService; private final DroolsExecutionService droolsExecutionService; + public void processDocument(Document classifiedDoc) { dictionaryService.updateDictionary(); @@ -56,8 +57,7 @@ public class EntityRedactionService { } Set entities = findEntities(searchableText, paragraph.getHeadline()); - Section analysedSection = droolsExecutionService.executeRules(Section - .builder() + Section analysedSection = droolsExecutionService.executeRules(Section.builder() .entities(entities) .text(searchableText.getAsStringWithLinebreaks()) .searchText(searchableText.toString()) @@ -65,9 +65,9 @@ public class EntityRedactionService { .build()); for (Entity entity : analysedSection.getEntities()) { - if(dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) { + if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) { entity.setPositionSequences(searchableText.getSequences(entity.getWord(), true)); - } else{ + } else { entity.setPositionSequences(searchableText.getSequences(entity.getWord(), false)); } } @@ -77,8 +77,7 @@ public class EntityRedactionService { for (SearchableText searchableRow : searchableRows) { Set rowEntities = findEntities(searchableRow, "//TODO TableHeader"); - Section analysedRowSection = droolsExecutionService.executeRules(Section - .builder() + Section analysedRowSection = droolsExecutionService.executeRules(Section.builder() .entities(rowEntities) .text(searchableRow.getAsStringWithLinebreaks()) .searchText(searchableRow.toString()) @@ -86,9 +85,9 @@ public class EntityRedactionService { .build()); for (Entity entity : analysedRowSection.getEntities()) { - if(dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) { + if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) { entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true)); - } else{ + } else { entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false)); } } @@ -98,13 +97,15 @@ public class EntityRedactionService { documentEntities.forEach(entity -> { entity.getPositionSequences().forEach(sequence -> { - classifiedDoc.getEntities().computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>()).add( - new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List.of(sequence), entity.getHeadline(), entity.getMatchedRule()) - ); + classifiedDoc.getEntities() + .computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>()) + .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List + .of(sequence), entity.getHeadline(), entity.getMatchedRule())); }); }); } + private Set findEntities(SearchableText searchableText, String headline) { String inputString = searchableText.toString(); @@ -113,7 +114,7 @@ public class EntityRedactionService { Set found = new HashSet<>(); for (Map.Entry> entry : dictionaryService.getDictionary().entrySet()) { - if(dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())){ + if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) { found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline)); } else { found.addAll(find(inputString, entry.getValue(), entry.getKey(), headline)); @@ -126,7 +127,8 @@ public class EntityRedactionService { } - private Set find(String inputString, Set values, String type, String headline){ + private Set find(String inputString, Set values, String type, String headline) { + Set found = new HashSet<>(); for (String value : values) { int startIndex; @@ -135,9 +137,8 @@ public class EntityRedactionService { startIndex = inputString.indexOf(value, stopIndex); stopIndex = startIndex + value.length(); - if (startIndex > -1 && - (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && - (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { + if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString + .charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline)); } } while (startIndex > -1); @@ -146,20 +147,24 @@ public class EntityRedactionService { } - private boolean isSeparator(char c) { + return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’'; } + public void removeEntitiesContainedInLarger(Set entities) { + List wordsToRemove = new ArrayList<>(); for (Entity word : entities) { for (Entity inner : entities) { - if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) { + if (inner.getWord().length() < word.getWord() + .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) { wordsToRemove.add(inner); } } } entities.removeAll(wordsToRemove); } + }