Fixed style in EntityRedactionService

This commit is contained in:
deiflaender 2020-07-27 13:39:31 +02:00
parent c953f161b2
commit 135a715e22

View File

@ -27,6 +27,7 @@ public class EntityRedactionService {
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
public void processDocument(Document classifiedDoc) {
dictionaryService.updateDictionary();
@ -56,8 +57,7 @@ public class EntityRedactionService {
}
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline());
Section analysedSection = droolsExecutionService.executeRules(Section
.builder()
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
.entities(entities)
.text(searchableText.getAsStringWithLinebreaks())
.searchText(searchableText.toString())
@ -65,9 +65,9 @@ public class EntityRedactionService {
.build());
for (Entity entity : analysedSection.getEntities()) {
if(dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
entity.setPositionSequences(searchableText.getSequences(entity.getWord(), true));
} else{
} else {
entity.setPositionSequences(searchableText.getSequences(entity.getWord(), false));
}
}
@ -77,8 +77,7 @@ public class EntityRedactionService {
for (SearchableText searchableRow : searchableRows) {
Set<Entity> rowEntities = findEntities(searchableRow, "//TODO TableHeader");
Section analysedRowSection = droolsExecutionService.executeRules(Section
.builder()
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
.entities(rowEntities)
.text(searchableRow.getAsStringWithLinebreaks())
.searchText(searchableRow.toString())
@ -86,9 +85,9 @@ public class EntityRedactionService {
.build());
for (Entity entity : analysedRowSection.getEntities()) {
if(dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true));
} else{
} else {
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false));
}
}
@ -98,13 +97,15 @@ public class EntityRedactionService {
documentEntities.forEach(entity -> {
entity.getPositionSequences().forEach(sequence -> {
classifiedDoc.getEntities().computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>()).add(
new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List.of(sequence), entity.getHeadline(), entity.getMatchedRule())
);
classifiedDoc.getEntities()
.computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List
.of(sequence), entity.getHeadline(), entity.getMatchedRule()));
});
});
}
private Set<Entity> findEntities(SearchableText searchableText, String headline) {
String inputString = searchableText.toString();
@ -113,7 +114,7 @@ public class EntityRedactionService {
Set<Entity> found = new HashSet<>();
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
if(dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())){
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline));
} else {
found.addAll(find(inputString, entry.getValue(), entry.getKey(), headline));
@ -126,7 +127,8 @@ public class EntityRedactionService {
}
private Set<Entity> find(String inputString, Set<String> values, String type, String headline){
private Set<Entity> find(String inputString, Set<String> values, String type, String headline) {
Set<Entity> found = new HashSet<>();
for (String value : values) {
int startIndex;
@ -135,9 +137,8 @@ public class EntityRedactionService {
startIndex = inputString.indexOf(value, stopIndex);
stopIndex = startIndex + value.length();
if (startIndex > -1 &&
(startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) &&
(stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline));
}
} while (startIndex > -1);
@ -146,20 +147,24 @@ public class EntityRedactionService {
}
/**
 * Tells whether a character is treated as a separator between words.
 *
 * <p>A character qualifies when it is whitespace, when it matches the regex class
 * {@code \p{Punct}}, or when it equals the double quote or one of the two further
 * character literals compared at the end of the expression.
 *
 * <p>NOTE(review): those last two character literals appear empty in this view;
 * presumably they are non-ASCII characters lost in rendering. Confirm against the
 * repository before relying on this line.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is treated as a separator, {@code false} otherwise
 */
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
}
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
List<Entity> wordsToRemove = new ArrayList<>();
for (Entity word : entities) {
for (Entity inner : entities) {
if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
wordsToRemove.add(inner);
}
}
}
entities.removeAll(wordsToRemove);
}
}