Fixed style in EntityRedactionService
This commit is contained in:
parent
c953f161b2
commit
135a715e22
@ -27,6 +27,7 @@ public class EntityRedactionService {
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc) {
|
||||
|
||||
dictionaryService.updateDictionary();
|
||||
@ -56,8 +57,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline());
|
||||
Section analysedSection = droolsExecutionService.executeRules(Section
|
||||
.builder()
|
||||
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.entities(entities)
|
||||
.text(searchableText.getAsStringWithLinebreaks())
|
||||
.searchText(searchableText.toString())
|
||||
@ -65,9 +65,9 @@ public class EntityRedactionService {
|
||||
.build());
|
||||
|
||||
for (Entity entity : analysedSection.getEntities()) {
|
||||
if(dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
entity.setPositionSequences(searchableText.getSequences(entity.getWord(), true));
|
||||
} else{
|
||||
} else {
|
||||
entity.setPositionSequences(searchableText.getSequences(entity.getWord(), false));
|
||||
}
|
||||
}
|
||||
@ -77,8 +77,7 @@ public class EntityRedactionService {
|
||||
for (SearchableText searchableRow : searchableRows) {
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, "//TODO TableHeader");
|
||||
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section
|
||||
.builder()
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.entities(rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
.searchText(searchableRow.toString())
|
||||
@ -86,9 +85,9 @@ public class EntityRedactionService {
|
||||
.build());
|
||||
|
||||
for (Entity entity : analysedRowSection.getEntities()) {
|
||||
if(dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true));
|
||||
} else{
|
||||
} else {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false));
|
||||
}
|
||||
}
|
||||
@ -98,13 +97,15 @@ public class EntityRedactionService {
|
||||
|
||||
documentEntities.forEach(entity -> {
|
||||
entity.getPositionSequences().forEach(sequence -> {
|
||||
classifiedDoc.getEntities().computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>()).add(
|
||||
new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List.of(sequence), entity.getHeadline(), entity.getMatchedRule())
|
||||
);
|
||||
classifiedDoc.getEntities()
|
||||
.computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List
|
||||
.of(sequence), entity.getHeadline(), entity.getMatchedRule()));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline) {
|
||||
|
||||
String inputString = searchableText.toString();
|
||||
@ -113,7 +114,7 @@ public class EntityRedactionService {
|
||||
Set<Entity> found = new HashSet<>();
|
||||
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
|
||||
|
||||
if(dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())){
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
|
||||
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline));
|
||||
} else {
|
||||
found.addAll(find(inputString, entry.getValue(), entry.getKey(), headline));
|
||||
@ -126,7 +127,8 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> find(String inputString, Set<String> values, String type, String headline){
|
||||
private Set<Entity> find(String inputString, Set<String> values, String type, String headline) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
for (String value : values) {
|
||||
int startIndex;
|
||||
@ -135,9 +137,8 @@ public class EntityRedactionService {
|
||||
startIndex = inputString.indexOf(value, stopIndex);
|
||||
stopIndex = startIndex + value.length();
|
||||
|
||||
if (startIndex > -1 &&
|
||||
(startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) &&
|
||||
(stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
@ -146,20 +147,24 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
|
||||
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
for (Entity word : entities) {
|
||||
for (Entity inner : entities) {
|
||||
if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
|
||||
if (inner.getWord().length() < word.getWord()
|
||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
|
||||
wordsToRemove.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
entities.removeAll(wordsToRemove);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user