RSS-31: Allow skipping removeEntitiesContainedInLarger in the redactBetween rule and add a parameter to sort the result by position

This commit is contained in:
deiflaender 2022-09-09 13:25:53 +02:00
parent 28e437a037
commit 317a8a9af9
4 changed files with 38 additions and 7 deletions

View File

@ -54,6 +54,8 @@ public class Entity implements ReasonHolder {
private boolean resized;
private boolean skipRemoveEntitiesContainedInLarger;
public Entity(String word, String type, boolean redaction, String redactionReason,
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,

View File

@ -225,7 +225,11 @@ public class SearchableText {
/**
 * Convenience variant of {@link #getAsStringWithLinebreaksSorted(List)} that operates on
 * this object's own {@code sequences}.
 *
 * @return whatever the list-based overload returns for {@code this.sequences}
 */
public String getAsStringWithLinebreaksSorted() {
    var ownSequences = this.sequences;
    return getAsStringWithLinebreaksSorted(ownSequences);
}
public String getAsStringWithLinebreaksSorted(List<TextPositionSequence> sequences) {
var sorted = sequences.stream().sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj())).sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj())).sorted(Comparator.comparing(a -> a.getPage())).collect(Collectors.toList());
StringBuilder sb = new StringBuilder();
@ -248,6 +252,7 @@ public class SearchableText {
public String getAsStringWithLinebreaks() {
StringBuilder sb = new StringBuilder();

View File

@ -534,7 +534,7 @@ public class Section {
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true);
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false);
}
@ -545,7 +545,18 @@ public class Section {
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true);
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false);
}
/**
 * Rule action that forwards to the private {@code redactBetween} worker, always passing
 * {@code true} for its {@code redaction} parameter.
 *
 * <p>Note: any ten-argument {@code redactBetween(...)} call whose last two arguments are
 * booleans binds to this overload, so the caller cannot choose the {@code redaction} value
 * here.
 *
 * @param start forwarded unchanged (presumably the start delimiter of the searched text)
 * @param stop forwarded unchanged (presumably the stop delimiter of the searched text)
 * @param asType forwarded unchanged; type assigned to the found entities
 * @param ruleNumber forwarded unchanged; number of the rule that triggered the action
 * @param redactEverywhere forwarded unchanged
 * @param excludeHeadLine forwarded unchanged
 * @param reason forwarded unchanged
 * @param legalBasis forwarded unchanged
 * @param skipRemoveEntitiesContainedInLarger flag the worker sets on every found entity
 * @param sortedResult when {@code true}, the worker replaces each found entity's word with
 *        the position-sorted text of its sequences
 */
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start,
        @Argument(ArgumentType.STRING) String stop,
        @Argument(ArgumentType.TYPE) String asType,
        @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
        @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
        @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
        @Argument(ArgumentType.STRING) String reason,
        @Argument(ArgumentType.LEGAL_BASIS) String legalBasis,
        @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
        @Argument(ArgumentType.BOOLEAN) boolean sortedResult) {

    // This public variant never lets the caller pick the redaction flag.
    final boolean redaction = true;
    redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis,
            redaction, skipRemoveEntitiesContainedInLarger, sortedResult);
}
@ -556,7 +567,7 @@ public class Section {
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason) {
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false);
// Pass all eleven arguments (redaction=false, skipRemoveEntitiesContainedInLarger=false, sortedResult=false):
// a ten-argument call binds to the public overload instead, which forces redaction=true.
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false, false, false);
}
@ -566,7 +577,7 @@ public class Section {
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason) {
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false);
// Pass all eleven arguments (redaction=false, skipRemoveEntitiesContainedInLarger=false, sortedResult=false):
// a ten-argument call binds to the public overload instead, which forces redaction=true.
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false, false, false);
}
@ -580,6 +591,8 @@ public class Section {
}
@ThenAction
@SuppressWarnings("unused")
public void redactNotLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@ -834,7 +847,7 @@ public class Section {
public void redactSectionText(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactBetween("", "", type, ruleNumber, false, false, reason, legalBasis, true);
// Pass all eleven arguments (redaction=true, skipRemoveEntitiesContainedInLarger=false, sortedResult=false):
// a ten-argument call binds to the public overload, where the trailing "true" would be read as
// skipRemoveEntitiesContainedInLarger.
redactBetween("", "", type, ruleNumber, false, false, reason, legalBasis, true, false, false);
}
@ -843,7 +856,7 @@ public class Section {
public void redactSectionTextWithoutHeadLine(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactBetween("", "", type, ruleNumber, false, true, reason, legalBasis, true);
// Pass all eleven arguments (redaction=true, skipRemoveEntitiesContainedInLarger=false, sortedResult=false):
// a ten-argument call binds to the public overload, where the trailing "true" would be read as
// skipRemoveEntitiesContainedInLarger.
redactBetween("", "", type, ruleNumber, false, true, reason, legalBasis, true, false, false);
}
@ -1054,7 +1067,7 @@ public class Section {
private void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere, boolean excludeHeadLine, String reason, String legalBasis,
boolean redaction) {
boolean redaction, boolean skipRemoveEntitiesContainedInLarger, boolean sortedResult) {
String[] values = new String[1];
@ -1077,6 +1090,13 @@ public class Section {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
found.forEach(f -> {
f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger);
if(sortedResult){
f.setWord(searchableText.getAsStringWithLinebreaksSorted(f.getPositionSequences().stream().map(EntityPositionSequence::getSequences).flatMap(Collection::stream).collect(Collectors.toList())));
}
});
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {

View File

@ -150,6 +150,10 @@ public class EntitySearchUtils {
// continue;
// }
if(inner.isSkipRemoveEntitiesContainedInLarger() || word.isSkipRemoveEntitiesContainedInLarger()){
continue;
}
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
if (word.getEntityType().equals(EntityType.RECOMMENDATION) && inner.getEntityType().equals(EntityType.ENTITY)) {