RSS-31: Allow skipping removeEntitiesContainedInLarger in the redactBetween rule and add a parameter to sort the result by position
This commit is contained in:
parent
28e437a037
commit
317a8a9af9
@ -54,6 +54,8 @@ public class Entity implements ReasonHolder {
|
||||
|
||||
private boolean resized;
|
||||
|
||||
private boolean skipRemoveEntitiesContainedInLarger;
|
||||
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason,
|
||||
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
|
||||
|
||||
@ -225,7 +225,11 @@ public class SearchableText {
|
||||
|
||||
/**
 * Convenience overload that applies {@link #getAsStringWithLinebreaksSorted(List)} to this object's own
 * {@code sequences}.
 *
 * @return whatever the list-taking overload returns for {@code this.sequences}
 */
public String getAsStringWithLinebreaksSorted() {
|
||||
|
||||
return getAsStringWithLinebreaksSorted(this.sequences);
|
||||
}
|
||||
|
||||
|
||||
public String getAsStringWithLinebreaksSorted(List<TextPositionSequence> sequences) {
|
||||
// One stable sort with a composite key: page first, then Y of the first text position, then X.
// This produces exactly the order the three chained stable sorts (X, then Y, then page) used to build,
// but sorts the stream once and states the key priority explicitly.
var sorted = sequences.stream().sorted(Comparator.comparing((TextPositionSequence a) -> a.getPage()).thenComparing(a -> a.getTextPositions().get(0).getYDirAdj()).thenComparing(a -> a.getTextPositions().get(0).getXDirAdj())).collect(Collectors.toList());
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
@ -248,6 +252,7 @@ public class SearchableText {
|
||||
|
||||
|
||||
|
||||
|
||||
public String getAsStringWithLinebreaks() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
@ -534,7 +534,7 @@ public class Section {
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true);
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -545,7 +545,18 @@ public class Section {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true);
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Rule action overload of {@code redactBetween} that exposes two extra switches and always delegates to the
 * private worker with {@code redaction = true}.
 *
 * @param start                               start marker passed through to the worker
 * @param stop                                stop marker passed through to the worker
 * @param asType                              entity type assigned to the found entities
 * @param ruleNumber                          number of the rule that triggered this action
 * @param redactEverywhere                    passed through to the worker unchanged
 * @param excludeHeadLine                     passed through to the worker unchanged
 * @param reason                              redaction reason attached to the found entities
 * @param legalBasis                          legal basis attached to the found entities
 * @param skipRemoveEntitiesContainedInLarger copied onto every found entity; flagged entities are skipped by the
 *                                            contained-in-larger check in {@code EntitySearchUtils}
 * @param sortedResult                        when {@code true}, the worker replaces each found entity's word with
 *                                            the position-sorted text of its position sequences
 */
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis, @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger, @Argument(ArgumentType.BOOLEAN) boolean sortedResult) {
|
||||
|
||||
// Trailing arguments: redaction is fixed to true; the two new flags are forwarded as given.
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
|
||||
}
|
||||
|
||||
|
||||
@ -556,7 +567,7 @@ public class Section {
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false);
|
||||
// Trailing flags are redaction=false, skipRemoveEntitiesContainedInLarger=false, sortedResult=false.
// All three must be passed: with only two trailing booleans this call binds to the public ten-parameter
// redactBetween overload, which delegates with redaction=true and would turn this non-redacting call
// into a redaction.
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -566,7 +577,7 @@ public class Section {
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false);
|
||||
// Trailing flags are redaction=false, skipRemoveEntitiesContainedInLarger=false, sortedResult=false.
// All three must be passed: with only two trailing booleans this call binds to the public ten-parameter
// redactBetween overload, which delegates with redaction=true and would turn this non-redacting call
// into a redaction.
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -580,6 +591,8 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@ -834,7 +847,7 @@ public class Section {
|
||||
public void redactSectionText(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactBetween("", "", type, ruleNumber, false, false, reason, legalBasis, true);
|
||||
// Trailing flags are redaction=true, skipRemoveEntitiesContainedInLarger=false, sortedResult=false.
// All three must be passed: with only two trailing booleans this call binds to the public ten-parameter
// redactBetween overload, where the `true` is read as skipRemoveEntitiesContainedInLarger and silently
// disables the contained-in-larger cleanup for the whole section text.
redactBetween("", "", type, ruleNumber, false, false, reason, legalBasis, true, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -843,7 +856,7 @@ public class Section {
|
||||
public void redactSectionTextWithoutHeadLine(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactBetween("", "", type, ruleNumber, false, true, reason, legalBasis, true);
|
||||
// Trailing flags are redaction=true, skipRemoveEntitiesContainedInLarger=false, sortedResult=false.
// All three must be passed: with only two trailing booleans this call binds to the public ten-parameter
// redactBetween overload, where the `true` is read as skipRemoveEntitiesContainedInLarger and silently
// disables the contained-in-larger cleanup for the whole section text.
redactBetween("", "", type, ruleNumber, false, true, reason, legalBasis, true, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -1054,7 +1067,7 @@ public class Section {
|
||||
|
||||
|
||||
private void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere, boolean excludeHeadLine, String reason, String legalBasis,
|
||||
boolean redaction) {
|
||||
boolean redaction, boolean skipRemoveEntitiesContainedInLarger, boolean sortedResult) {
|
||||
|
||||
String[] values = new String[1];
|
||||
|
||||
@ -1077,6 +1090,13 @@ public class Section {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
found.forEach(f -> {
|
||||
f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger);
|
||||
if(sortedResult){
|
||||
f.setWord(searchableText.getAsStringWithLinebreaksSorted(f.getPositionSequences().stream().map(EntityPositionSequence::getSequences).flatMap(Collection::stream).collect(Collectors.toList())));
|
||||
}
|
||||
});
|
||||
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
|
||||
@ -150,6 +150,10 @@ public class EntitySearchUtils {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
if(inner.isSkipRemoveEntitiesContainedInLarger() || word.isSkipRemoveEntitiesContainedInLarger()){
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inner.getWord().length() < word.getWord()
|
||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
|
||||
if (word.getEntityType().equals(EntityType.RECOMMENDATION) && inner.getEntityType().equals(EntityType.ENTITY)) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user