Pull request #495: RSS-177: Added required rule functions for scm poc

Merge in RED/redaction-service from RSS-177 to master

* commit '5c38150d34d8558f0e0e19ecd340d459a132426d':
  RSS-177: Added required rule functions for scm poc
This commit is contained in:
Dominique Eiflaender 2022-11-03 14:22:40 +01:00
commit 19e607e8a8
3 changed files with 178 additions and 12 deletions

View File

@ -82,6 +82,8 @@ public class Section {
private ManualRedactions manualRedactions;
private boolean isInTable;
@Deprecated
@SuppressWarnings("unused")
@ -204,6 +206,12 @@ public class Section {
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
}
/**
 * Rule condition: checks whether a file attribute with the given label carries one of the given values.
 *
 * @param label label the file attribute must have
 * @param value set of accepted attribute values
 * @return {@code true} if at least one file attribute has a label equal to {@code label} and a value
 *         contained in {@code value}; {@code false} otherwise, including when there are no file
 *         attributes or when {@code label} or {@code value} is {@code null}
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeContainsAnyOf(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) Set<String> value) {

    // Missing inputs can never match; answer false instead of failing with a NullPointerException.
    if (fileAttributes == null || label == null || value == null) {
        return false;
    }

    // Attributes without a value are excluded before the lookup: null-hostile sets
    // (for example those created by Set.of) throw on contains(null).
    return fileAttributes.stream()
            .anyMatch(attribute -> label.equals(attribute.getLabel()) && attribute.getValue() != null && value.contains(attribute.getValue()));
}
@SuppressWarnings("unused")
@WhenCondition
@ -555,7 +563,36 @@ public class Section {
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true);
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, false);
}
/**
 * Rule action: forwards to the nine-argument {@code redactByRegExWithNewlines} overload with the
 * redaction flag enabled and {@code skipRemoveEntitiesContainedInLarger} disabled.
 */
@ThenAction
@SuppressWarnings("unused")
public void redactByRegExWithNewlines(@Argument(ArgumentType.REGEX) String pattern,
        @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
        @Argument(ArgumentType.INTEGER) int group,
        @Argument(ArgumentType.TYPE) String asType,
        @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
        @Argument(ArgumentType.STRING) String reason,
        @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {

    // Fixed settings of this convenience overload.
    final boolean redaction = true;
    final boolean skipRemoveEntitiesContainedInLarger = false;

    redactByRegExWithNewlines(pattern,
            patternCaseInsensitive,
            group,
            asType,
            ruleNumber,
            reason,
            legalBasis,
            redaction,
            skipRemoveEntitiesContainedInLarger);
}
/**
 * Rule action: forwards to the nine-argument {@code redactByRegEx} overload with the redaction flag
 * enabled, passing the caller's {@code skipRemoveEntitiesContainedInLarger} choice through unchanged.
 */
@ThenAction
@SuppressWarnings("unused")
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern,
        @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
        @Argument(ArgumentType.INTEGER) int group,
        @Argument(ArgumentType.TYPE) String asType,
        @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
        @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
        @Argument(ArgumentType.STRING) String reason,
        @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {

    // This overload always creates redactions.
    final boolean redaction = true;

    redactByRegEx(pattern,
            patternCaseInsensitive,
            group,
            asType,
            ruleNumber,
            reason,
            legalBasis,
            redaction,
            skipRemoveEntitiesContainedInLarger);
}
@ -568,7 +605,7 @@ public class Section {
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, null, false);
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, null, false, false);
}
@ -583,7 +620,7 @@ public class Section {
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false);
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false, false);
}
@ -598,7 +635,7 @@ public class Section {
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false);
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false, false);
}
@ -627,10 +664,43 @@ public class Section {
legalBasis,
true,
skipRemoveEntitiesContainedInLarger,
sortedResult);
sortedResult, false);
}
/**
 * Rule action: forwards every argument to the fourteen-argument {@code redactBetween} overload,
 * with the redaction flag enabled and the caller's {@code ignoreTables} choice passed through.
 */
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start,
        @Argument(ArgumentType.STRING) String stop,
        @Argument(ArgumentType.BOOLEAN) boolean includeStart,
        @Argument(ArgumentType.BOOLEAN) boolean includeStop,
        @Argument(ArgumentType.TYPE) String asType,
        @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
        @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
        @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
        @Argument(ArgumentType.STRING) String reason,
        @Argument(ArgumentType.LEGAL_BASIS) String legalBasis,
        @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
        @Argument(ArgumentType.BOOLEAN) boolean sortedResult,
        @Argument(ArgumentType.BOOLEAN) boolean ignoreTables) {

    // This overload always creates redactions.
    final boolean redaction = true;

    redactBetween(start, stop, includeStart, includeStop,
            asType, ruleNumber,
            redactEverywhere, excludeHeadLine,
            reason, legalBasis,
            redaction, skipRemoveEntitiesContainedInLarger, sortedResult, ignoreTables);
}
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start,
@ -658,7 +728,7 @@ public class Section {
legalBasis,
true,
skipRemoveEntitiesContainedInLarger,
sortedResult);
sortedResult, false);
}
@ -701,7 +771,7 @@ public class Section {
legalBasis,
true,
skipRemoveEntitiesContainedInLarger,
sortedResult);
sortedResult, false);
}
}
@ -716,7 +786,19 @@ public class Section {
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason) {
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false, false);
redactBetween(start,
stop,
false,
false,
asType,
ruleNumber,
redactEverywhere,
false,
reason,
null,
false,
false,
false, false);
}
@ -730,7 +812,19 @@ public class Section {
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
@Argument(ArgumentType.STRING) String reason) {
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false, false);
redactBetween(start,
stop,
false,
false,
asType,
ruleNumber,
redactEverywhere,
excludeHeadLine,
reason,
null,
false,
false,
false, false);
}
@ -1036,6 +1130,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactWordPartByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@ -1132,6 +1227,33 @@ public class Section {
}
/**
 * Rule action: forwards to the private {@code redactLineAfterAcrossColumns} implementation with
 * {@code skipRemoveEntitiesContainedInLarger} disabled.
 */
@ThenAction
@SuppressWarnings("unused")
public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start,
        @Argument(ArgumentType.TYPE) String asType,
        @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
        @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
        @Argument(ArgumentType.STRING) String reason,
        @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {

    // Fixed setting of this convenience overload.
    final boolean skipRemoveEntitiesContainedInLarger = false;

    redactLineAfterAcrossColumns(start,
            asType,
            ruleNumber,
            redactEverywhere,
            reason,
            legalBasis,
            skipRemoveEntitiesContainedInLarger);
}
/**
 * Rule action: forwards to the private {@code redactLineAfterAcrossColumns} implementation,
 * moving {@code skipRemoveEntitiesContainedInLarger} to the last argument position it expects.
 */
@ThenAction
@SuppressWarnings("unused")
public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start,
        @Argument(ArgumentType.TYPE) String asType,
        @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
        @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
        @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
        @Argument(ArgumentType.STRING) String reason,
        @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {

    redactLineAfterAcrossColumns(start,
            asType,
            ruleNumber,
            redactEverywhere,
            reason,
            legalBasis,
            skipRemoveEntitiesContainedInLarger);
}
private void redactAndRecommendByRegEx(String pattern,
boolean patternCaseInsensitive,
int group,
@ -1291,7 +1413,13 @@ public class Section {
}
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis) {
private void redactLineAfterAcrossColumns(String start,
String asType,
int ruleNumber,
boolean redactEverywhere,
String reason,
String legalBasis,
boolean skipRemoveEntitiesContainedInLarger) {
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
@ -1299,6 +1427,7 @@ public class Section {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
@ -1310,7 +1439,24 @@ public class Section {
}
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction) {
/**
 * Runs the given regular expression, compiled through {@code Patterns.getCompiledMultilinePattern},
 * against {@code text} and registers entities for every non-blank match of the requested group.
 * Newlines inside a match are replaced by spaces, and the result is trimmed, before the entity lookup.
 * Each found entity receives the given {@code skipRemoveEntitiesContainedInLarger} flag and is then
 * merged into {@code entities} via {@code EntitySearchUtils.addEntitiesWithHigherRank}.
 */
private void redactByRegExWithNewlines(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger) {

    Matcher regexMatcher = Patterns.getCompiledMultilinePattern(pattern, patternCaseInsensitive).matcher(text);

    while (regexMatcher.find()) {
        String captured = regexMatcher.group(group);
        if (StringUtils.isBlank(captured)) {
            // Nothing usable was captured for this occurrence.
            continue;
        }

        String singleLineValue = captured.replaceAll("\\n", " ").trim();
        Set<Entity> found = findEntities(singleLineValue, asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
        for (Entity entity : found) {
            entity.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger);
        }
        EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
    }
}
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -1320,6 +1466,7 @@ public class Section {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
Set<Entity> found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
}
}
@ -1354,7 +1501,13 @@ public class Section {
String legalBasis,
boolean redaction,
boolean skipRemoveEntitiesContainedInLarger,
boolean sortedResult) {
boolean sortedResult,
boolean ignoreTables) {
if(isInTable && ignoreTables){
return;
}
String[] values = new String[1];

View File

@ -153,6 +153,7 @@ public class EntityRedactionService {
.sectionAreas(reanalysisSection.getSectionAreas())
.fileAttributes(analyzeRequest.getFileAttributes())
.manualRedactions(analyzeRequest.getManualRedactions())
.isInTable(reanalysisSection.isTable())
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts()));
}

View File

@ -26,4 +26,16 @@ public class Patterns {
return compiledPattern;
}
/**
 * Returns a compiled pattern with {@link Pattern#MULTILINE} enabled, reusing a cached instance when available.
 *
 * @param pattern         regular expression to compile
 * @param caseInsensitive whether {@link Pattern#CASE_INSENSITIVE} is added to the flags
 * @return the cached or freshly compiled multiline pattern
 */
public Pattern getCompiledMultilinePattern(String pattern, boolean caseInsensitive) {

    // The key carries a multiline marker so it can never equal a plain "pattern + caseInsensitive" key.
    // Without it, an entry compiled without MULTILINE for the same expression could be returned here (or vice versa).
    // NOTE(review): getCompiledPattern appears to use the plain key on the same cache - confirm.
    String patternKey = pattern + caseInsensitive + "#MULTILINE";

    Pattern cachedPattern = patternCache.get(patternKey);
    if (cachedPattern != null) {
        return cachedPattern;
    }

    int flags = (caseInsensitive ? Pattern.CASE_INSENSITIVE : 0) | Pattern.MULTILINE;
    Pattern compiledPattern = Pattern.compile(pattern, flags);
    patternCache.put(patternKey, compiledPattern);
    return compiledPattern;
}
}