Pull request #495: RSS-177: Added required rule functions for scm poc
Merge in RED/redaction-service from RSS-177 to master * commit '5c38150d34d8558f0e0e19ecd340d459a132426d': RSS-177: Added required rule functions for scm poc
This commit is contained in:
commit
19e607e8a8
@ -82,6 +82,8 @@ public class Section {
|
||||
|
||||
private ManualRedactions manualRedactions;
|
||||
|
||||
private boolean isInTable;
|
||||
|
||||
|
||||
@Deprecated
|
||||
@SuppressWarnings("unused")
|
||||
@ -204,6 +206,12 @@ public class Section {
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeContainsAnyOf(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) Set<String> value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.contains(attribute.getValue()));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
@ -555,7 +563,36 @@ public class Section {
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true);
|
||||
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, false);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactByRegExWithNewlines(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactByRegExWithNewlines(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, false);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger);
|
||||
}
|
||||
|
||||
|
||||
@ -568,7 +605,7 @@ public class Section {
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, null, false);
|
||||
redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, null, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -583,7 +620,7 @@ public class Section {
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false);
|
||||
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -598,7 +635,7 @@ public class Section {
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false);
|
||||
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -627,10 +664,43 @@ public class Section {
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult);
|
||||
sortedResult, false);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean includeStart,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean includeStop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean sortedResult,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean ignoreTables) {
|
||||
|
||||
redactBetween(start,
|
||||
stop,
|
||||
includeStart,
|
||||
includeStop,
|
||||
asType,
|
||||
ruleNumber,
|
||||
redactEverywhere,
|
||||
excludeHeadLine,
|
||||
reason,
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult, ignoreTables);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@ -658,7 +728,7 @@ public class Section {
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult);
|
||||
sortedResult, false);
|
||||
}
|
||||
|
||||
|
||||
@ -701,7 +771,7 @@ public class Section {
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult);
|
||||
sortedResult, false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -716,7 +786,19 @@ public class Section {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false, false);
|
||||
redactBetween(start,
|
||||
stop,
|
||||
false,
|
||||
false,
|
||||
asType,
|
||||
ruleNumber,
|
||||
redactEverywhere,
|
||||
false,
|
||||
reason,
|
||||
null,
|
||||
false,
|
||||
false,
|
||||
false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -730,7 +812,19 @@ public class Section {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false, false);
|
||||
redactBetween(start,
|
||||
stop,
|
||||
false,
|
||||
false,
|
||||
asType,
|
||||
ruleNumber,
|
||||
redactEverywhere,
|
||||
excludeHeadLine,
|
||||
reason,
|
||||
null,
|
||||
false,
|
||||
false,
|
||||
false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -1036,6 +1130,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactWordPartByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@ -1132,6 +1227,33 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger);
|
||||
}
|
||||
|
||||
|
||||
private void redactAndRecommendByRegEx(String pattern,
|
||||
boolean patternCaseInsensitive,
|
||||
int group,
|
||||
@ -1291,7 +1413,13 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis) {
|
||||
private void redactLineAfterAcrossColumns(String start,
|
||||
String asType,
|
||||
int ruleNumber,
|
||||
boolean redactEverywhere,
|
||||
String reason,
|
||||
String legalBasis,
|
||||
boolean skipRemoveEntitiesContainedInLarger) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
|
||||
|
||||
@ -1299,6 +1427,7 @@ public class Section {
|
||||
for (String value : values) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
@ -1310,7 +1439,24 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction) {
|
||||
private void redactByRegExWithNewlines(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledMultilinePattern(pattern, patternCaseInsensitive);
|
||||
|
||||
Matcher matcher = compiledPattern.matcher(text);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(group);
|
||||
if (StringUtils.isNotBlank(match)) {
|
||||
Set<Entity> found = findEntities(match.replaceAll("\\n", " ").trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -1320,6 +1466,7 @@ public class Section {
|
||||
String match = matcher.group(group);
|
||||
if (StringUtils.isNotBlank(match)) {
|
||||
Set<Entity> found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
}
|
||||
}
|
||||
@ -1354,7 +1501,13 @@ public class Section {
|
||||
String legalBasis,
|
||||
boolean redaction,
|
||||
boolean skipRemoveEntitiesContainedInLarger,
|
||||
boolean sortedResult) {
|
||||
boolean sortedResult,
|
||||
boolean ignoreTables) {
|
||||
|
||||
|
||||
if(isInTable && ignoreTables){
|
||||
return;
|
||||
}
|
||||
|
||||
String[] values = new String[1];
|
||||
|
||||
|
||||
@ -153,6 +153,7 @@ public class EntityRedactionService {
|
||||
.sectionAreas(reanalysisSection.getSectionAreas())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.manualRedactions(analyzeRequest.getManualRedactions())
|
||||
.isInTable(reanalysisSection.isTable())
|
||||
.build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts()));
|
||||
|
||||
}
|
||||
|
||||
@ -26,4 +26,16 @@ public class Patterns {
|
||||
return compiledPattern;
|
||||
}
|
||||
|
||||
|
||||
public Pattern getCompiledMultilinePattern(String pattern, boolean caseInsensitive) {
|
||||
|
||||
String patternKey = pattern + caseInsensitive;
|
||||
if (patternCache.containsKey(patternKey)) {
|
||||
return patternCache.get(patternKey);
|
||||
}
|
||||
Pattern compiledPattern = Pattern.compile(pattern, (caseInsensitive ? Pattern.CASE_INSENSITIVE : 0) | Pattern.MULTILINE);
|
||||
patternCache.put(patternKey, compiledPattern);
|
||||
return compiledPattern;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user