diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 62f456c9..e6db5b9b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -82,6 +82,8 @@ public class Section { private ManualRedactions manualRedactions; + private boolean isInTable; + @Deprecated @SuppressWarnings("unused") @@ -204,6 +206,12 @@ public class Section { return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue())); } + @SuppressWarnings("unused") + @WhenCondition + public boolean fileAttributeContainsAnyOf(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) Set value) { + + return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.contains(attribute.getValue())); + } @SuppressWarnings("unused") @WhenCondition @@ -555,7 +563,36 @@ public class Section { @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true); + redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, false); + } + + + @ThenAction + @SuppressWarnings("unused") + public void redactByRegExWithNewlines(@Argument(ArgumentType.REGEX) String pattern, + @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, + @Argument(ArgumentType.INTEGER) int group, + @Argument(ArgumentType.TYPE) String asType, 
+ @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, + @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { + + redactByRegExWithNewlines(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, false); + } + + + @ThenAction + @SuppressWarnings("unused") + public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern, + @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, + @Argument(ArgumentType.INTEGER) int group, + @Argument(ArgumentType.TYPE) String asType, + @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, + @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger, + @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { + + redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger); } @@ -568,7 +605,7 @@ public class Section { @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) { - redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, null, false); + redactByRegEx(pattern, patternCaseInsensitive, group, asType, ruleNumber, reason, null, false, false); } @@ -583,7 +620,7 @@ public class Section { @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false); + redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false, false); } @@ -598,7 +635,7 @@ public class Section { @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false); + 
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false, false); } @@ -627,10 +664,43 @@ public class Section { legalBasis, true, skipRemoveEntitiesContainedInLarger, - sortedResult); + sortedResult, false); } + @ThenAction + @SuppressWarnings("unused") + public void redactBetween(@Argument(ArgumentType.STRING) String start, + @Argument(ArgumentType.STRING) String stop, + @Argument(ArgumentType.BOOLEAN) boolean includeStart, + @Argument(ArgumentType.BOOLEAN) boolean includeStop, + @Argument(ArgumentType.TYPE) String asType, + @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, + @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, + @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, + @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis, + @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger, + @Argument(ArgumentType.BOOLEAN) boolean sortedResult, + @Argument(ArgumentType.BOOLEAN) boolean ignoreTables) { + + redactBetween(start, + stop, + includeStart, + includeStop, + asType, + ruleNumber, + redactEverywhere, + excludeHeadLine, + reason, + legalBasis, + true, + skipRemoveEntitiesContainedInLarger, + sortedResult, ignoreTables); + } + + + @ThenAction @SuppressWarnings("unused") public void redactBetween(@Argument(ArgumentType.STRING) String start, @@ -658,7 +728,7 @@ public class Section { legalBasis, true, skipRemoveEntitiesContainedInLarger, - sortedResult); + sortedResult, false); } @@ -701,7 +771,7 @@ public class Section { legalBasis, true, skipRemoveEntitiesContainedInLarger, - sortedResult); + sortedResult, false); } } @@ -716,7 +786,19 @@ public class Section { @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason) { - redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false, false); + redactBetween(start, + stop, + 
false, + false, + asType, + ruleNumber, + redactEverywhere, + false, + reason, + null, + false, + false, + false, false); } @@ -730,7 +812,19 @@ public class Section { @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason) { - redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false, false); + redactBetween(start, + stop, + false, + false, + asType, + ruleNumber, + redactEverywhere, + excludeHeadLine, + reason, + null, + false, + false, + false, false); } @@ -1036,6 +1130,7 @@ public class Section { @ThenAction + @SuppressWarnings("unused") public void redactWordPartByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group, @@ -1132,6 +1227,33 @@ public class Section { } + @ThenAction + @SuppressWarnings("unused") + public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start, + @Argument(ArgumentType.TYPE) String asType, + @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, + @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, + @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { + + redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false); + } + + + @ThenAction + @SuppressWarnings("unused") + public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start, + @Argument(ArgumentType.TYPE) String asType, + @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, + @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, + @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger, + @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { + + redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, 
skipRemoveEntitiesContainedInLarger); + } + + private void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, @@ -1291,7 +1413,13 @@ public class Section { } - public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis) { + private void redactLineAfterAcrossColumns(String start, + String asType, + int ruleNumber, + boolean redactEverywhere, + String reason, + String legalBasis, + boolean skipRemoveEntitiesContainedInLarger) { String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n"); @@ -1299,6 +1427,7 @@ public class Section { for (String value : values) { if (StringUtils.isNotBlank(value)) { Set found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false); + found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger)); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { @@ -1310,7 +1439,24 @@ public class Section { } - private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction) { + private void redactByRegExWithNewlines(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger) { + + Pattern compiledPattern = Patterns.getCompiledMultilinePattern(pattern, patternCaseInsensitive); + + Matcher matcher = compiledPattern.matcher(text); + + while (matcher.find()) { + String match = matcher.group(group); + if (StringUtils.isNotBlank(match)) { + Set found = findEntities(match.replaceAll("\\n", " ").trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); + found.forEach(f -> 
f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger)); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); + } + } + } + + + private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger) { Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive); @@ -1320,6 +1466,7 @@ public class Section { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { Set found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); + found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger)); EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); } } @@ -1354,7 +1501,13 @@ public class Section { String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger, - boolean sortedResult) { + boolean sortedResult, + boolean ignoreTables) { + + + if(isInTable && ignoreTables){ + return; + } String[] values = new String[1]; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index b68c3916..55a6f1a2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -153,6 +153,7 @@ public class EntityRedactionService { .sectionAreas(reanalysisSection.getSectionAreas()) 
.fileAttributes(analyzeRequest.getFileAttributes()) .manualRedactions(analyzeRequest.getManualRedactions()) + .isInTable(reanalysisSection.isTable()) .build(), reanalysisSection.getSearchableText(), reanalysisSection.getCellStarts())); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java index d146b59a..0168adec 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java @@ -26,4 +26,16 @@ public class Patterns { return compiledPattern; } + + public Pattern getCompiledMultilinePattern(String pattern, boolean caseInsensitive) { + + /* The key carries a multiline marker: without it the lookup below would return any Pattern already cached under pattern + caseInsensitive, which may have been compiled without Pattern.MULTILINE (presumably by getCompiledPattern - TODO confirm it shares patternCache and this key scheme). */ String patternKey = pattern + caseInsensitive + "|MULTILINE"; + if (patternCache.containsKey(patternKey)) { + return patternCache.get(patternKey); + } + Pattern compiledPattern = Pattern.compile(pattern, (caseInsensitive ? Pattern.CASE_INSENSITIVE : 0) | Pattern.MULTILINE); + patternCache.put(patternKey, compiledPattern); + return compiledPattern; + } + }