From 1bdc7d446cb8813413440a23d0255493ac1b6373 Mon Sep 17 00:00:00 2001 From: Kresnadi Budisantoso Date: Fri, 1 Apr 2022 14:16:39 +0200 Subject: [PATCH] RED-3761 add possibility to expand dictionary match with a prefix regex pattern --- .../v1/server/redaction/model/Section.java | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 9a9b4734..ab449757 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -249,6 +249,58 @@ public class Section { return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName).toString().equals(value); } + @ThenAction + public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern, + @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) { + + expandByPrefixRegEx(type, prefixPattern, patternCaseInsensitive, group, null); + } + + + @ThenAction + public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern, + @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group, + @Argument(ArgumentType.REGEX) String valuePattern) { + + Pattern compiledValuePattern = valuePattern == null ? null : Patterns.getCompiledPattern(valuePattern, patternCaseInsensitive); + Pattern compiledPrefixPattern = prefixPattern == null ? null : Patterns.getCompiledPattern(prefixPattern, patternCaseInsensitive); + + Set expanded = new HashSet<>(); + for (Entity entity : entities) { + + if (!entity.getType().equals(type) || entity.getTextBefore() == null) { + continue; + } + + if (valuePattern != null) { + Matcher valueMatcher = compiledValuePattern.matcher(entity.getWord()); + if (!valueMatcher.matches()) { + continue; + } + } + + Matcher matcher = compiledPrefixPattern.matcher(entity.getTextBefore()); + + while (matcher.find()) { + String match = matcher.group(group); + + if (StringUtils.isNotBlank(match)) { + + if (!StringUtils.endsWith(entity.getTextBefore(), match)) { + log.warn("Rules contain invalid group for expandByPrefixRegEx command; prefixPattern:{}, group{}", prefixPattern, group); + continue; + } + + Set expandedEntities = findEntities(match + entity.getWord(), type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE); + expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities)); + } + } + } + + EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary); + EntitySearchUtils.removeEntitiesContainedInLarger(entities); + } + @ThenAction public void expandByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String suffixPattern, @@ -291,6 +343,12 @@ public class Section { String match = matcher.group(group); if (StringUtils.isNotBlank(match)) { + + if (!StringUtils.startsWith(entity.getTextAfter(), match)) { + log.warn("Rules contain invalid group for expandByRegEx command; suffixPattern:{}, group{}", suffixPattern, group); + continue; + } + Set expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE); expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities)); }