Pull request #363: RED-3761 add possibility to expand dictionary match with a prefix regex pattern

Merge in RED/redaction-service from kbudisantoso/RED-3761-3.80.x to release/3.80.x

* commit '1bdc7d446cb8813413440a23d0255493ac1b6373':
  RED-3761 add possibility to expand dictionary match with a prefix regex pattern
This commit is contained in:
Kresnadi Budisantoso 2022-04-01 15:35:52 +02:00 committed by Dominique Eiflaender
commit 1e9923e212

View File

@@ -249,6 +249,58 @@ public class Section {
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName).toString().equals(value);
}
/**
 * Convenience overload of the five-argument {@code expandByPrefixRegEx} for rules
 * that do not restrict which entity words may be expanded.
 *
 * @param type                   entity type to expand
 * @param prefixPattern          regular expression applied to the text before each entity
 * @param patternCaseInsensitive forwarded unchanged to the five-argument overload
 * @param group                  capture group of {@code prefixPattern} used as the prefix
 */
@ThenAction
public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern,
        @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
    // A null value pattern means "no filter on the entity word".
    final String noValuePattern = null;
    expandByPrefixRegEx(type, prefixPattern, patternCaseInsensitive, group, noValuePattern);
}
/**
 * Expands entities of the given type to the left: the text before each entity is
 * scanned with {@code prefixPattern}, and the selected capture group, if it sits
 * directly in front of the entity, is prepended to the entity's word. The combined
 * string is then searched via {@code findEntities}.
 *
 * <p>Candidates are collected first and only merged into {@code entities} after the
 * loop, so the collection is not modified while it is being iterated.
 *
 * @param type                   entity type to expand; other types are skipped
 * @param prefixPattern          regular expression applied to the text before the entity;
 *                               when {@code null} a warning is logged and nothing is expanded
 * @param patternCaseInsensitive passed to {@code Patterns.getCompiledPattern} for both patterns
 * @param group                  capture group of {@code prefixPattern} that holds the prefix
 * @param valuePattern           optional regular expression the entity's word must match
 *                               completely; {@code null} disables this filter
 */
@ThenAction
public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern,
        @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
        @Argument(ArgumentType.REGEX) String valuePattern) {
    Pattern compiledValuePattern = valuePattern == null ? null : Patterns.getCompiledPattern(valuePattern, patternCaseInsensitive);
    Pattern compiledPrefixPattern = prefixPattern == null ? null : Patterns.getCompiledPattern(prefixPattern, patternCaseInsensitive);

    Set<Entity> expanded = new HashSet<>();
    if (compiledPrefixPattern == null) {
        // Without a prefix pattern there is nothing to match; previously this ended in a
        // NullPointerException as soon as the first entity of the requested type was reached.
        log.warn("expandByPrefixRegEx called without a prefixPattern; no entities of type {} are expanded", type);
    } else {
        for (Entity entity : entities) {
            if (!entity.getType().equals(type)) {
                continue;
            }
            String textBefore = entity.getTextBefore();
            if (textBefore == null) {
                continue;
            }
            // Optional filter: the whole entity word has to match the value pattern.
            if (compiledValuePattern != null && !compiledValuePattern.matcher(entity.getWord()).matches()) {
                continue;
            }
            Matcher matcher = compiledPrefixPattern.matcher(textBefore);
            while (matcher.find()) {
                String match = matcher.group(group);
                if (StringUtils.isNotBlank(match)) {
                    // The prefix must end exactly where the entity begins, otherwise
                    // match + word would not be contiguous text.
                    if (!StringUtils.endsWith(textBefore, match)) {
                        log.warn("Rules contain invalid group for expandByPrefixRegEx command; prefixPattern:{}, group{}", prefixPattern, group);
                        continue;
                    }
                    Set<Entity> expandedEntities = findEntities(match + entity.getWord(), type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE);
                    expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
                }
            }
        }
    }

    // Merge and clean-up run in every case, exactly as before the null guard was added.
    EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary);
    EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
@ThenAction
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String suffixPattern,
@@ -291,6 +343,12 @@ public class Section {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
if (!StringUtils.startsWith(entity.getTextAfter(), match)) {
log.warn("Rules contain invalid group for expandByRegEx command; suffixPattern:{}, group{}", suffixPattern, group);
continue;
}
Set<Entity> expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE);
expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
}