Pull request #496: RSS-164: Added new rule function for redactLineAfterAcrossColumns with param to return only exactMatch in the section

Merge in RED/redaction-service from RSS-164-2 to master

* commit '02bdbbc2d1f6bff0c907240f61726a78a0b8f318':
  RSS-164: Added new rule function for redactLineAfterAcrossColumns with param to return only exactMatch in the section
This commit is contained in:
Dominique Eiflaender 2022-11-04 13:37:20 +01:00
commit 5feb6891e2
3 changed files with 112 additions and 9 deletions

View File

@ -0,0 +1,27 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * A piece of text together with the offsets it occupies in some larger source text.
 *
 * <p>{@code start} and {@code end} are stored exactly as supplied; this class does not
 * validate that {@code end - start} equals the length of {@code value}.
 */
@Data
@AllArgsConstructor
public class OffsetString {

    /** The text itself. */
    private String value;

    /** Offset of the first character of {@link #value}. */
    private int start;

    /** Offset following the last character of {@link #value}. */
    private int end;


    /**
     * Creates a copy whose value has leading and trailing whitespace removed (as defined by
     * {@link String#trim()}) and whose offsets are moved accordingly.
     *
     * <p>The new start is this start shifted by the position of the trimmed text inside the
     * current value; the new end is derived from the new start plus the trimmed length, so the
     * current {@code end} is not consulted. This instance is left unchanged.
     *
     * @return a new {@code OffsetString} describing the trimmed text
     */
    public OffsetString trim() {

        final String stripped = value.trim();
        // An empty trimmed value is found at index 0, so the start offset stays put in that case.
        final int shiftedStart = start + value.indexOf(stripped);
        return new OffsetString(stripped, shiftedStart, shiftedStart + stripped.length());
    }

}

View File

@ -29,6 +29,7 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSeque
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OffsetStringUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
@ -1236,7 +1237,7 @@ public class Section {
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false);
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false, false);
}
@ -1250,7 +1251,22 @@ public class Section {
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger);
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger, false);
}
/**
 * Rule action overload of {@code redactLineAfterAcrossColumns} that additionally exposes the
 * {@code onlyExactMatch} flag.
 *
 * <p>This method contains no logic of its own: it forwards every argument to the
 * eight-argument variant. Note that the delegate expects {@code reason} and
 * {@code legalBasis} <em>before</em> the two trailing boolean flags, so the argument order in
 * the call differs from the parameter order declared here.
 *
 * @param start                               marker text passed through as the delegate's {@code start} argument
 * @param asType                              entity type passed through to the delegate
 * @param ruleNumber                          number of the rule triggering this action
 * @param redactEverywhere                    passed through unchanged to the delegate
 * @param skipRemoveEntitiesContainedInLarger passed through unchanged to the delegate
 * @param onlyExactMatch                      passed through unchanged to the delegate as its last argument
 * @param reason                              reason passed through to the delegate
 * @param legalBasis                          legal basis passed through to the delegate
 */
@ThenAction
@SuppressWarnings("unused")
public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
@Argument(ArgumentType.BOOLEAN) boolean onlyExactMatch,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
// Delegate takes (…, reason, legalBasis, skipRemoveEntitiesContainedInLarger, onlyExactMatch).
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger, onlyExactMatch);
}
@ -1419,19 +1435,23 @@ public class Section {
boolean redactEverywhere,
String reason,
String legalBasis,
boolean skipRemoveEntitiesContainedInLarger) {
boolean skipRemoveEntitiesContainedInLarger,
boolean onlyExactMatch) {
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
var stringOffsets = OffsetStringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
if (stringOffsets != null) {
for (var stringOffset : stringOffsets) {
if (StringUtils.isNotBlank(stringOffset.getValue())) {
var trimmedOffsetString = stringOffset.trim();
Set<Entity> found = findEntities(trimmedOffsetString.getValue(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false).stream()
.filter(f -> !onlyExactMatch || f.getStart() == trimmedOffsetString.getStart() && f.getEnd() == trimmedOffsetString.getEnd()).collect(Collectors.toSet());
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(value.trim());
localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(trimmedOffsetString.getValue());
}
}
}

View File

@ -0,0 +1,56 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.server.redaction.model.OffsetString;
import lombok.experimental.UtilityClass;
/**
 * Helpers for extracting substrings together with their offsets in the source text.
 */
@UtilityClass
public class OffsetStringUtils {

    /**
     * Finds every substring enclosed by {@code open} and {@code close}, following the same
     * scanning logic as {@code StringUtils.substringsBetween}, but returning each match together
     * with its offsets in {@code str} instead of the bare strings.
     *
     * <p>For each match, {@code start} is the index of the first character after {@code open}
     * and {@code end} is the index at which the following {@code close} begins. Scanning
     * resumes directly after that {@code close}.
     *
     * @param str   the text to search; {@code null} yields {@code null}, an empty string yields
     *              an empty list
     * @param open  the delimiter marking the start of a substring; {@code null} or empty yields
     *              {@code null}
     * @param close the delimiter marking the end of a substring; {@code null} or empty yields
     *              {@code null}
     * @return the matches in order of appearance, or {@code null} if no match was found
     */
    public List<OffsetString> substringsBetween(final String str, final String open, final String close) {

        if (str == null || StringUtils.isEmpty(open) || StringUtils.isEmpty(close)) {
            return null;
        }
        if (str.isEmpty()) {
            return new ArrayList<>();
        }

        final List<OffsetString> matches = new ArrayList<>();
        // Past this position there is no room left for content followed by a closing delimiter.
        final int searchLimit = str.length() - close.length();

        for (int cursor = 0; cursor < searchLimit; ) {
            final int openIndex = str.indexOf(open, cursor);
            if (openIndex < 0) {
                break;
            }

            final int contentStart = openIndex + open.length();
            final int contentEnd = str.indexOf(close, contentStart);
            if (contentEnd < 0) {
                break;
            }

            matches.add(new OffsetString(str.substring(contentStart, contentEnd), contentStart, contentEnd));
            cursor = contentEnd + close.length();
        }

        return matches.isEmpty() ? null : matches;
    }

}