Pull request #496: RSS-164: Added new rule function for redactLineAfterAcrossColumns with param to return only exactMatch in the section
Merge in RED/redaction-service from RSS-164-2 to master * commit '02bdbbc2d1f6bff0c907240f61726a78a0b8f318': RSS-164: Added new rule function for redactLineAfterAcrossColumns with param to return only exactMatch in the section
This commit is contained in:
commit
5feb6891e2
@ -0,0 +1,27 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class OffsetString {
|
||||
|
||||
private String value;
|
||||
private int start;
|
||||
private int end;
|
||||
|
||||
|
||||
public OffsetString trim() {
|
||||
|
||||
String trimmed = this.value.trim();
|
||||
int indexInUntrimmed = this.value.indexOf(trimmed);
|
||||
|
||||
int newStart = this.start + indexInUntrimmed;
|
||||
int newEnd = newStart + trimmed.length();
|
||||
|
||||
return new OffsetString(trimmed, newStart, newEnd);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -29,6 +29,7 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSeque
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OffsetStringUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
|
||||
|
||||
@ -1236,7 +1237,7 @@ public class Section {
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false);
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false, false);
|
||||
}
|
||||
|
||||
|
||||
@ -1250,7 +1251,22 @@ public class Section {
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger);
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger, false);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Rule action overload of redactLineAfterAcrossColumns that exposes both boolean switches,
 * {@code skipRemoveEntitiesContainedInLarger} and {@code onlyExactMatch}, to the caller.
 * It performs no work itself: all arguments are handed on unchanged to the overload that takes
 * {@code reason} and {@code legalBasis} before the two flags.
 *
 * @param start                               marker text; forwarded as-is
 * @param asType                              entity type; forwarded as-is
 * @param ruleNumber                          rule number; forwarded as-is
 * @param redactEverywhere                    forwarded as-is
 * @param skipRemoveEntitiesContainedInLarger forwarded as-is
 * @param onlyExactMatch                      forwarded as-is
 *                                            NOTE(review): presumably restricts results to the exact
 *                                            matched span in the section - confirm against the delegate
 * @param reason                              redaction reason; forwarded as-is
 * @param legalBasis                          legal basis; forwarded as-is
 */
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean onlyExactMatch,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger, onlyExactMatch);
|
||||
}
|
||||
|
||||
|
||||
@ -1419,19 +1435,23 @@ public class Section {
|
||||
boolean redactEverywhere,
|
||||
String reason,
|
||||
String legalBasis,
|
||||
boolean skipRemoveEntitiesContainedInLarger) {
|
||||
boolean skipRemoveEntitiesContainedInLarger,
|
||||
boolean onlyExactMatch) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
|
||||
var stringOffsets = OffsetStringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
|
||||
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
if (stringOffsets != null) {
|
||||
for (var stringOffset : stringOffsets) {
|
||||
if (StringUtils.isNotBlank(stringOffset.getValue())) {
|
||||
var trimmedOffsetString = stringOffset.trim();
|
||||
Set<Entity> found = findEntities(trimmedOffsetString.getValue(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false).stream()
|
||||
.filter(f -> !onlyExactMatch || f.getStart() == trimmedOffsetString.getStart() && f.getEnd() == trimmedOffsetString.getEnd()).collect(Collectors.toSet());
|
||||
found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger));
|
||||
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(value.trim());
|
||||
localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(trimmedOffsetString.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,56 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.OffsetString;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class OffsetStringUtils {
|
||||
|
||||
/**
|
||||
* Same logic as in StringUtils.redactBetween, but returns a list of object with offsets insteadof on the Strings only.
|
||||
*
|
||||
* @param str – the String containing the substrings, null returns null, empty returns empty
|
||||
* @param open – the String identifying the start of the substring, empty returns null
|
||||
* @param close – the String identifying the end of the substring, empty returns null
|
||||
* @return a list of Strings with their offsets
|
||||
*/
|
||||
public List<OffsetString> substringsBetween(final String str, final String open, final String close) {
|
||||
|
||||
if (str == null || StringUtils.isEmpty(open) || StringUtils.isEmpty(close)) {
|
||||
return null;
|
||||
}
|
||||
final int strLen = str.length();
|
||||
if (strLen == 0) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
final int closeLen = close.length();
|
||||
final int openLen = open.length();
|
||||
final List<OffsetString> list = new ArrayList<>();
|
||||
int pos = 0;
|
||||
while (pos < strLen - closeLen) {
|
||||
int start = str.indexOf(open, pos);
|
||||
if (start < 0) {
|
||||
break;
|
||||
}
|
||||
start += openLen;
|
||||
final int end = str.indexOf(close, start);
|
||||
if (end < 0) {
|
||||
break;
|
||||
}
|
||||
list.add(new OffsetString(str.substring(start, end), start, end));
|
||||
pos = end + closeLen;
|
||||
}
|
||||
if (list.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user