From 02bdbbc2d1f6bff0c907240f61726a78a0b8f318 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Fri, 4 Nov 2022 13:27:40 +0100 Subject: [PATCH] RSS-164: Added new rule function for redactLineAfterAcrossColumns with param to return only exactMatch in the section --- .../server/redaction/model/OffsetString.java | 27 +++++++++ .../v1/server/redaction/model/Section.java | 38 ++++++++++--- .../redaction/utils/OffsetStringUtils.java | 56 +++++++++++++++++++ 3 files changed, 112 insertions(+), 9 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/OffsetString.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/OffsetStringUtils.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/OffsetString.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/OffsetString.java new file mode 100644 index 00000000..4c12fe9a --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/OffsetString.java @@ -0,0 +1,27 @@ +package com.iqser.red.service.redaction.v1.server.redaction.model; + +import lombok.AllArgsConstructor; +import lombok.Data; + +@Data +@AllArgsConstructor +public class OffsetString { + + private String value; + private int start; + private int end; + + + public OffsetString trim() { + + String trimmed = this.value.trim(); + int indexInUntrimmed = this.value.indexOf(trimmed); + + int newStart = this.start + indexInUntrimmed; + int newEnd = newStart + trimmed.length(); + + return new OffsetString(trimmed, newStart, newEnd); + } + +} + diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index e6db5b9b..5f96b260 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -29,6 +29,7 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSeque import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; +import com.iqser.red.service.redaction.v1.server.redaction.utils.OffsetStringUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation; @@ -1236,7 +1237,7 @@ public class Section { @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false); + redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, false, false); } @@ -1250,7 +1251,22 @@ public class Section { @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger); + redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger, false); + } + + + @ThenAction + 
@SuppressWarnings("unused") + public void redactLineAfterAcrossColumns(@Argument(ArgumentType.STRING) String start, + @Argument(ArgumentType.TYPE) String asType, + @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, + @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, + @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger, + @Argument(ArgumentType.BOOLEAN) boolean onlyExactMatch, + @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { + + redactLineAfterAcrossColumns(start, asType, ruleNumber, redactEverywhere, reason, legalBasis, skipRemoveEntitiesContainedInLarger, onlyExactMatch); } @@ -1419,19 +1435,23 @@ public class Section { boolean redactEverywhere, String reason, String legalBasis, - boolean skipRemoveEntitiesContainedInLarger) { + boolean skipRemoveEntitiesContainedInLarger, + boolean onlyExactMatch) { - String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n"); + var stringOffsets = OffsetStringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n"); - if (values != null) { - for (String value : values) { - if (StringUtils.isNotBlank(value)) { - Set found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false); + if (stringOffsets != null) { + for (var stringOffset : stringOffsets) { + if (StringUtils.isNotBlank(stringOffset.getValue())) { + var trimmedOffsetString = stringOffset.trim(); + Set found = findEntities(trimmedOffsetString.getValue(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false).stream() + .filter(f -> !onlyExactMatch || f.getStart() == trimmedOffsetString.getStart() && f.getEnd() == trimmedOffsetString.getEnd()).collect(Collectors.toSet()); found.forEach(f -> f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger)); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, 
dictionary); if (redactEverywhere && !isLocal()) { - localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(value.trim()); + localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(trimmedOffsetString.getValue()); } } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/OffsetStringUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/OffsetStringUtils.java new file mode 100644 index 00000000..3154624b --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/OffsetStringUtils.java @@ -0,0 +1,56 @@ +package com.iqser.red.service.redaction.v1.server.redaction.utils; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; + +import com.iqser.red.service.redaction.v1.server.redaction.model.OffsetString; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class OffsetStringUtils { + + /** + * Same logic as in StringUtils.substringsBetween, but returns a list of objects with offsets instead of the Strings only. 
+ * + * @param str – the String containing the substrings, null returns null, empty returns empty + * @param open – the String identifying the start of the substring, empty returns null + * @param close – the String identifying the end of the substring, empty returns null + * @return a list of the substrings with their offsets, or null if no match was found + */ + public List substringsBetween(final String str, final String open, final String close) { + + if (str == null || StringUtils.isEmpty(open) || StringUtils.isEmpty(close)) { + return null; + } + final int strLen = str.length(); + if (strLen == 0) { + return new ArrayList<>(); + } + final int closeLen = close.length(); + final int openLen = open.length(); + final List list = new ArrayList<>(); + int pos = 0; + while (pos < strLen - closeLen) { + int start = str.indexOf(open, pos); + if (start < 0) { + break; + } + start += openLen; + final int end = str.indexOf(close, start); + if (end < 0) { + break; + } + list.add(new OffsetString(str.substring(start, end), start, end)); + pos = end + closeLen; + } + if (list.isEmpty()) { + return null; + } + return list; + } + +} +