RSS-86: Added new rule function redactLineAfterAcrossColumns

This commit is contained in:
deiflaender 2022-09-08 13:04:55 +02:00
parent 34be42cd45
commit a5f27cfa4c
3 changed files with 73 additions and 1 deletions

View File

@ -9,7 +9,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import lombok.Getter;
public class SearchableText {
@ -17,6 +21,7 @@ public class SearchableText {
@JsonIgnore
@JsonAttribute(ignore = true)
private transient String stringRepresentation;
@Getter
private final List<TextPositionSequence> sequences = new ArrayList<>();
@ -218,6 +223,31 @@ public class SearchableText {
}
/**
 * Renders all text sequences as a single string in positional order.
 * <p>
 * Sequences are ordered by page, then by the Y coordinate and finally by the X coordinate of their
 * first text position. Consecutive sequences are separated by a single space, or by a line break
 * when they lie on different pages or when their rotation adjusted Y values differ by more than
 * the text height of the later sequence. The result always ends with a line break.
 * <p>
 * Assumes every sequence holds at least one text position; {@code get(0)} fails otherwise.
 *
 * @return the positionally sorted text with line breaks, terminated by {@code '\n'}
 */
public String getAsStringWithLinebreaksSorted() {

    // Explicitly typed comparators so that they can be chained; a single chained sort yields the
    // same page -> Y -> X order that three consecutive stable sorts (X, then Y, then page) produce.
    Comparator<TextPositionSequence> byPage = Comparator.comparing(s -> s.getPage());
    Comparator<TextPositionSequence> byY = Comparator.comparing(s -> s.getTextPositions().get(0).getYDirAdj());
    Comparator<TextPositionSequence> byX = Comparator.comparing(s -> s.getTextPositions().get(0).getXDirAdj());

    List<TextPositionSequence> sorted = sequences.stream()
            .sorted(byPage.thenComparing(byY).thenComparing(byX))
            .collect(Collectors.toList());

    StringBuilder sb = new StringBuilder();
    TextPositionSequence previous = null;
    for (TextPositionSequence word : sorted) {
        if (previous != null) {
            // Words on different pages can never share a line, even if their Y values are close.
            boolean startsNewLine = byPage.compare(previous, word) != 0
                    || Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight();
            sb.append(startsNewLine ? '\n' : ' ');
        }
        sb.append(word.toString());
        previous = word;
    }
    // Trailing line break lets callers search for "text up to the end of the line" on the last line too.
    return sb.append('\n').toString();
}
public String getAsStringWithLinebreaks() {
StringBuilder sb = new StringBuilder();

View File

@ -1013,6 +1013,29 @@ public class Section {
}
/**
 * Looks for {@code start} in the positionally sorted text of this section (see
 * {@code searchableText.getAsStringWithLinebreaksSorted()}) and treats everything between each
 * occurrence and the next line break as a value to search for.
 * <p>
 * Every non-blank value is trimmed and passed to {@code findEntities}; the entities found are
 * merged into {@code entities} via {@code EntitySearchUtils.addEntitiesWithHigherRank}. If
 * {@code redactEverywhere} is set and {@code isLocal()} returns false, the trimmed value is also
 * recorded in {@code localDictionaryAdds} under {@code asType}.
 *
 * @param start            marker text; the remainder of the line following it is the value
 * @param asType           type passed to {@code findEntities} and used as the key in {@code localDictionaryAdds}
 * @param ruleNumber       rule number passed to {@code findEntities}
 * @param redactEverywhere whether the value should additionally be recorded in {@code localDictionaryAdds}
 * @param reason           reason passed to {@code findEntities}
 * @param legalBasis       legal basis passed to {@code findEntities}
 */
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
        String legalBasis) {

    String sortedText = searchableText.getAsStringWithLinebreaksSorted();
    String[] lineRemainders = StringUtils.substringsBetween(sortedText, start, "\n");
    if (lineRemainders == null) {
        // substringsBetween yields null when there is no match.
        return;
    }

    for (String remainder : lineRemainders) {
        if (StringUtils.isBlank(remainder)) {
            continue;
        }

        String candidate = remainder.trim();
        Set<Entity> matches = findEntities(candidate, asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
        EntitySearchUtils.addEntitiesWithHigherRank(entities, matches, dictionary);

        if (redactEverywhere && !isLocal()) {
            localDictionaryAdds.computeIfAbsent(asType, key -> new HashSet<>()).add(candidate);
        }
    }
}
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
String reason, String legalBasis, boolean redaction) {

View File

@ -407,4 +407,23 @@ rule "102-1: Extract title"
Section(sectionNumber <= 3 && text.contains("SPL"))
then
section.redactBetween("Laboratories", "SPL", "PII", 5, false, "Title found", "n-a");
end
end
// Rule 30: matches Sections whose text contains "vehicle" and applies the line-based
// redactions below in the order written.
rule "30: Vehicle components"
when
Section(text.contains("vehicle"))
then
// NOTE(review): disabled earlier variant; presumably replaced by the redactLineAfterAcrossColumns call below - confirm and delete.
//section.redactLineAfter("Name:","vehicle",30,true,"Vehicle components found.","n-a");
// Arguments: start marker, entity type, rule number, redactEverywhere, reason, legal basis.
// NOTE(review): the reason "bla" looks like a placeholder - confirm the intended wording.
section.redactLineAfterAcrossColumns("Name:","PII",30,false,"bla","n-a");
section.redactLineAfter("Vehicle:","vehicle",30,true,"Vehicle keyword found.","n-a");
section.redactLineAfter("The vehicle was","vehicle",30,true,"Vehicle section found.","n-a");
end