RSS-86: Added new rule function redactLineAfterAcrossColumns
This commit is contained in:
parent
34be42cd45
commit
a5f27cfa4c
@ -9,7 +9,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
public class SearchableText {
|
||||
|
||||
@ -17,6 +21,7 @@ public class SearchableText {
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
private transient String stringRepresentation;
|
||||
@Getter
|
||||
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
|
||||
@ -218,6 +223,31 @@ public class SearchableText {
|
||||
}
|
||||
|
||||
|
||||
public String getAsStringWithLinebreaksSorted() {
|
||||
|
||||
|
||||
var sorted = sequences.stream().sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj())).sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj())).sorted(Comparator.comparing(a -> a.getPage())).collect(Collectors.toList());
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sorted) {
|
||||
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
return sb.append("\n").toString();
|
||||
}
|
||||
|
||||
|
||||
|
||||
public String getAsStringWithLinebreaks() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
@ -1013,6 +1013,29 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
|
||||
String legalBasis) {
|
||||
|
||||
|
||||
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
|
||||
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
localDictionaryAdds.computeIfAbsent(asType, x -> new HashSet<>()).add(value.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
|
||||
String reason, String legalBasis, boolean redaction) {
|
||||
|
||||
|
||||
@ -407,4 +407,23 @@ rule "102-1: Extract title"
|
||||
Section(sectionNumber <= 3 && text.contains("SPL"))
|
||||
then
|
||||
section.redactBetween("Laboratories", "SPL", "PII", 5, false, "Title found", "n-a");
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// Fires for every section whose text contains "vehicle".
rule "30: Vehicle components"
    when
        Section(text.contains("vehicle"))
    then
        // The single-column variant for "Name:" is disabled; the across-columns variant below is used instead.
        //section.redactLineAfter("Name:","vehicle",30,true,"Vehicle components found.","n-a");
        section.redactLineAfterAcrossColumns("Name:", "PII", 30, false, "bla", "n-a");
        section.redactLineAfter("Vehicle:", "vehicle", 30, true, "Vehicle keyword found.", "n-a");
        section.redactLineAfter("The vehicle was", "vehicle", 30, true, "Vehicle section found.", "n-a");
end
|
||||
Loading…
x
Reference in New Issue
Block a user