diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java
index 4503f5d6..b51736be 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java
@@ -9,7 +9,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
+import java.util.stream.Collectors;
+
+import lombok.Getter;
 
 public class SearchableText {
 
@@ -17,6 +21,7 @@ public class SearchableText {
     @JsonIgnore
     @JsonAttribute(ignore = true)
     private transient String stringRepresentation;
+    @Getter
     private final List<TextPositionSequence> sequences = new ArrayList<>();
 
 
@@ -218,6 +223,43 @@ public class SearchableText {
     }
 
 
+    /**
+     * Returns the text of all sequences in reading order across columns.
+     * <p>
+     * Sequences are ordered by page, then by the y-coordinate and finally by the x-coordinate of
+     * their first text position. Consecutive words are joined by a single space; a line feed is
+     * inserted instead when the page changes or when the rotation-adjusted y-coordinates of the
+     * two words differ by more than the text height of the later word.
+     *
+     * @return the sorted text, always terminated by a line feed (only a line feed if there are no sequences)
+     */
+    public String getAsStringWithLinebreaksSorted() {
+
+        Comparator<TextPositionSequence> byPage = Comparator.comparing(s -> s.getPage());
+        // One composite comparator yields the same order as three chained stable sorts (x, then y, then page).
+        Comparator<TextPositionSequence> readingOrder = byPage
+                .thenComparing(s -> s.getTextPositions().get(0).getYDirAdj())
+                .thenComparing(s -> s.getTextPositions().get(0).getXDirAdj());
+
+        List<TextPositionSequence> sorted = sequences.stream().sorted(readingOrder).collect(Collectors.toList());
+
+        StringBuilder sb = new StringBuilder();
+        TextPositionSequence previous = null;
+        for (TextPositionSequence word : sorted) {
+            if (previous != null) {
+                // Text on another page never continues the current line, even at a similar height.
+                boolean pageChanged = byPage.compare(previous, word) != 0;
+                boolean lineChanged = Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight();
+                sb.append(pageChanged || lineChanged ? '\n' : ' ');
+            }
+            sb.append(word.toString());
+            previous = word;
+        }
+        return sb.append('\n').toString();
+    }
+
+
+
     public String getAsStringWithLinebreaks() {
 
         StringBuilder sb = new StringBuilder();
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java
index 30a24147..16ba9400 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java
@@ -1013,6 +1013,48 @@ public class Section {
     }
 
 
+
+    /**
+     * Redacts the remainder of every line that follows {@code start}, based on the text sorted across columns.
+     * <p>
+     * Each non-blank remainder is trimmed and searched with {@code findEntities}; the result is handed to
+     * {@code EntitySearchUtils.addEntitiesWithHigherRank} together with this section's entities and dictionary.
+     *
+     * @param start            text marking where a value begins; the value runs up to the next line feed
+     * @param asType           entity type handed to the search and used as key for the local dictionary additions
+     * @param ruleNumber       rule number handed to the entity search
+     * @param redactEverywhere if {@code true} and {@code isLocal()} is {@code false}, the trimmed value is also
+     *                         recorded in {@code localDictionaryAdds} under {@code asType}
+     * @param reason           reason handed to the entity search
+     * @param legalBasis       legal basis handed to the entity search
+     */
+    public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
+                                             String legalBasis) {
+
+        String sortedText = searchableText.getAsStringWithLinebreaksSorted();
+        String[] lineRemainders = StringUtils.substringsBetween(sortedText, start, "\n");
+        if (lineRemainders == null) {
+            // Nothing was found between start and a line feed.
+            return;
+        }
+
+        for (String remainder : lineRemainders) {
+            if (StringUtils.isBlank(remainder)) {
+                continue;
+            }
+            String candidate = remainder.trim();
+
+            var matches = findEntities(candidate, asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
+            EntitySearchUtils.addEntitiesWithHigherRank(entities, matches, dictionary);
+
+            if (redactEverywhere && !isLocal()) {
+                localDictionaryAdds.computeIfAbsent(asType, type -> new HashSet<>()).add(candidate);
+            }
+        }
+    }
+
+
+
     private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason,
                                String legalBasis, boolean redaction) {
 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
index 57508d08..585b5f76 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
@@ -407,4 +407,23 @@ rule "102-1: Extract title"
     Section(sectionNumber
<= 3 && text.contains("SPL"))
   then
     section.redactBetween("Laboratories", "SPL", "PII", 5, false, "Title found", "n-a");
-  end
\ No newline at end of file
+  end
+
+
+rule "30: Vehicle components"
+  // Applies to sections whose text contains "vehicle".
+  when
+
+    Section(text.contains("vehicle"))
+
+  then
+
+    // Redact the rest of the line after "Name:" as PII, using the text sorted across columns.
+    // NOTE(review): "bla" looks like a placeholder reason - confirm the intended wording.
+    section.redactLineAfterAcrossColumns("Name:","PII",30,false,"bla","n-a");
+
+    section.redactLineAfter("Vehicle:","vehicle",30,true,"Vehicle keyword found.","n-a");
+
+    section.redactLineAfter("The vehicle was","vehicle",30,true,"Vehicle section found.","n-a");
+
+  end
\ No newline at end of file