From 317a8a9af9d9755befe66d82eef5f7b21fe49b33 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Fri, 9 Sep 2022 13:25:53 +0200 Subject: [PATCH] RSS-31: Allow to skip removeEntitiesContainedInLarger in redactBetween rule and added param to sort result by positions --- .../v1/server/redaction/model/Entity.java | 2 ++ .../redaction/model/SearchableText.java | 5 +++ .../v1/server/redaction/model/Section.java | 34 +++++++++++++++---- .../redaction/utils/EntitySearchUtils.java | 4 +++ 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java index ee16afd8..b555c65f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java @@ -54,6 +54,8 @@ public class Entity implements ReasonHolder { private boolean resized; + private boolean skipRemoveEntitiesContainedInLarger; + public Entity(String word, String type, boolean redaction, String redactionReason, List positionSequences, String headline, int matchedRule, int sectionNumber, diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java index b51736be..68a44301 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java @@ -225,7 +225,11 @@ public class SearchableText { public String getAsStringWithLinebreaksSorted() { + return getAsStringWithLinebreaksSorted(this.sequences); + } + + public String getAsStringWithLinebreaksSorted(List sequences) { var sorted = sequences.stream().sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj())).sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj())).sorted(Comparator.comparing(a -> a.getPage())).collect(Collectors.toList()); StringBuilder sb = new StringBuilder(); @@ -248,6 +252,7 @@ public class SearchableText { + public String getAsStringWithLinebreaks() { StringBuilder sb = new StringBuilder(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 16ba9400..8aeb8cbd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -534,7 +534,7 @@ public class Section { @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true); + redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, legalBasis, true, false, false); } @@ -545,7 +545,18 @@ public class Section { @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, 
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true); + redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, false, false); + } + + + @ThenAction + @SuppressWarnings("unused") + public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType, + @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, + @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason, + @Argument(ArgumentType.LEGAL_BASIS) String legalBasis, @Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger, @Argument(ArgumentType.BOOLEAN) boolean sortedResult) { + + redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult); } @@ -556,7 +567,7 @@ public class Section { @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason) { - redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false); + redactBetween(start, stop, asType, ruleNumber, redactEverywhere, false, reason, null, false, false, false); } @@ -566,7 +577,7 @@ public class Section { @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine, @Argument(ArgumentType.STRING) String reason) { - redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false); + redactBetween(start, stop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, null, false,
false, false); } @@ -580,6 +591,8 @@ public class Section { } + + @ThenAction @SuppressWarnings("unused") public void redactNotLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType, @@ -834,7 +847,7 @@ public class Section { public void redactSectionText(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactBetween("", "", type, ruleNumber, false, false, reason, legalBasis, true); + redactBetween("", "", type, ruleNumber, false, false, reason, legalBasis, true, false, false); } @@ -843,7 +856,7 @@ public class Section { public void redactSectionTextWithoutHeadLine(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) { - redactBetween("", "", type, ruleNumber, false, true, reason, legalBasis, true); + redactBetween("", "", type, ruleNumber, false, true, reason, legalBasis, true, false, false); } @@ -1054,7 +1067,7 @@ public class Section { private void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere, boolean excludeHeadLine, String reason, String legalBasis, - boolean redaction) { + boolean redaction, boolean skipRemoveEntitiesContainedInLarger, boolean sortedResult) { String[] values = new String[1]; @@ -1077,6 +1090,13 @@ public class Section { if (StringUtils.isNotBlank(value)) { Set found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false); + found.forEach(f -> { + f.setSkipRemoveEntitiesContainedInLarger(skipRemoveEntitiesContainedInLarger); + if(sortedResult){ +
f.setWord(searchableText.getAsStringWithLinebreaksSorted(f.getPositionSequences().stream().map(EntityPositionSequence::getSequences).flatMap(Collection::stream).collect(Collectors.toList()))); + } + }); + EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary); if (redactEverywhere && !isLocal()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index a0079505..ae535942 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -150,6 +150,10 @@ public class EntitySearchUtils { // continue; // } + if(inner.isSkipRemoveEntitiesContainedInLarger() || word.isSkipRemoveEntitiesContainedInLarger()){ + continue; + } + if (inner.getWord().length() < word.getWord() .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) { if (word.getEntityType().equals(EntityType.RECOMMENDATION) && inner.getEntityType().equals(EntityType.ENTITY)) {