Pull request #482: RED-5295: Added redactWordPartByRegEx rule function
Merge in RED/redaction-service from RED-5295 to master * commit 'e0dd06c6bf64bccff41f425cd934ebbe934384cf': RED-5295: Added redactWordPartByRegEx rule function
This commit is contained in:
commit
69540bcd5e
@ -9,6 +9,7 @@ import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -24,8 +25,11 @@ import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
|
||||
|
||||
@ -606,7 +610,7 @@ public class Section {
|
||||
|
||||
String startValue = getFirstRexExMatch(searchText, startPattern, startPatternCaseInsensitive, startGroup);
|
||||
|
||||
if (startValue == null){
|
||||
if (startValue == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -941,13 +945,10 @@ public class Section {
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
|
||||
|
||||
|
||||
if (!headline.isBlank()) {
|
||||
|
||||
String cleanHeadline = headline.replaceAll("\\n", " ").replaceAll(" ", " ").trim();
|
||||
if(searchText.contains(cleanHeadline)) {
|
||||
if (searchText.contains(cleanHeadline)) {
|
||||
Set<Entity> found = findEntities(cleanHeadline, type, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
|
||||
}
|
||||
@ -955,6 +956,64 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
public void redactWordPartByRegEx(@Argument(ArgumentType.REGEX)String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.INTEGER) int redactGroup,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
Matcher findMatcher = compiledPattern.matcher(searchText);
|
||||
|
||||
while (findMatcher.find()) {
|
||||
String findMatch = findMatcher.group(group);
|
||||
if (StringUtils.isNotBlank(findMatch)) {
|
||||
Set<Entity> found = findEntities(findMatch.trim(), asType, false, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
|
||||
|
||||
for (Entity entity : found) {
|
||||
|
||||
Matcher redactMatcher = compiledPattern.matcher(entity.getWord());
|
||||
|
||||
while (redactMatcher.find()) {
|
||||
String redactMatch = redactMatcher.group(redactGroup);
|
||||
int start = redactMatcher.start(redactGroup);
|
||||
|
||||
int i = 0;
|
||||
List<RedTextPosition> newPositions = new ArrayList<>();
|
||||
TextPositionSequence newSeq = null;
|
||||
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
for (TextPositionSequence textPositionSequence : entityPositionSequence.getSequences()) {
|
||||
for (RedTextPosition textPosition : textPositionSequence.getTextPositions()) {
|
||||
if (i >= start && i < start + redactMatch.length()) {
|
||||
newPositions.add(textPosition);
|
||||
if (newSeq == null) {
|
||||
newSeq = textPositionSequence;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
newSeq.setTextPositions(newPositions);
|
||||
entity.setWord(redactMatch);
|
||||
String plainId = IdBuilder.buildId(List.of(newSeq));
|
||||
entity.setPositionSequences(List.of(new EntityPositionSequence(plainId, List.of(newSeq), newSeq.getPage())));
|
||||
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, entity, dictionary);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactSection(@Argument(ArgumentType.TYPE) String type,
|
||||
@ -1008,9 +1067,6 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted,
|
||||
int ruleNumber, String reason, String legalBasis, Engine engine,
|
||||
boolean asRecommendation) {
|
||||
@ -1149,6 +1205,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
|
||||
String reason, String legalBasis, boolean redaction) {
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user