diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 8acd2a95..a2c27b19 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -62,9 +62,10 @@ public class Section { } - public boolean hasTableHeader(String headerName){ - String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", ""); - return tabularData != null && tabularData.containsKey(cleanHeaderName); + public boolean hasTableHeader(String headerName) { + + String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", ""); + return tabularData != null && tabularData.containsKey(cleanHeaderName); } @@ -80,6 +81,34 @@ public class Section { } + public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group) { + + Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive); + + Set expanded = new HashSet<>(); + for (Entity entity : entities) { + + if (!entity.getType().equals(type) || entity.getTextAfter() == null) { + continue; + } + + Matcher matcher = compiledPattern.matcher(entity.getTextAfter()); + + while (matcher.find()) { + String match = matcher.group(group); + if (StringUtils.isNotBlank(match)) { + expanded.addAll(findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity + .getRedactionReason(), entity.getLegalBasis())); + + } + } + } + + EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary); + EntitySearchUtils.removeEntitiesContainedInLarger(entities); + } + + public void redact(String type, int ruleNumber, String reason, String legalBasis) { boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type); @@ -160,7 +189,7 @@ public class Section { String trimmedValue = value.trim(); String cleanValue; - if(trimmedValue.startsWith(":")){ + if (trimmedValue.startsWith(":")) { cleanValue = trimmedValue.substring(1).trim(); } else { cleanValue = trimmedValue; @@ -207,8 +236,8 @@ public class Section { } - public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, - String reason, String legalBasis) { + public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, + int ruleNumber, String reason, String legalBasis) { Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive); @@ -363,6 +392,7 @@ public class Section { } } } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 3d1bb993..e6eef8d8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -441,7 +441,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_08_Volume_3CA_B-6_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); AnalyzeRequest request = AnalyzeRequest.builder() .ruleSetId(TEST_RULESET_ID) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/RegExPatternTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/RegExPatternTest.java index 1f2ae55b..0ca7344e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/RegExPatternTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/RegExPatternTest.java @@ -1,15 +1,36 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; +import org.apache.commons.lang3.StringUtils; import org.junit.Test; import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; + public class RegExPatternTest { + @Test + public void testExpand(){ + String pattern = "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)"; + String text = ", G.R., Simoneaux,"; + Pattern compiledPattern = Pattern.compile(pattern, 0); + Matcher matcher = compiledPattern.matcher(text); + + while (matcher.find()) { + String match = matcher.group(1); + if (StringUtils.isNotBlank(match)) { + System.out.println(match); + } + } + } + + + @Test public void testEmailRegEx(){ String text = "Address: Schwarzwaldalle " + diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/allAuthors.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/allAuthors.drl index fe0dc278..ef08c66d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/allAuthors.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/allAuthors.drl @@ -7,6 +7,15 @@ global Section section // --------------------------------------- CBI rules ------------------------------------------------------------------- +rule "0: Expand CBI Authors with firstname initials" + when + Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author")) + then + section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1); + section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1); + end + + rule "1: Redact CBI Authors" when Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author")) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index df75bbe9..a028a45f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -7,6 +7,15 @@ global Section section // --------------------------------------- CBI rules ------------------------------------------------------------------- +rule "0: Expand CBI Authors with firstname initials" + when + Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author")) + then + section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1); + section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1); + end + + rule "1: Redacted because Section contains Vertebrate" when Section(matchesType("vertebrate"))