Pull request #115: Expand CBI Authors with firstname initials
Merge in RED/redaction-service from ExpandAuthors to master * commit '35f3582d0850053c1dc38ef0b149daa47bd9cc56': Expand CBI Authors with firstname initials
This commit is contained in:
commit
7898f6a30f
@ -62,9 +62,10 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public boolean hasTableHeader(String headerName){
|
||||
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName);
|
||||
/**
 * Tells whether this section's tabular data contains a column with the given header.
 *
 * The header is normalized before the lookup: every newline, space and hyphen is removed,
 * and the result is used as the key into {@code tabularData}.
 *
 * @param headerName header text to look for; it is dereferenced unconditionally, so it
 *                   must not be {@code null}
 * @return {@code true} if {@code tabularData} is non-null and contains the normalized
 *         header as a key, {@code false} otherwise
 */
public boolean hasTableHeader(String headerName) {
|
||||
|
||||
// Strip newlines, spaces and hyphens so the lookup key has none of them.
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
// A missing table (null map) simply means "no such header".
return tabularData != null && tabularData.containsKey(cleanHeaderName);
|
||||
}
|
||||
|
||||
|
||||
@ -80,6 +81,34 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Expands existing entities of one type with text matched in their text-after.
 *
 * For every entity in {@code entities} whose type equals {@code type} and whose text-after
 * is non-null, the compiled pattern is applied to that text-after. Each non-blank capture of
 * the requested group is appended to the entity's word, and the combined string is looked up
 * through {@code findEntities}, reusing the source entity's redaction flag, matched rule,
 * redaction reason and legal basis. The entities found this way are collected and then handed
 * to {@code EntitySearchUtils.addEntitiesWithHigherRank}, after which
 * {@code EntitySearchUtils.removeEntitiesContainedInLarger} is called on {@code entities}.
 *
 * @param type                   entity type to expand; also the type passed to
 *                               {@code findEntities} for the expanded entities
 * @param pattern                regular expression source handed to
 *                               {@code Patterns.getCompiledPattern}
 * @param patternCaseInsensitive forwarded to {@code Patterns.getCompiledPattern}; presumably
 *                               selects case-insensitive matching — confirm in {@code Patterns}
 * @param group                  index of the capture group whose text is appended to the
 *                               entity's word
 */
public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
// Collected separately so that entities is not modified while it is being iterated.
Set<Entity> expanded = new HashSet<>();
|
||||
for (Entity entity : entities) {
|
||||
|
||||
// Only entities of the requested type that have some following text can be expanded.
if (!entity.getType().equals(type) || entity.getTextAfter() == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Matcher matcher = compiledPattern.matcher(entity.getTextAfter());
|
||||
|
||||
// NOTE(review): find() reports matches anywhere in the text-after, not only at its start;
// each hit is still combined with the entity's word below — confirm this is intended.
while (matcher.find()) {
|
||||
String match = matcher.group(group);
|
||||
// Skips groups that did not participate (null) as well as empty/whitespace-only captures.
if (StringUtils.isNotBlank(match)) {
|
||||
// Look up "word + captured text" and carry over the source entity's redaction metadata.
expanded.addAll(findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity
|
||||
.getRedactionReason(), entity.getLegalBasis()));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge the expanded entities into the section's entity set, then prune via the helper below.
EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
}
|
||||
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
||||
|
||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
@ -160,7 +189,7 @@ public class Section {
|
||||
|
||||
String trimmedValue = value.trim();
|
||||
String cleanValue;
|
||||
if(trimmedValue.startsWith(":")){
|
||||
if (trimmedValue.startsWith(":")) {
|
||||
cleanValue = trimmedValue.substring(1).trim();
|
||||
} else {
|
||||
cleanValue = trimmedValue;
|
||||
@ -207,8 +236,8 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
|
||||
String reason, String legalBasis) {
|
||||
public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType,
|
||||
int ruleNumber, String reason, String legalBasis) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -363,6 +392,7 @@ public class Section {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -441,7 +441,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_08_Volume_3CA_B-6_2018-09-06.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
|
||||
@ -1,15 +1,36 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
|
||||
public class RegExPatternTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void testExpand(){
|
||||
String pattern = "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)";
|
||||
String text = ", G.R., Simoneaux,";
|
||||
Pattern compiledPattern = Pattern.compile(pattern, 0);
|
||||
Matcher matcher = compiledPattern.matcher(text);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(1);
|
||||
if (StringUtils.isNotBlank(match)) {
|
||||
System.out.println(match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmailRegEx(){
|
||||
String text = "Address: Schwarzwaldalle " +
|
||||
|
||||
@ -7,6 +7,15 @@ global Section section
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
|
||||
// Fires for a Section that matches type "CBI_author" or "recommendation_CBI_author" and calls
// section.expandByRegEx once for each of the two types with the same pattern.
// The pattern's group 1 captures: an optional comma, one space, then one to three upper-case
// letters (each optionally followed by a dot and optionally separated by a space), a word
// boundary and an optional trailing dot — e.g. ", G.R.".
// The third argument (false) is the patternCaseInsensitive flag; the fourth (1) is the capture group.
rule "0: Expand CBI Authors with firstname initials"
|
||||
when
|
||||
Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
||||
then
|
||||
section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
end
|
||||
|
||||
|
||||
rule "1: Redact CBI Authors"
|
||||
when
|
||||
Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
||||
|
||||
@ -7,6 +7,15 @@ global Section section
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
|
||||
// Fires for a Section that matches type "CBI_author" or "recommendation_CBI_author" and calls
// section.expandByRegEx once for each of the two types with the same pattern.
// The pattern's group 1 captures: an optional comma, one space, then one to three upper-case
// letters (each optionally followed by a dot and optionally separated by a space), a word
// boundary and an optional trailing dot — e.g. ", G.R.".
// The third argument (false) is the patternCaseInsensitive flag; the fourth (1) is the capture group.
rule "0: Expand CBI Authors with firstname initials"
|
||||
when
|
||||
Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
||||
then
|
||||
section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
end
|
||||
|
||||
|
||||
rule "1: Redacted because Section contains Vertebrate"
|
||||
when
|
||||
Section(matchesType("vertebrate"))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user