Expand CBI Authors with firstname initials
This commit is contained in:
parent
b4699caaae
commit
35f3582d08
@ -63,6 +63,7 @@ public class Section {
|
|||||||
|
|
||||||
|
|
||||||
public boolean hasTableHeader(String headerName) {
|
public boolean hasTableHeader(String headerName) {
|
||||||
|
|
||||||
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||||
return tabularData != null && tabularData.containsKey(cleanHeaderName);
|
return tabularData != null && tabularData.containsKey(cleanHeaderName);
|
||||||
}
|
}
|
||||||
@ -80,6 +81,34 @@ public class Section {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group) {
|
||||||
|
|
||||||
|
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||||
|
|
||||||
|
Set<Entity> expanded = new HashSet<>();
|
||||||
|
for (Entity entity : entities) {
|
||||||
|
|
||||||
|
if (!entity.getType().equals(type) || entity.getTextAfter() == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matcher matcher = compiledPattern.matcher(entity.getTextAfter());
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
String match = matcher.group(group);
|
||||||
|
if (StringUtils.isNotBlank(match)) {
|
||||||
|
expanded.addAll(findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity
|
||||||
|
.getRedactionReason(), entity.getLegalBasis()));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary);
|
||||||
|
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
||||||
|
|
||||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||||
@ -207,8 +236,8 @@ public class Section {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
|
public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType,
|
||||||
String reason, String legalBasis) {
|
int ruleNumber, String reason, String legalBasis) {
|
||||||
|
|
||||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||||
|
|
||||||
@ -363,6 +392,7 @@ public class Section {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -441,7 +441,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
System.out.println("redactionTest");
|
System.out.println("redactionTest");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_08_Volume_3CA_B-6_2018-09-06.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
.ruleSetId(TEST_RULESET_ID)
|
||||||
|
|||||||
@ -1,15 +1,36 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||||
|
|
||||||
public class RegExPatternTest {
|
public class RegExPatternTest {
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExpand(){
|
||||||
|
String pattern = "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)";
|
||||||
|
String text = ", G.R., Simoneaux,";
|
||||||
|
Pattern compiledPattern = Pattern.compile(pattern, 0);
|
||||||
|
Matcher matcher = compiledPattern.matcher(text);
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
String match = matcher.group(1);
|
||||||
|
if (StringUtils.isNotBlank(match)) {
|
||||||
|
System.out.println(match);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEmailRegEx(){
|
public void testEmailRegEx(){
|
||||||
String text = "Address: Schwarzwaldalle " +
|
String text = "Address: Schwarzwaldalle " +
|
||||||
|
|||||||
@ -7,6 +7,15 @@ global Section section
|
|||||||
|
|
||||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Runs for sections that contain a CBI_author or recommendation_CBI_author match.
// For both types, asks the section to expand matching entities with the text
// captured by group 1 of the pattern: an optional comma, a space, then one to
// three upper-case letters, each optionally followed by a dot.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
end
|
||||||
|
|
||||||
|
|
||||||
rule "1: Redact CBI Authors"
|
rule "1: Redact CBI Authors"
|
||||||
when
|
when
|
||||||
Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
||||||
|
|||||||
@ -7,6 +7,15 @@ global Section section
|
|||||||
|
|
||||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Runs for sections that contain a CBI_author or recommendation_CBI_author match.
// For both types, asks the section to expand matching entities with the text
// captured by group 1 of the pattern: an optional comma, a space, then one to
// three upper-case letters, each optionally followed by a dot.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
end
|
||||||
|
|
||||||
|
|
||||||
rule "1: Redacted because Section contains Vertebrate"
|
rule "1: Redacted because Section contains Vertebrate"
|
||||||
when
|
when
|
||||||
Section(matchesType("vertebrate"))
|
Section(matchesType("vertebrate"))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user