RED-2841: PORT - INC6207970 Rule for initials expansion should be applied only to dictionary entries without whitespaces

This commit is contained in:
aoezyetimoglu 2021-11-29 18:49:40 +01:00
parent 48c100c25a
commit 1431727f0e
4 changed files with 61 additions and 4 deletions

View File

@ -121,6 +121,16 @@ public class Section {
/**
 * Convenience overload of the five-argument {@code expandByRegEx} that applies no exclusion pattern.
 *
 * @param type                   entity type passed through to the five-argument overload
 * @param pattern                regular expression passed through to the five-argument overload
 * @param patternCaseInsensitive case-insensitivity flag passed through to the five-argument overload
 * @param group                  capture group index passed through to the five-argument overload
 */
public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group) {
    // A null exclusion pattern means "do not filter any entity".
    final String noExclusionPattern = null;
    expandByRegEx(type, pattern, patternCaseInsensitive, group, noExclusionPattern);
}
public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group, String withoutPattern) {
Pattern compiledWithoutPattern = null;
if (withoutPattern != null) {
compiledWithoutPattern = Patterns.getCompiledPattern(withoutPattern, patternCaseInsensitive);
}
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -131,6 +141,13 @@ public class Section {
continue;
}
if(withoutPattern != null) {
Matcher matcherWithout = compiledWithoutPattern.matcher(entity.getWord());
if (matcherWithout.find()) {
continue;
}
}
Matcher matcher = compiledPattern.matcher(entity.getTextAfter());
while (matcher.find()) {

View File

@ -1017,6 +1017,30 @@ public class RedactionIntegrationTest {
}
/**
 * Runs analysis and annotation on the S-Metolachlor sample document, writes the annotated PDF to
 * {@code /tmp/Annotated.pdf} (presumably for manual inspection), and prints the elapsed time and the
 * number of pages. The test makes no assertions; it fails only if one of the calls throws.
 */
@Test
public void testExpandByRegEx() throws IOException {
    System.out.println("expandByRegex");
    // Timing deliberately starts before storage preparation, so the duration covers the whole run.
    final long startedAt = System.currentTimeMillis();

    AnalyzeRequest analyzeRequest = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
    AnalyzeResult analysis = reanalyzeService.analyze(analyzeRequest);

    AnnotateRequest annotateRequest = AnnotateRequest.builder()
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .build();
    AnnotateResponse annotated = redactionController.annotate(annotateRequest);

    try (FileOutputStream pdfOut = new FileOutputStream("/tmp/Annotated.pdf")) {
        pdfOut.write(annotated.getDocument());
    }

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    System.out.println("duration: " + elapsedMillis);
    System.out.println("numberOfPages: " + analysis.getNumberOfPages());
}
private static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);

View File

@ -7,12 +7,20 @@ global Section section
// --------------------------------------- CBI rules -------------------------------------------------------------------
//rule "0: Expand CBI Authors with firstname initials"
// when
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
// then
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// end
// Expands CBI author entities by first-name initials that follow the entity (e.g. ", A.B.").
// Per RED-2841 the expansion must only be applied to dictionary entries that contain no whitespace.
// expandByRegEx skips an entity when its last argument (the exclusion pattern) is found anywhere in
// the entity's word, so the exclusion pattern has to match a whitespace character.
// The former value "[^\\s]+" is found in every word that has at least one non-whitespace character,
// which excluded all entities; the unfiltered four-argument calls are dropped so that the
// restriction actually takes effect.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
end

View File

@ -7,12 +7,20 @@ global Section section
// --------------------------------------- CBI rules -------------------------------------------------------------------
//rule "0: Expand CBI Authors with firstname initials"
// when
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
// then
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// end
// Expands CBI author entities by first-name initials that follow the entity (e.g. ", A.B.").
// Per RED-2841 the expansion must only be applied to dictionary entries that contain no whitespace.
// expandByRegEx skips an entity when its last argument (the exclusion pattern) is found anywhere in
// the entity's word, so the exclusion pattern has to match a whitespace character.
// The former value "[^\\s]+" is found in every word that has at least one non-whitespace character,
// which excluded all entities; the unfiltered four-argument calls are dropped so that the
// restriction actually takes effect.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
end