RED-2841: INC6207970 Rule for initials expansion should be applied only to dictionary entries without whitespace

This commit is contained in:
aoezyetimoglu 2021-11-29 12:38:53 +01:00
parent f5817204bf
commit d1317c5bd4
4 changed files with 67 additions and 4 deletions

View File

@ -131,11 +131,27 @@ public class Section {
return StringUtils.containsIgnoreCase(headline, word);
}
/**
 * Convenience overload of the regex expansion action that applies no exclusion pattern.
 *
 * <p>Forwards all arguments unchanged to the five-argument overload and supplies
 * {@code null} as its {@code withoutPattern}; that overload only builds and applies an
 * exclusion pattern when the value is non-null.
 *
 * @param type                   entity type, forwarded as-is
 * @param pattern                regular expression, forwarded as-is
 * @param patternCaseInsensitive case-insensitivity flag, forwarded as-is
 * @param group                  forwarded as-is (presumably the capture group of
 *                               {@code pattern} to use; confirm in the five-argument overload)
 */
@ThenAction
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type,
                          @Argument(ArgumentType.REGEX) String pattern,
                          @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
                          @Argument(ArgumentType.INTEGER) int group) {

    // No exclusion pattern: every entity of the given type stays eligible.
    final String noWithoutPattern = null;
    expandByRegEx(type, pattern, patternCaseInsensitive, group, noWithoutPattern);
}
@ThenAction
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.REGEX) String withoutPattern) {
Pattern compiledWithoutPattern = null;
if (withoutPattern != null) {
compiledWithoutPattern = Patterns.getCompiledPattern(withoutPattern, patternCaseInsensitive);
}
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -146,6 +162,13 @@ public class Section {
continue;
}
if(withoutPattern != null) {
Matcher matcherWithout = compiledWithoutPattern.matcher(entity.getWord());
if (matcherWithout.find()) {
continue;
}
}
Matcher matcher = compiledPattern.matcher(entity.getTextAfter());
while (matcher.find()) {

View File

@ -1304,6 +1304,30 @@ public class RedactionIntegrationTest {
}
/**
 * Runs structure analysis, analysis and annotation for the S-Metolachlor sample document,
 * writes the annotated PDF to the temporary directory and prints the elapsed time and
 * the number of pages.
 *
 * <p>NOTE(review): this test contains no assertions; it only fails if one of the calls throws.
 */
@Test
public void testExpandByRegEx() throws IOException {

    System.out.println("expandByRegex");
    final long startedAt = System.currentTimeMillis();

    // Analyze the uploaded sample document.
    AnalyzeRequest analyzeRequest = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
    StructureAnalyzeRequest structureRequest =
            new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
    analyzeService.analyzeDocumentStructure(structureRequest);
    AnalyzeResult analysis = analyzeService.analyze(analyzeRequest);

    // Annotate using the fixed test dossier/file ids.
    AnnotateResponse annotated = redactionController.annotate(
            AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());

    // Persist the annotated PDF so it can be inspected manually.
    String annotatedPdfPath = getTemporaryDirectory() + "/Annotated.pdf";
    try (FileOutputStream pdfOut = new FileOutputStream(annotatedPdfPath)) {
        pdfOut.write(annotated.getDocument());
    }

    final long finishedAt = System.currentTimeMillis();
    System.out.println("duration: " + (finishedAt - startedAt));
    System.out.println("numberOfPages: " + analysis.getNumberOfPages());
}
private static String loadFromClassPath(String path) {

View File

@ -7,12 +7,20 @@ global Section section
// --------------------------------------- CBI rules -------------------------------------------------------------------
//rule "0: Expand CBI Authors with firstname initials"
// when
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
// then
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// end
// Expands CBI author entities by first-name initials that follow them in the text,
// but only for entries whose word contains no whitespace.
//
// The fifth argument of expandByRegEx is an exclusion pattern: Section.expandByRegEx skips
// an entity as soon as that pattern is *found* (Matcher.find) anywhere in the entity's word.
// "\\s" therefore excludes every entry containing whitespace. A pattern such as "[^\\s]+"
// would be found in every non-empty word and would disable the expansion entirely.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
    end

View File

@ -7,12 +7,20 @@ global Section section
// --------------------------------------- CBI rules -------------------------------------------------------------------
//rule "0: Expand CBI Authors with firstname initials"
// when
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
// then
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// end
// Expands CBI author entities by first-name initials that follow them in the text,
// but only for entries whose word contains no whitespace.
//
// The fifth argument of expandByRegEx is an exclusion pattern: Section.expandByRegEx skips
// an entity as soon as that pattern is *found* (Matcher.find) anywhere in the entity's word.
// "\\s" therefore excludes every entry containing whitespace. A pattern such as "[^\\s]+"
// would be found in every non-empty word and would disable the expansion entirely.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
    end