RED-2841: INC6207970 Rule for initials expansion should be applied only to dictionary entries without whitespace
This commit is contained in:
parent
f5817204bf
commit
d1317c5bd4
@ -131,11 +131,27 @@ public class Section {
|
||||
return StringUtils.containsIgnoreCase(headline, word);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group) {
|
||||
expandByRegEx(type, pattern, patternCaseInsensitive, group, null);
|
||||
}
|
||||
|
||||
@ThenAction
|
||||
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.REGEX) String withoutPattern) {
|
||||
|
||||
Pattern compiledWithoutPattern = null;
|
||||
|
||||
if (withoutPattern != null) {
|
||||
compiledWithoutPattern = Patterns.getCompiledPattern(withoutPattern, patternCaseInsensitive);
|
||||
}
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -146,6 +162,13 @@ public class Section {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(withoutPattern != null) {
|
||||
Matcher matcherWithout = compiledWithoutPattern.matcher(entity.getWord());
|
||||
if (matcherWithout.find()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Matcher matcher = compiledPattern.matcher(entity.getTextAfter());
|
||||
|
||||
while (matcher.find()) {
|
||||
|
||||
@ -1304,6 +1304,30 @@ public class RedactionIntegrationTest {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExpandByRegEx() throws IOException {
|
||||
|
||||
System.out.println("expandByRegex");
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
private static String loadFromClassPath(String path) {
|
||||
|
||||
|
||||
@ -7,12 +7,20 @@ global Section section
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
|
||||
//rule "0: Expand CBI Authors with firstname initials"
|
||||
// when
|
||||
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
||||
// then
|
||||
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
// end
|
||||
|
||||
// Expands CBI_author and recommendation_CBI_author entities with the first-name initials
// that directly follow them (up to three initials, each optionally followed by a dot).
// Per RED-2841 the expansion must only apply to dictionary entries without whitespace.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        // The last argument is the exclusion pattern: expandByRegEx skips an entity when this
        // pattern is found anywhere in the entity's word (Matcher.find()). "\\s" therefore
        // skips exactly the entries that contain whitespace. The previous "[^\\s]+" was found
        // in every non-empty entry and disabled the expansion altogether.
        // The unfiltered four-argument calls are intentionally not used here, because they
        // would expand every entry and defeat the whitespace restriction.
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
end
|
||||
|
||||
|
||||
|
||||
@ -7,12 +7,20 @@ global Section section
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
|
||||
//rule "0: Expand CBI Authors with firstname initials"
|
||||
// when
|
||||
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
|
||||
// then
|
||||
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
|
||||
// end
|
||||
|
||||
// Expands CBI_author and recommendation_CBI_author entities with the first-name initials
// that directly follow them (up to three initials, each optionally followed by a dot).
// Per RED-2841 the expansion must only apply to dictionary entries without whitespace.
rule "0: Expand CBI Authors with firstname initials"
    when
        Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
    then
        // The last argument is the exclusion pattern: expandByRegEx skips an entity when this
        // pattern is found anywhere in the entity's word (Matcher.find()). "\\s" therefore
        // skips exactly the entries that contain whitespace. The previous "[^\\s]+" was found
        // in every non-empty entry and disabled the expansion altogether.
        // The unfiltered four-argument calls are intentionally not used here, because they
        // would expand every entry and defeat the whitespace restriction.
        section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
        section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "\\s");
end
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user