RED-293: Add unknown Textblocks to paragraphs

This commit is contained in:
deiflaender 2020-09-14 13:51:56 +02:00
parent a0a78440d8
commit de310e8a65
2 changed files with 8 additions and 2 deletions

View File

@@ -84,7 +84,7 @@ public class ClassificationService {
}
else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter().getMostPopular().equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
textBlock.setClassification("TextBlock Italic");
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getSequences().size() > 3){
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)){
textBlock.setClassification("TextBlock Unknown");
}
}

View File

@@ -51,7 +51,7 @@ rule "5: Do not redact in guideline sections"
rule "6: Redact contact information if applicant is found"
when
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant"));
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact"));
then
section.redactLineAfter("Name:", "address", 6, "Applicant information was found");
section.redactBetween("Address:", "Contact", "address", 6, "Applicant information was found");
@@ -62,14 +62,20 @@ rule "6: Redact contact information if applicant is found"
section.redactLineAfter("Tel:", "address", 6, "Applicant information was found");
section.redactLineAfter("E-mail:", "address", 6, "Applicant information was found");
section.redactLineAfter("Email:", "address", 6, "Applicant information was found");
section.redactLineAfter("e-mail:", "address", 6, "Applicant information was found");
section.redactLineAfter("E-mail address:", "address", 6, "Applicant information was found");
section.redactLineAfter("Contact:", "address", 6, "Applicant information was found");
section.redactLineAfter("Alternative contact:", "address", 6, "Applicant information was found");
section.redactLineAfter("Telephone number:", "address", 6, "Applicant information was found");
section.redactLineAfter("Telephone No:", "address", 6, "Applicant information was found");
section.redactLineAfter("Fax number:", "address", 6, "Applicant information was found");
section.redactLineAfter("Telephone:", "address", 6, "Applicant information was found");
section.redactLineAfter("Company:", "address", 6, "Applicant information was found");
section.redactBetween("No:", "Fax", "address", 6, "Applicant information was found");
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found");
end
rule "7: Redact contact information if Producer is found"
when
eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));