From de310e8a654b601cb4bf258675904a25d4233783 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Mon, 14 Sep 2020 13:51:56 +0200 Subject: [PATCH] RED-293: Add unknown Textblocks to paragraphs --- .../classification/service/ClassificationService.java | 2 +- .../src/test/resources/drools/rules.drl | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java index a17cc508..eb2fa386 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java @@ -84,7 +84,7 @@ public class ClassificationService { } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter().getMostPopular().equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) { textBlock.setClassification("TextBlock Italic"); - } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getSequences().size() > 3){ + } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)){ textBlock.setClassification("TextBlock Unknown"); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index c991b0b5..7fd18d3d 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -51,7 +51,7 @@ rule "5: Do not redact in guideline sections" rule "6: Redact contact information if applicant is found" when - eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant")); + eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact")); then section.redactLineAfter("Name:", "address", 6, "Applicant information was found"); section.redactBetween("Address:", "Contact", "address", 6, "Applicant information was found"); @@ -62,14 +62,20 @@ rule "6: Redact contact information if applicant is found" section.redactLineAfter("Tel:", "address", 6, "Applicant information was found"); section.redactLineAfter("E-mail:", "address", 6, "Applicant information was found"); section.redactLineAfter("Email:", "address", 6, "Applicant information was found"); + section.redactLineAfter("e-mail:", "address", 6, "Applicant information was found"); + section.redactLineAfter("E-mail address:", "address", 6, "Applicant information was found"); section.redactLineAfter("Contact:", "address", 6, "Applicant information was found"); + section.redactLineAfter("Alternative contact:", "address", 6, "Applicant information was found"); section.redactLineAfter("Telephone number:", "address", 6, "Applicant information was found"); + section.redactLineAfter("Telephone No:", "address", 6, "Applicant information was found"); section.redactLineAfter("Fax number:", "address", 6, "Applicant information was found"); section.redactLineAfter("Telephone:", "address", 6, "Applicant information was found"); + section.redactLineAfter("Company:", "address", 6, "Applicant information was found"); section.redactBetween("No:", "Fax", "address", 6, "Applicant 
information was found"); section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found"); end + rule "7: Redact contact information if Producer is found" when eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));