Pull request #71: RED-473

Merge in RED/redaction-service from RED-473 to master

* commit 'bdc231f3c25aa9e3c1d4e77e62293bca90824b30':
  RED-473: Fixed missing "batches produced at" annotation
  Adjusted rules
This commit is contained in:
Dominique Eiflaender 2020-11-18 13:37:37 +01:00
commit 42fd7b1ee9
2 changed files with 20 additions and 9 deletions

View File

@ -94,6 +94,12 @@ public class Section {
}
/**
 * Adds every entity found for the given value to this section's {@code entities}.
 *
 * <p>The value is trimmed and then looked up through {@code findEntities} with the
 * case-insensitive flag set to {@code true}.
 *
 * @param value  text to look up; trimmed before the search
 * @param asType type passed through to {@code findEntities}
 */
public void addHintAnnotation(String value, String asType) {
    final String needle = value.trim();
    final Set<Entity> hintMatches = findEntities(needle, asType, true);
    entities.addAll(hintMatches);
}
public void redactLineAfter(String start, String asType, int ruleNumber, String reason, String legalBasis) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -101,7 +107,7 @@ public class Section {
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType);
Set<Entity> found = findEntities(value.trim(), asType, false);
entities.addAll(found);
}
}
@ -127,7 +133,7 @@ public class Section {
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType);
Set<Entity> found = findEntities(value.trim(), asType, false);
entities.addAll(found);
}
}
@ -145,18 +151,22 @@ public class Section {
}
private Set<Entity> findEntities(String value, String asType) {
private Set<Entity> findEntities(String value, String asType, boolean caseinsensitive) {
Set<Entity> found = new HashSet<>();
String text = caseinsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseinsensitive ? value.toLowerCase() : value;
int startIndex;
int stopIndex = 0;
do {
startIndex = searchText.indexOf(value, stopIndex);
stopIndex = startIndex + value.length();
startIndex = text.indexOf(searchValue, stopIndex);
stopIndex = startIndex + searchValue.length();
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(
searchText.charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(
text.charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(
stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex),
asType,

View File

@ -52,7 +52,7 @@ rule "5: Redact Names and Addresses if no_redaction_indicator and redaction_indi
rule "6: Redact contact information if applicant is found"
when
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact"))
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact") || text.contains("Contact:") || text.contains("Telephone number:"))
then
section.redactLineAfter("Contact point:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Phone:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
@ -72,6 +72,7 @@ rule "6: Redact contact information if applicant is found"
section.redactLineAfter("Company:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactBetween("No:", "Fax", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("European contact:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
@ -79,7 +80,6 @@ rule "7: Redact contact information if Producer is found"
when
Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance"))
then
section.redactLineAfter("Contact:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Contact:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Telephone:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Phone:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
@ -127,4 +127,5 @@ rule "11: Redact sponsor company"
Section(searchText.toLowerCase().contains("batches produced at"))
then
section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.addHintAnnotation("batches produced at", "must_redact");
end