Pull request #18: RED-149: Added must_redact dictionary and Rule, Adjusted rules for applicant and producer to work on all documents.

Merge in RED/redaction-service from RED-149 to master

* commit 'cce8200d433ec89160af3af32f40be57c0b67678':
  redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java online editiert mit Bitbucket
  RED-149: Added must_redact dictionary and Rule, Adjusted rules for applicant and producer to work on all documents. Fixed endless loop in rules. Detect multiple occurrences in rules
This commit is contained in:
Cheng Zhu 2020-08-05 13:21:14 +02:00
commit d465a4ba5b
5 changed files with 82 additions and 29 deletions

View File

@ -34,7 +34,8 @@ public class Section {
}
public boolean headlineContainsWord(String word){
public boolean headlineContainsWord(String word) {
return StringUtils.containsIgnoreCase(headline, word);
}
@ -65,11 +66,15 @@ public class Section {
public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
String value = StringUtils.substringBetween(text, start, "\n");
String[] values = StringUtils.substringsBetween(text, start, "\n");
if (value != null) {
Set<Entity> found = findEntity(value.trim(), asType);
entities.addAll(found);
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntity(value.trim(), asType);
entities.addAll(found);
}
}
}
// TODO No need to iterate
@ -86,11 +91,15 @@ public class Section {
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason) {
String value = StringUtils.substringBetween(searchText, start, stop);
String[] values = StringUtils.substringsBetween(searchText, start, stop);
if (value != null) {
Set<Entity> found = findEntity(value.trim(), asType);
entities.addAll(found);
if (values != null) {
for (String value : values) {
if (value != null && StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntity(value.trim(), asType);
entities.addAll(found);
}
}
}
// TODO No need to iterate

View File

@ -60,6 +60,7 @@ public class RedactionIntegrationTest {
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String REDACTION_INDICATOR = "redaction_indicator";
private static final String HINT_ONLY = "hint_only";
private static final String MUST_REDACT = "must_redact";
@Autowired
private RedactionController redactionController;
@ -113,6 +114,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
when(dictionaryClient.getDictionaryForType(HINT_ONLY)).thenReturn(getDictionaryResponse(HINT_ONLY));
when(dictionaryClient.getDictionaryForType(MUST_REDACT)).thenReturn(getDictionaryResponse(MUST_REDACT));
when(dictionaryClient.getDefaultColor()).thenReturn(new DefaultColor(new float[]{1f, 0.502f, 0f}));
}
@ -149,6 +151,11 @@ public class RedactionIntegrationTest {
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(MUST_REDACT, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/must_redact.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -166,6 +173,7 @@ public class RedactionIntegrationTest {
typeColorMap.put(NO_REDACTION_INDICATOR, new float[]{0.8f, 0, 0.8f});
typeColorMap.put(REDACTION_INDICATOR, new float[]{1, 0.502f, 0.1f});
typeColorMap.put(HINT_ONLY, new float[]{0.8f, 1, 0.8f});
typeColorMap.put(MUST_REDACT, new float[]{1, 0, 0});
hintTypeMap.put(VERTEBRATES_CODE, true);
hintTypeMap.put(ADDRESS_CODE, false);
@ -173,6 +181,7 @@ public class RedactionIntegrationTest {
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY, true);
hintTypeMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(VERTEBRATES_CODE, true);
caseInSensitiveMap.put(ADDRESS_CODE, false);
@ -180,6 +189,7 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);
caseInSensitiveMap.put(MUST_REDACT, true);
}

View File

@ -793,4 +793,13 @@ Eurofins Agroscience Services EcoChem GmbH, NOsch., Germany
Tier3 solutions GmbH, Germany
Syngenta Crop Protection AG
Jealotts Hill Research Centre. Syngenta Crop protection AG
RCC Umweltchemie GmbH & Co KG
RCC Umweltchemie GmbH & Co KG
Syngenta Crop Protection AG Schwarzwaldallee 215 P.O. Box CH-4002 Basel Switzerland
Syngenta Crop Protection AG CH 4002 Basel Switzerland
Syngenta Crop Protection AG Schwarzwaldalle 215 P.O. Box CH-4002 Basel Switzerland
Syngenta Crop Protection AG CH 4002 Basel Switzerland
Syngenta Crop Protection AG European Product Registration B8.4.29 Scharzwaldalllee 2015 CH-4058 Basel Switzerland
Adama Agriculture BV Arnhemseweg 87 NL-3832 GK Leusden The Netherlands
Eurofins Regulatory AG Weidenweg 15 CH-4310 Rheinfelden Switzerland
Cheminova A/S Thyborønvej 78, DK-7673 Harboøre, Denmark P.O. Box 9 DK-7620 Lemvig Denmark
Helm AG Nordkanalstrasse 28 20097 Hamburg Germany

View File

@ -0,0 +1,3 @@
Batches Produced at
CTL
for determination of residues

View File

@ -49,30 +49,52 @@ rule "5: Do not redact in guideline sections"
section.redactNot("address", 5, "Section is a guideline section.");
end
rule "6: Redact contact information, if applicant is found"
rule "6: Redact if must redact entry is found"
when
eval(section.getText().toLowerCase().contains("applicant") == true);
eval(section.contains("must_redact")==true);
then
section.redactLineAfter("Name:", "address", 6, "contact information was found");
section.redactBetween("Address:", "Contact", "address", 6, "contact information was found");
section.redactLineAfter("Contact point:", "address", 6, "contact information was found");
section.redactLineAfter("Phone:", "address", 6, "contact information was found");
section.redactLineAfter("Fax:", "address", 6, "contact information was found");
section.redactLineAfter("E-mail:", "address", 6, "contact information was found");
section.redactLineAfter("Contact:", "address", 6, "contact information was found");
section.redactLineAfter("Telephone number:", "address", 6, "contact information was found");
section.redact("name", 6, "must_redact entry was found.");
section.redact("address", 6, "must_redact entry was found.");
end
rule "7: Redact contact information, if 'Producer of the plant protection product' is found"
rule "7: Redact contact information, if applicant is found"
when
eval(section.getText().contains("Producer of the plant protection product"));
eval(section.getText().toLowerCase().contains("applicant") == true);
then
section.redactLineAfter("Name:", "address", 7, "Producer of the plant protection product was found");
section.redactBetween("Address:", "Contact", "address", 7, "Producer of the plant protection product was found");
section.redactBetween("Contact:", "Phone", "address", 7, "Producer of the plant protection product was found");
section.redactLineAfter("Phone:", "address", 7, "Producer of the plant protection product was found");
section.redactLineAfter("Fax:", "address", 7, "Producer of the plant protection product was found");
section.redactLineAfter("E-mail:", "address", 7, "Producer of the plant protection product was found");
section.redactLineAfter("Name:", "address", 7, "Applicant information was found");
section.redactBetween("Address:", "Contact", "address", 7, "Applicant information was found");
section.redactLineAfter("Contact point:", "address", 7, "Applicant information was found");
section.redactLineAfter("Phone:", "address", 7, "Applicant information was found");
section.redactLineAfter("Fax:", "address", 7, "Applicant information was found");
section.redactLineAfter("Tel.:", "address", 7, "Applicant information was found");
section.redactLineAfter("Tel:", "address", 7, "Applicant information was found");
section.redactLineAfter("E-mail:", "address", 7, "Applicant information was found");
section.redactLineAfter("Email:", "address", 7, "Applicant information was found");
section.redactLineAfter("Contact:", "address", 7, "Applicant information was found");
section.redactLineAfter("Telephone number:", "address", 7, "Applicant information was found");
section.redactLineAfter("Fax number:", "address", 7, "Applicant information was found");
section.redactLineAfter("Telephone:", "address", 7, "Applicant information was found");
section.redactBetween("No:", "Fax", "address", 7, "Applicant information was found");
section.redactBetween("Contact:", "Tel.:", "address", 7, "Applicant information was found");
end
// Rule 8: redacts contact details in sections that mention a producer or manufacturer.
// Every action below passes "address" as the entity type, 8 as the rule number and
// "Producer was found" as the reason.
rule "8: Redact contact information, if Producer is found"
when
// Plain contains(...) checks on the section text with no lower-casing (unlike the applicant rule,
// which lower-cases the text first); any one of the three markers is enough to fire the rule.
eval(section.getText().contains("Producer") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:"));
then
// redactLineAfter(label, ...): per the Section.java hunk in this diff, takes every non-blank span
// between the label and the next "\n", trims it and looks it up as an entity of the given type.
section.redactLineAfter("Name:", "address", 8, "Producer was found");
// redactBetween(start, stop, ...): same idea, but the span ends at the stop marker instead of
// the end of the line, so it can cover values that wrap over several lines.
section.redactBetween("Address:", "Contact", "address", 8, "Producer was found");
section.redactBetween("Contact:", "Phone", "address", 8, "Producer was found");
section.redactBetween("Contact:", "Telephone number:", "address", 8, "Producer was found");
section.redactBetween("Address:", "Manufacturing", "address", 8, "Producer was found");
// NOTE(review): "Address:" and "Contact:" are each handled with several different stop markers
// (and "Contact:" also by redactLineAfter below), so captured spans can overlap — presumably
// intentional to cover different document layouts; confirm.
section.redactLineAfter("Telephone:", "address", 8, "Producer was found");
section.redactLineAfter("Phone:", "address", 8, "Producer was found");
section.redactLineAfter("Fax:", "address", 8, "Producer was found");
section.redactLineAfter("E-mail:", "address", 8, "Producer was found");
section.redactLineAfter("Contact:", "address", 8, "Producer was found");
section.redactLineAfter("Fax number:", "address", 8, "Producer was found");
section.redactLineAfter("Telephone number:", "address", 8, "Producer was found");
section.redactLineAfter("Tel:", "address", 8, "Producer was found");
// NOTE(review): "No:" ... "Fax" presumably targets a phone number printed between a "No:" label
// and the following "Fax" label — verify against sample documents.
section.redactBetween("No:", "Fax", "address", 8, "Producer was found");
end