Add rule redacting sponsor companies if preceded by prefix

This commit is contained in:
Thierry Göckel 2020-09-30 14:13:17 +02:00
parent 99bde2956f
commit cec5fd3d5e
4 changed files with 44 additions and 3 deletions

View File

@ -80,6 +80,20 @@ public class Section {
}
/**
 * Flags every entity of the given type for redaction when the section's search text contains
 * the entity's word immediately preceded by {@code prefix}.
 *
 * <p>The lookup is performed on the whole search text, not on the position of the individual
 * entity: if {@code prefix + word} occurs anywhere, every entity of {@code type} carrying that
 * word is flagged. The prefix is concatenated as-is, so any separator expected between the
 * prefix and the word (e.g. a trailing space) must be part of {@code prefix}.
 *
 * @param prefix     text that must directly precede the entity's word
 * @param type       entity type to which the rule applies
 * @param ruleNumber number of the rule recorded on each flagged entity
 * @param reason     redaction reason recorded on each flagged entity
 */
public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason) {

    entities.forEach(entity -> {
        if (entity.getType().equals(type)) {
            // indexOf signals "not found" with -1. The previous comparison against 1 flagged
            // entities whose prefixed word was absent and skipped a match located at index 1.
            if (searchText.indexOf(prefix + entity.getWord()) != -1) {
                entity.setRedaction(true);
                entity.setMatchedRule(ruleNumber);
                entity.setRedactionReason(reason);
            }
        }
    });
}
public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -141,7 +155,8 @@ public class Section {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText
.charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber));
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex,
headline, sectionNumber));
}
} while (startIndex > -1);
@ -197,7 +212,8 @@ public class Section {
if (value == null) {
log.warn("Could not find any data for {}.", cellHeader);
} else {
Entity entity = new Entity(value.toString(), type, value.getRowSpanStart(), value.getRowSpanStart() + value.toString()
Entity entity = new Entity(value.toString(), type, value.getRowSpanStart(),
value.getRowSpanStart() + value.toString()
.length(), headline, sectionNumber);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
@ -206,7 +222,7 @@ public class Section {
.getSequences()); // Make sure no other cells with same content are highlighted
// HashSet keeps the older value, but we want the new only.
if(entities.contains(entity)){
if (entities.contains(entity)) {
entities.remove(entity);
}
entities.add(entity);

View File

@ -63,6 +63,7 @@ public class RedactionIntegrationTest {
private static final String VERTEBRATES_CODE = "vertebrate";
private static final String ADDRESS_CODE = "address";
private static final String NAME_CODE = "name";
private static final String SPONSOR = "sponsor";
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String REDACTION_INDICATOR = "redaction_indicator";
private static final String HINT_ONLY = "hint_only";
@ -117,6 +118,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(VERTEBRATES_CODE)).thenReturn(getDictionaryResponse(VERTEBRATES_CODE));
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(getDictionaryResponse(ADDRESS_CODE));
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(getDictionaryResponse(NAME_CODE));
when(dictionaryClient.getDictionaryForType(SPONSOR)).thenReturn(getDictionaryResponse(SPONSOR));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
when(dictionaryClient.getDictionaryForType(HINT_ONLY)).thenReturn(getDictionaryResponse(HINT_ONLY));
@ -132,6 +134,11 @@ public class RedactionIntegrationTest {
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/sponsor_companies.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/vertebrates.txt")
.stream()
@ -176,6 +183,7 @@ public class RedactionIntegrationTest {
typeColorMap.put(VERTEBRATES_CODE, new float[]{0, 1, 0});
typeColorMap.put(ADDRESS_CODE, new float[]{0, 1, 1});
typeColorMap.put(NAME_CODE, new float[]{1, 1, 0});
typeColorMap.put(SPONSOR, new float[]{.5f, .5f, .5f});
typeColorMap.put(NO_REDACTION_INDICATOR, new float[]{0.8f, 0, 0.8f});
typeColorMap.put(REDACTION_INDICATOR, new float[]{1, 0.502f, 0.1f});
typeColorMap.put(HINT_ONLY, new float[]{0.8f, 1, 0.8f});
@ -184,6 +192,7 @@ public class RedactionIntegrationTest {
hintTypeMap.put(VERTEBRATES_CODE, true);
hintTypeMap.put(ADDRESS_CODE, false);
hintTypeMap.put(NAME_CODE, false);
hintTypeMap.put(SPONSOR, false);
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
hintTypeMap.put(REDACTION_INDICATOR, true);
hintTypeMap.put(HINT_ONLY, true);
@ -192,6 +201,7 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(VERTEBRATES_CODE, true);
caseInSensitiveMap.put(ADDRESS_CODE, false);
caseInSensitiveMap.put(NAME_CODE, false);
caseInSensitiveMap.put(SPONSOR, false);
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
caseInSensitiveMap.put(HINT_ONLY, true);

View File

@ -0,0 +1,7 @@
Monthey Syngenta Crop Protection AG, Basel, Switzerland
Syngenta Crop Protection, Monthey, Switzerland
Fine Organics Limited, Middlesbrough, United Kingdom
Syngenta Monthey Switzerland
Hunan Haili Chemical Industry Co., Ltd., Hunan, China
Syngenta, Switzerland
Syngenta Nantong, China

View File

@ -49,6 +49,7 @@ rule "5: Do not redact in guideline sections"
section.redactNot("address", 5, "Section is a guideline section.");
end
rule "6: Redact contact information if applicant is found"
when
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact"));
@ -124,3 +125,10 @@ rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate
section.redact("address", 10, "Redacted because row is a vertebrate study");
section.highlightCell("Vertebrate study Y/N", 10, "must_redact");
end
// Rule 11: hands sections mentioning "batches produced at" to Section.redactIfPrecededBy so that
// "sponsor" entities following that phrase can be flagged for redaction.
rule "11: Redact sponsor company"
when
// Guard: the section text, compared in lower case, contains the trigger phrase.
Section(section.getText().toLowerCase().contains("batches produced at"))
then
// NOTE(review): the guard lower-cases the text, but the prefix is passed here unchanged and
// without a trailing separator - confirm the lookup in redactIfPrecededBy matches the same
// casing and spacing as the documents.
section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company");
end