Add rule redacting sponsor companies if preceded by prefix
This commit is contained in:
parent
99bde2956f
commit
cec5fd3d5e
@ -80,6 +80,20 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason) {
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type)) {
|
||||
if (searchText.indexOf(prefix + entity.getWord()) != 1) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, "\n");
|
||||
@ -141,7 +155,8 @@ public class Section {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText
|
||||
.charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber));
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex,
|
||||
headline, sectionNumber));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
|
||||
@ -197,7 +212,8 @@ public class Section {
|
||||
if (value == null) {
|
||||
log.warn("Could not find any data for {}.", cellHeader);
|
||||
} else {
|
||||
Entity entity = new Entity(value.toString(), type, value.getRowSpanStart(), value.getRowSpanStart() + value.toString()
|
||||
Entity entity = new Entity(value.toString(), type, value.getRowSpanStart(),
|
||||
value.getRowSpanStart() + value.toString()
|
||||
.length(), headline, sectionNumber);
|
||||
entity.setRedaction(redact);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
@ -206,7 +222,7 @@ public class Section {
|
||||
.getSequences()); // Make sure no other cells with same content are highlighted
|
||||
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
if(entities.contains(entity)){
|
||||
if (entities.contains(entity)) {
|
||||
entities.remove(entity);
|
||||
}
|
||||
entities.add(entity);
|
||||
|
||||
@ -63,6 +63,7 @@ public class RedactionIntegrationTest {
|
||||
private static final String VERTEBRATES_CODE = "vertebrate";
|
||||
private static final String ADDRESS_CODE = "address";
|
||||
private static final String NAME_CODE = "name";
|
||||
private static final String SPONSOR = "sponsor";
|
||||
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
|
||||
private static final String REDACTION_INDICATOR = "redaction_indicator";
|
||||
private static final String HINT_ONLY = "hint_only";
|
||||
@ -117,6 +118,7 @@ public class RedactionIntegrationTest {
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATES_CODE)).thenReturn(getDictionaryResponse(VERTEBRATES_CODE));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(getDictionaryResponse(ADDRESS_CODE));
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(getDictionaryResponse(NAME_CODE));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR)).thenReturn(getDictionaryResponse(SPONSOR));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY)).thenReturn(getDictionaryResponse(HINT_ONLY));
|
||||
@ -132,6 +134,11 @@ public class RedactionIntegrationTest {
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/sponsor_companies.txt")
|
||||
.stream()
|
||||
.map(this::cleanDictionaryEntry)
|
||||
.collect(Collectors.toSet()));
|
||||
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>())
|
||||
.addAll(ResourceLoader.load("dictionaries/vertebrates.txt")
|
||||
.stream()
|
||||
@ -176,6 +183,7 @@ public class RedactionIntegrationTest {
|
||||
typeColorMap.put(VERTEBRATES_CODE, new float[]{0, 1, 0});
|
||||
typeColorMap.put(ADDRESS_CODE, new float[]{0, 1, 1});
|
||||
typeColorMap.put(NAME_CODE, new float[]{1, 1, 0});
|
||||
typeColorMap.put(SPONSOR, new float[]{.5f, .5f, .5f});
|
||||
typeColorMap.put(NO_REDACTION_INDICATOR, new float[]{0.8f, 0, 0.8f});
|
||||
typeColorMap.put(REDACTION_INDICATOR, new float[]{1, 0.502f, 0.1f});
|
||||
typeColorMap.put(HINT_ONLY, new float[]{0.8f, 1, 0.8f});
|
||||
@ -184,6 +192,7 @@ public class RedactionIntegrationTest {
|
||||
hintTypeMap.put(VERTEBRATES_CODE, true);
|
||||
hintTypeMap.put(ADDRESS_CODE, false);
|
||||
hintTypeMap.put(NAME_CODE, false);
|
||||
hintTypeMap.put(SPONSOR, false);
|
||||
hintTypeMap.put(NO_REDACTION_INDICATOR, true);
|
||||
hintTypeMap.put(REDACTION_INDICATOR, true);
|
||||
hintTypeMap.put(HINT_ONLY, true);
|
||||
@ -192,6 +201,7 @@ public class RedactionIntegrationTest {
|
||||
caseInSensitiveMap.put(VERTEBRATES_CODE, true);
|
||||
caseInSensitiveMap.put(ADDRESS_CODE, false);
|
||||
caseInSensitiveMap.put(NAME_CODE, false);
|
||||
caseInSensitiveMap.put(SPONSOR, false);
|
||||
caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
|
||||
caseInSensitiveMap.put(REDACTION_INDICATOR, true);
|
||||
caseInSensitiveMap.put(HINT_ONLY, true);
|
||||
|
||||
@ -0,0 +1,7 @@
|
||||
Monthey Syngenta Crop Protection AG, Basel, Switzerland
|
||||
Syngenta Crop Protection, Monthey, Switzerland
|
||||
Fine Organics Limited, Middlesbrough, United Kingdom
|
||||
Syngenta Monthey Switzerland
|
||||
Hunan Haili Chemical Industry Co., Ltd., Hunan, China
|
||||
Syngenta, Switzerland
|
||||
Syngenta Nantong, China
|
||||
@ -49,6 +49,7 @@ rule "5: Do not redact in guideline sections"
|
||||
section.redactNot("address", 5, "Section is a guideline section.");
|
||||
end
|
||||
|
||||
|
||||
rule "6: Redact contact information if applicant is found"
|
||||
when
|
||||
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact"));
|
||||
@ -124,3 +125,10 @@ rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate
|
||||
section.redact("address", 10, "Redacted because row is a vertebrate study");
|
||||
section.highlightCell("Vertebrate study Y/N", 10, "must_redact");
|
||||
end
|
||||
|
||||
// Rule 11: when the section text contains "batches produced at" (compared in lower case),
// asks the section to redact "sponsor" entities that are preceded by that phrase.
// NOTE(review): Section.redactIfPrecededBy looks up the literal concatenation
// prefix + entity word in the search text, case-sensitively. The prefix passed here has
// no trailing space and the condition below ignores case, so the two checks can
// disagree -- confirm the intended prefix and casing.
rule "11: Redact sponsor company"
|
||||
when
|
||||
Section(section.getText().toLowerCase().contains("batches produced at"))
|
||||
then
|
||||
section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company");
|
||||
end
|
||||
Loading…
x
Reference in New Issue
Block a user