RED-783: Separate rules for PII and CBI, RED-780: Added PII rules for author(s) and performing laboratory

This commit is contained in:
deiflaender 2020-11-25 16:56:19 +01:00
parent 412e13854f
commit 746a25c00d
10 changed files with 290 additions and 139 deletions

View File

@ -9,6 +9,8 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import lombok.Builder;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
@ -18,6 +20,8 @@ import lombok.extern.slf4j.Slf4j;
@Builder
public class Section {
private DictionaryService dictionaryService;
private Set<Entity> entities;
// This still contains linebreaks etc.
@ -94,13 +98,15 @@ public class Section {
}
public void addHintAnnotation(String value, String asType){
public void addHintAnnotation(String value, String asType) {
Set<Entity> found = findEntities(value.trim(), asType, true);
entities.addAll(found);
}
public void redactLineAfter(String start, String asType, int ruleNumber, String reason, String legalBasis) {
public void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
String legalBasis) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -108,6 +114,8 @@ public class Section {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false);
// HashSet keeps the older value, but we want the new only.
entities.removeAll(found);
entities.addAll(found);
}
}
@ -126,7 +134,8 @@ public class Section {
}
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason, String legalBasis) {
public void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
String reason, String legalBasis) {
String[] values = StringUtils.substringsBetween(searchText, start, stop);
@ -134,7 +143,47 @@ public class Section {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false);
// HashSet keeps the older value, but we want the new only.
entities.removeAll(found);
entities.addAll(found);
if (redactEverywhere) {
dictionaryService.addToLocalDictionary(asType, value.trim());
}
}
}
}
// TODO No need to iterate
entities.forEach(entity -> {
if (entity.getType().equals(asType)) {
entity.setRedaction(true);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
entity.setLegalBasis(legalBasis);
}
});
}
public void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
String reason, String legalBasis) {
String[] values = StringUtils.substringsBetween(text, start, stop);
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
String[] lines = value.split("\n");
for (String line : lines) {
Set<Entity> found = findEntities(line.trim(), asType, false);
// HashSet keeps the older value, but we want the new only.
entities.removeAll(found);
entities.addAll(found);
if (redactEverywhere) {
dictionaryService.addToLocalDictionary(asType, line.trim());
}
}
}
}
}
@ -158,22 +207,15 @@ public class Section {
String text = caseinsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseinsensitive ? value.toLowerCase() : value;
int startIndex;
int stopIndex = 0;
do {
startIndex = text.indexOf(searchValue, stopIndex);
stopIndex = startIndex + searchValue.length();
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(
text.charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(
stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex),
asType,
startIndex,
stopIndex,
headline,
sectionNumber));
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(text
.charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(stopIndex)))) {
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber));
}
} while (startIndex > -1);
@ -183,8 +225,7 @@ public class Section {
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}",
String.valueOf(c)) || c == '\"' || c == '' || c == '';
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
}
@ -222,7 +263,8 @@ public class Section {
}
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact, String reason, String legalBasis) {
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact, String reason,
String legalBasis) {
String cleanHeaderName = cellHeader.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
@ -231,17 +273,12 @@ public class Section {
log.warn("Could not find any data for {}.", cellHeader);
} else {
String word = value.toString();
Entity entity = new Entity(word,
type,
value.getRowSpanStart(),
value.getRowSpanStart() + word.length(),
headline,
sectionNumber);
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
entity.setTargetSequences(value.getTextBlock()
.getSequences()); // Make sure no other cells with same content are highlighted
.getSequences()); // Make sure no other cells with same content are highlighted
entity.setLegalBasis(legalBasis);
// HashSet keeps the older value, but we want the new only.

View File

@ -2,11 +2,13 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.awt.Color;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
@ -33,7 +35,10 @@ public class DictionaryService {
private long dictionaryVersion = -1;
@Getter
private Map<String, Set<String>> dictionary = new HashMap<>();
private Map<String, Set<String>> dictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important.
@Getter
private Map<String, Set<String>> localDictionary = new TreeMap<>(Comparator.reverseOrder()); // Using TreeMap, because order of keys is important.
@Getter
private Map<String, float[]> entryColors = new HashMap<>();
@ -57,6 +62,18 @@ public class DictionaryService {
private float[] notRedactedColor;
/**
 * Registers {@code value} under {@code type} in the local dictionary.
 * The value set for a type is created the first time that type is seen.
 *
 * @param type  dictionary type (map key) the value belongs to
 * @param value entry to add to the set stored for {@code type}
 */
public void addToLocalDictionary(String type, String value) {
    Set<String> valuesForType = localDictionary.get(type);
    if (valuesForType == null) {
        // First value for this type: create its set and register it.
        valuesForType = new HashSet<>();
        localDictionary.put(type, valuesForType);
    }
    valuesForType.add(value);
}
/**
 * Discards all local dictionary entries by swapping in a fresh, empty map
 * that sorts its keys in reverse natural order.
 * The previous map instance is replaced, not cleared in place.
 */
public void clearLocalDictionary() {
    Map<String, Set<String>> emptyDictionary = new TreeMap<>(Comparator.reverseOrder());
    localDictionary = emptyDictionary;
}
public void updateDictionary() {
long version = dictionaryClient.getVersion();
@ -85,10 +102,11 @@ public class DictionaryService {
.filter(TypeResult::isCaseInsensitive)
.map(TypeResult::getType)
.collect(Collectors.toList());
dictionary = entryColors.keySet()
.stream()
.collect(Collectors.toMap(type -> type, this::convertEntries));
dictionary = new TreeMap<>(Comparator.reverseOrder());
entryColors.keySet().forEach(type -> {
dictionary.put(type, convertEntries(type));
});
Colors colors = dictionaryClient.getColors();
defaultColor = convertColor(colors.getDefaultColor());

View File

@ -40,7 +40,37 @@ public class EntityRedactionService {
dictionaryService.updateDictionary();
droolsExecutionService.updateRules();
dictionaryService.clearLocalDictionary();
Set<Entity> documentEntities = new HashSet<>();
documentEntities.addAll(findEntities(classifiedDoc, manualRedactions, dictionaryService.getDictionary()));
if(!dictionaryService.getLocalDictionary().isEmpty()){
Set<Entity> foundByLocal = findEntities(classifiedDoc, manualRedactions, dictionaryService.getLocalDictionary());
// HashSet keeps the older value, but we want the new only.
documentEntities.removeAll(foundByLocal);
documentEntities.addAll(foundByLocal);
}
for (Entity entity : documentEntities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
classifiedDoc.getEntities()
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity.getLegalBasis()));
}
}
}
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, Map<String, Set<String>> dictionary){
Set<Entity> documentEntities = new HashSet<>();
int sectionNumber = 1;
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
@ -75,9 +105,10 @@ public class EntityRedactionService {
searchableRow.addAll(textBlock.getSequences());
}
}
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber);
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary);
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
.dictionaryService(dictionaryService)
.entities(rowEntities)
.text(searchableRow.getAsStringWithLinebreaks())
.searchText(searchableRow.toString())
@ -93,8 +124,9 @@ public class EntityRedactionService {
}
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber);
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary);
Section analysedSection = droolsExecutionService.executeRules(Section.builder()
.dictionaryService(dictionaryService)
.entities(entities)
.text(searchableText.getAsStringWithLinebreaks())
.searchText(searchableText.toString())
@ -105,22 +137,7 @@ public class EntityRedactionService {
documentEntities.addAll(clearAndFindPositions(analysedSection.getEntities(), searchableText));
sectionNumber++;
}
for (Entity entity : documentEntities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
classifiedDoc.getEntities()
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity.getLegalBasis()));
}
}
return documentEntities;
}
@ -140,7 +157,7 @@ public class EntityRedactionService {
}
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber) {
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber, Map<String, Set<String>> dictionary) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString();
@ -149,7 +166,7 @@ public class EntityRedactionService {
}
String lowercaseInputString = searchableString.toLowerCase();
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
for (Map.Entry<String, Set<String>> entry : dictionary.entrySet()) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline, sectionNumber));
} else {

View File

@ -67,8 +67,8 @@ public class RedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
private static final String VERTEBRATE = "vertebrate";
private static final String ADDRESS = "address";
private static final String AUTHOR = "author";
private static final String ADDRESS = "CBI_address";
private static final String AUTHOR = "CBI_author";
private static final String SPONSOR = "sponsor";
private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
private static final String REDACTION_INDICATOR = "redaction_indicator";
@ -77,6 +77,8 @@ public class RedactionIntegrationTest {
private static final String PUBLISHED_INFORMATION = "published_information";
private static final String TEST_METHOD = "test_method";
private static final String PII = "PII";
@Autowired
private RedactionController redactionController;
@ -134,6 +136,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(MUST_REDACT)).thenReturn(getDictionaryResponse(MUST_REDACT));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION));
when(dictionaryClient.getDictionaryForType(TEST_METHOD)).thenReturn(getDictionaryResponse(TEST_METHOD));
when(dictionaryClient.getDictionaryForType(PII)).thenReturn(getDictionaryResponse(PII));
when(dictionaryClient.getColors()).thenReturn(colors);
}
@ -141,7 +144,7 @@ public class RedactionIntegrationTest {
private void loadDictionaryForTest() {
dictionary.computeIfAbsent(AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/author.txt")
.addAll(ResourceLoader.load("dictionaries/CBI_author.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
@ -156,7 +159,7 @@ public class RedactionIntegrationTest {
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/address.txt")
.addAll(ResourceLoader.load("dictionaries/CBI_address.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
@ -190,6 +193,11 @@ public class RedactionIntegrationTest {
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -211,6 +219,7 @@ public class RedactionIntegrationTest {
typeColorMap.put(MUST_REDACT, "#fab4c0");
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
typeColorMap.put(TEST_METHOD, "#91fae8");
typeColorMap.put(PII, "#66ccff");
hintTypeMap.put(VERTEBRATE, true);
@ -223,6 +232,7 @@ public class RedactionIntegrationTest {
hintTypeMap.put(MUST_REDACT, true);
hintTypeMap.put(PUBLISHED_INFORMATION, true);
hintTypeMap.put(TEST_METHOD, true);
hintTypeMap.put(PII, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
@ -234,6 +244,7 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(MUST_REDACT, true);
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
colors.setDefaultColor("#acfc00");
colors.setNotRedacted("#cccccc");
@ -322,7 +333,7 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))

View File

@ -185,11 +185,11 @@ public class EntityRedactionServiceTest {
" Supplement - Identity of the active substance - Reference list.pdf");
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/author.txt")))
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/address.txt")))
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
@ -222,11 +222,11 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/author.txt")))
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/address.txt")))
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
@ -257,32 +257,32 @@ public class EntityRedactionServiceTest {
" when\n" +
" eval(section.headlineContainsWord(\"applicant\") || section.getText().contains(\"Applicant\"));\n" +
" then\n" +
" section.redactLineAfter(\"Name:\", \"address\", 6, \"Applicant information was found\", \"Reg" +
" section.redactLineAfter(\"Name:\", \"address\", 6,true, \"Applicant information was found\", \"Reg" +
" (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"Address:\", \"Contact\", \"address\", 6, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Contact point:\", \"address\", 6, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Phone:\", \"address\", 6, \"Applicant information was found\", " +
" section.redactBetween(\"Address:\", \"Contact\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Contact point:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Phone:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Fax:\", \"address\", 6, \"Applicant information was found\", \"Reg " +
" section.redactLineAfter(\"Fax:\", \"address\", 6,true, \"Applicant information was found\", \"Reg " +
"(EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Tel.:\", \"address\", 6, \"Applicant information was found\", \"Reg" +
" section.redactLineAfter(\"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg" +
" (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Tel:\", \"address\", 6, \"Applicant information was found\", \"Reg " +
" section.redactLineAfter(\"Tel:\", \"address\", 6,true, \"Applicant information was found\", \"Reg " +
"(EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"E-mail:\", \"address\", 6, \"Applicant information was found\", " +
" section.redactLineAfter(\"E-mail:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Email:\", \"address\", 6, \"Applicant information was found\", " +
" section.redactLineAfter(\"Email:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Contact:\", \"address\", 6, \"Applicant information was found\", " +
" section.redactLineAfter(\"Contact:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Telephone number:\", \"address\", 6, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Fax number:\", \"address\", 6, \"Applicant information was found\"," +
" section.redactLineAfter(\"Telephone number:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Fax number:\", \"address\", 6,true, \"Applicant information was found\"," +
" \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Telephone:\", \"address\", 6, \"Applicant information was found\", " +
" section.redactLineAfter(\"Telephone:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"No:\", \"Fax\", \"address\", 6, \"Applicant information was found\", " +
" section.redactBetween(\"No:\", \"Fax\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" end";
when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet());
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
@ -291,11 +291,11 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/author.txt")))
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/address.txt")))
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()

View File

@ -5,12 +5,14 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Section
global Section section
// --------------------------------------- CBI rules -------------------------------------------------------------------
rule "1: Redacted because Section contains Vertebrate"
when
Section(matchesType("vertebrate"))
then
section.redact("author", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("address", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_author", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_address", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
@ -18,8 +20,8 @@ rule "2: Not Redacted because Section contains no Vertebrate"
when
Section(!matchesType("vertebrate"))
then
section.redactNot("author", 2, "No Vertebrate found");
section.redactNot("address", 2, "No Vertebrate found");
section.redactNot("CBI_author", 2, "No Vertebrate found");
section.redactNot("CBI_address", 2, "No Vertebrate found");
end
@ -27,8 +29,8 @@ rule "3: Do not redact Names and Addresses if no redaction Indicator is containe
when
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"))
then
section.redactNot("author", 3, "Vertebrate and No Redaction Indicator found");
section.redactNot("address", 3, "Vertebrate and No Redaction Indicator found");
section.redactNot("CBI_author", 3, "Vertebrate and No Redaction Indicator found");
section.redactNot("CBI_address", 3, "Vertebrate and No Redaction Indicator found");
end
@ -36,8 +38,8 @@ rule "4: Do not redact Names and Addresses if no redaction Indicator is containe
when
Section(matchesType("vertebrate"), matchesType("published_information"))
then
section.redactNot("author", 4, "Vertebrate and Published Information found");
section.redactNot("address", 4, "Vertebrate and Published Information found");
section.redactNot("CBI_author", 4, "Vertebrate and Published Information found");
section.redactNot("CBI_address", 4, "Vertebrate and Published Information found");
end
@ -45,87 +47,153 @@ rule "5: Redact Names and Addresses if no_redaction_indicator and redaction_indi
when
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"), matchesType("redaction_indicator"))
then
section.redact("author", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("address", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_author", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_address", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
rule "6: Redact contact information if applicant is found"
when
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact") || text.contains("Contact:") || text.contains("Telephone number:"))
then
section.redactLineAfter("Contact point:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Phone:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Fax:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Tel.:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Tel:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("E-mail:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Email:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("e-mail:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("E-mail address:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Contact:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Alternative contact:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Telephone number:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Telephone No:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Fax number:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Telephone:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Company:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactBetween("No:", "Fax", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("European contact:", "address", 6, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
rule "7: Redact contact information if Producer is found"
when
Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance"))
then
section.redactLineAfter("Contact:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Telephone:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Phone:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Fax:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("E-mail:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Contact:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Fax number:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Telephone number:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactLineAfter("Tel:", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactBetween("No:", "Fax", "address", 7, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
rule "8: Not redacted because Vertebrate Study = N"
rule "6: Not redacted because Vertebrate Study = N"
when
Section(rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No"))
then
section.redactNotCell("Author(s)", 8, "author", "Not redacted because row is not a vertebrate study");
section.redactNot("address", 8, "Not redacted because row is not a vertebrate study");
section.highlightCell("Vertebrate study Y/N", 8, "hint_only");
section.redactNotCell("Author(s)", 6, "CBI_author", "Not redacted because row is not a vertebrate study");
section.redactNot("CBI_address", 6, "Not redacted because row is not a vertebrate study");
section.highlightCell("Vertebrate study Y/N", 6, "hint_only");
end
rule "9: Redact if must redact entry is found"
rule "7: Redact if must redact entry is found"
when
Section(matchesType("must_redact"))
then
section.redact("author", 9, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("address", 9, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_author", 7, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_address", 7, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate study"
rule "8: Redact Authors and Addresses in Reference Table if it is a Vertebrate study"
when
Section(rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes"))
then
section.redactCell("Author(s)", 10, "author", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("address", 10, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.highlightCell("Vertebrate study Y/N", 10, "must_redact");
section.redactCell("Author(s)", 8, "CBI_author", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redact("CBI_address", 8, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.highlightCell("Vertebrate study Y/N", 8, "must_redact");
end
rule "11: Redact sponsor company"
rule "9: Redact sponsor company"
when
Section(searchText.toLowerCase().contains("batches produced at"))
then
section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.redactIfPrecededBy("batches produced at", "sponsor", 9, "Redacted because it represents a sponsor company", "Reg (EC) No 1107/2009 Art. 63 (2g)");
section.addHintAnnotation("batches produced at", "must_redact");
end
// Rule 10: fires for a section whose searchText mentions "determination of residues"
// together with at least one livestock-related keyword. All checks run on the
// lower-cased searchText, so they are case-insensitive substring matches.
// NOTE(review): substring matching means e.g. "egg" also matches inside longer words.
rule "10: Redact determination of residues"
    when
        Section(searchText.toLowerCase().contains("determination of residues") && (
            searchText.toLowerCase().contains("livestock") ||
            searchText.toLowerCase().contains("live stock") ||
            searchText.toLowerCase().contains("egg") ||
            searchText.toLowerCase().contains("milk") ||
            searchText.toLowerCase().contains("bovine") ||
            searchText.toLowerCase().contains("ruminant")
        ))
    then
        // Redact the CBI author and address entities of the section (rule number 10).
        section.redact("CBI_author", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
        section.redact("CBI_address", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
        // Add must_redact hint annotations for the trigger phrases found in the section.
        section.addHintAnnotation("determination of residues", "must_redact");
        section.addHintAnnotation("livestock", "must_redact");
        section.addHintAnnotation("live stock", "must_redact");
        section.addHintAnnotation("egg", "must_redact");
        section.addHintAnnotation("milk", "must_redact");
        section.addHintAnnotation("bovine", "must_redact");
        section.addHintAnnotation("ruminant", "must_redact");
end
// --------------------------------------- PII rules -------------------------------------------------------------------
// Rule 11 - generic PII redaction.
// Fires for sections where matchesType("PII") holds (presumably: the section
// already contains an entity of type PII - confirm in Section) and redacts
// the PII type with rule number 11.
rule "11: Redacted PII Personal Identification Information"
    when
        Section(matchesType("PII"))
    then
        section.redact(
            "PII",
            11,
            "PII (Personal Identification Information) found",
            "Reg (EC) No 1107/2009 Art. 63 (2e)"
        );
end
// Rule 12 - applicant contact information.
// Fires when the section looks like an applicant/contact block: the headline
// check headlineContainsWord(...) succeeds for "applicant", "Primary contact"
// or "Alternative contact", or the section text contains (case-sensitively)
// "Applicant", "Contact:" or "Telephone number:".
// Each redactLineAfter call marks the remainder of the line following the
// given label as PII; each redactBetween call marks the text between the two
// given markers as PII. All calls use rule number 12 and pass
// redactEverywhere = true (presumably propagating the value to other
// occurrences - confirm in Section).
// NOTE(review): keep the call order - Section replaces already-known entities
// with the most recently found ones.
rule "12: Redact contact information if applicant is found"
when
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact") || text.contains("Contact:") || text.contains("Telephone number:"))
then
// Single-line values introduced by a label.
section.redactLineAfter("Contact point:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Phone:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Fax:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Tel.:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Tel:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("E-mail:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Email:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("e-mail:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("E-mail address:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Contact:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Alternative contact:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Telephone number:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Telephone No:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Fax number:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Telephone:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("Company:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
// Values delimited by a start and a stop marker instead of a line break.
section.redactBetween("No:", "Fax", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactBetween("Contact:", "Tel.:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
section.redactLineAfter("European contact:", "PII", 12, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
end
// Rule 13 - producer / manufacturer contact information.
// Fires when the section text mentions a producer or manufacturer: the
// lower-cased text contains "producer of the plant protection" or
// "producer of the active substance", or the text contains (case-sensitively)
// "Manufacturer of the active substance", "Manufacturer:" or
// "Producer or producers of the active substance".
// Each redactLineAfter call marks the remainder of the line following the
// given label as PII; redactBetween marks the text between "No:" and "Fax".
// All calls use rule number 13 and pass redactEverywhere = true.
// The "Contact:" label is handled once; a second, identical call added
// nothing and was dropped.
rule "13: Redact contact information if Producer is found"
    when
        Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance"))
    then
        section.redactLineAfter("Contact:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("Telephone:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("Phone:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("Fax:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("E-mail:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("Fax number:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("Telephone number:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactLineAfter("Tel:", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
        section.redactBetween("No:", "Fax", "PII", 13, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
end
// Rule 14 - author block on a study title page.
// Fires when the search text contains the label "AUTHOR(S):" and asks the
// section to redact, as PII, the lines between that label and
// "COMPLETION DATE:" (redactEverywhere = true).
rule "14: Redact AUTHOR(S)"
    when
        Section(searchText.contains("AUTHOR(S):"))
    then
        section.redactLinesBetween(
            "AUTHOR(S):", "COMPLETION DATE:",
            "PII", 14, true,
            "AUTHOR(S) was found",
            "Reg (EC) No 1107/2009 Art. 63 (2e)"
        );
end
// Rule 15 - performing laboratory on a study title page.
// Fires when the search text contains the label "PERFORMING LABORATORY:" and
// asks the section to redact, as PII, the text between that label and
// "LABORATORY PROJECT ID:" (redactEverywhere = true).
rule "15: Redact PERFORMING LABORATORY"
    when
        Section(searchText.contains("PERFORMING LABORATORY:"))
    then
        section.redactBetween(
            "PERFORMING LABORATORY:", "LABORATORY PROJECT ID:",
            "PII", 15, true,
            "PERFORMING LABORATORY was found",
            "Reg (EC) No 1107/2009 Art. 63 (2e)"
        );
end
// Rule 16 - signature block "On behalf of Sequani Ltd.".
// Fires when the search text contains "On behalf of Sequani Ltd.: Name Title"
// and asks the section to redact, as PII, the text between that heading and
// the next "On behalf of" (redactEverywhere = false).
rule "16: Redact On behalf of Sequani Ltd.:"
    when
        Section(searchText.contains("On behalf of Sequani Ltd.: Name Title"))
    then
        section.redactBetween(
            "On behalf of Sequani Ltd.: Name Title", "On behalf of",
            "PII", 16, false,
            "PII (Personal Identification Information) found",
            "Reg (EC) No 1107/2009 Art. 63 (2e)"
        );
end
// Rule 17 - signature block "On behalf of Syngenta Ltd.".
// Fires when the search text contains "On behalf of Syngenta Ltd.: Name Title"
// and asks the section to redact, as PII, the text between that heading and
// "Study dates" (redactEverywhere = false).
rule "17: Redact On behalf of Syngenta Ltd.:"
    when
        Section(searchText.contains("On behalf of Syngenta Ltd.: Name Title"))
    then
        section.redactBetween(
            "On behalf of Syngenta Ltd.: Name Title", "Study dates",
            "PII", 17, false,
            "PII (Personal Identification Information) found",
            "Reg (EC) No 1107/2009 Art. 63 (2e)"
        );
end