RED-727: Added possibility to redact/addRecommendations by regEx in rules. Added email regEx and et al. author recommendation regEx
This commit is contained in:
parent
75127fd1bd
commit
de725a630c
@ -14,8 +14,8 @@ import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.PositionUtil;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -76,10 +76,10 @@ public class Section {
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
|
||||
|
||||
boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
||||
if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType()
|
||||
.equals(RECOMMENDATION_PREFIX + type)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
@ -92,10 +92,10 @@ public class Section {
|
||||
|
||||
public void redactNot(String type, int ruleNumber, String reason) {
|
||||
|
||||
boolean hasRecommendactionDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType()
|
||||
if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType()
|
||||
.equals(RECOMMENDATION_PREFIX + type)) {
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
@ -120,8 +120,8 @@ public class Section {
|
||||
|
||||
public void addHintAnnotation(String value, String asType) {
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, true);
|
||||
entities.addAll(found);
|
||||
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null);
|
||||
addNewerToEntities(found);
|
||||
}
|
||||
|
||||
|
||||
@ -133,24 +133,41 @@ public class Section {
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false);
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.removeAll(found);
|
||||
entities.addAll(found);
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis);
|
||||
addNewerToEntities(found);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(asType)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
entity.setLegalBasis(legalBasis);
|
||||
|
||||
public void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis) {
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
Matcher matcher = compiledPattern.matcher(text);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(group);
|
||||
if (StringUtils.isNotBlank(match)) {
|
||||
Set<Entity> found = findEntities(match.trim(), asType, false, true, ruleNumber, reason, legalBasis);
|
||||
addNewerToEntities(found);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void addRecommendationByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType) {
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
Matcher matcher = compiledPattern.matcher(text);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(group);
|
||||
if (StringUtils.isNotBlank(match) && match.length() >= 3) {
|
||||
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>())
|
||||
.add(match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -162,30 +179,21 @@ public class Section {
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false);
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.removeAll(found);
|
||||
entities.addAll(found);
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, false, true, ruleNumber, reason, legalBasis);
|
||||
addNewerToEntities(found);
|
||||
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(value.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(asType)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
entity.setLegalBasis(legalBasis);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
|
||||
public void redactLinesBetween(String start, String stop, String asType, int ruleNumber,
|
||||
boolean redactEverywhere,
|
||||
String reason, String legalBasis) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, stop);
|
||||
@ -201,11 +209,9 @@ public class Section {
|
||||
return;
|
||||
}
|
||||
|
||||
Set<Entity> found = findEntities(line.trim(), asType, false);
|
||||
Set<Entity> found = findEntities(line.trim(), asType, false, true, ruleNumber, reason, legalBasis);
|
||||
addNewerToEntities(found);
|
||||
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.removeAll(found);
|
||||
entities.addAll(found);
|
||||
if (redactEverywhere && !isLocal()) {
|
||||
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(line.trim());
|
||||
}
|
||||
@ -213,49 +219,6 @@ public class Section {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(asType)) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
entity.setLegalBasis(legalBasis);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(String value, String asType, boolean caseinsensitive) {
|
||||
|
||||
if (value.trim().length() <= 2) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
String text = caseinsensitive ? searchText.toLowerCase() : searchText;
|
||||
String searchValue = caseinsensitive ? value.toLowerCase() : value;
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = text.indexOf(searchValue, stopIndex);
|
||||
stopIndex = startIndex + searchValue.length();
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(text.charAt(startIndex - 1)) || isSeparator(text
|
||||
.charAt(startIndex - 1))) && (stopIndex == text.length() || isSeparator(text.charAt(stopIndex)))) {
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber, false));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
|
||||
return PositionUtil.clearAndFindPositions(found, searchableText, dictionary);
|
||||
}
|
||||
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
@ -265,7 +228,8 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void redactCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations, String reason,
|
||||
public void redactCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations, String
|
||||
reason,
|
||||
String legalBasis) {
|
||||
|
||||
annotateCell(cellHeader, ruleNumber, type, true, addAsRecommendations, reason, legalBasis);
|
||||
@ -279,6 +243,27 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted,
|
||||
int ruleNumber, String reason, String legalBasis) {
|
||||
|
||||
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
|
||||
String searchValue = caseInsensitive ? value.toLowerCase() : value;
|
||||
|
||||
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true);
|
||||
|
||||
found.forEach(entity -> {
|
||||
if (redacted) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
entity.setLegalBasis(legalBasis);
|
||||
}
|
||||
});
|
||||
|
||||
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
|
||||
}
|
||||
|
||||
|
||||
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact,
|
||||
boolean addAsRecommendations, String reason, String legalBasis) {
|
||||
|
||||
@ -303,13 +288,11 @@ public class Section {
|
||||
|
||||
Set<Entity> singleEntitySet = new HashSet<>();
|
||||
singleEntitySet.add(entity);
|
||||
PositionUtil.clearAndFindPositions(singleEntitySet, searchableText, dictionary);
|
||||
EntitySearchUtils.clearAndFindPositions(singleEntitySet, searchableText, dictionary);
|
||||
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.remove(entity);
|
||||
entities.add(entity);
|
||||
addNewerToEntities(entity);
|
||||
|
||||
PositionUtil.removeEntitiesContainedInLarger(entities);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
|
||||
if (addAsRecommendations && !isLocal()) {
|
||||
String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " ";
|
||||
@ -330,6 +313,19 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void addNewerToEntities(Set<Entity> found) {
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.removeAll(found);
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
private void addNewerToEntities(Entity found) {
|
||||
// HashSet keeps the older value, but we want the new only.
|
||||
entities.remove(found);
|
||||
entities.add(found);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,21 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
@ -14,26 +30,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionS
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.PositionUtil;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ -70,7 +72,7 @@ public class EntityRedactionService {
|
||||
documentEntities.removeAll(foundByLocal);
|
||||
documentEntities.addAll(foundByLocal);
|
||||
|
||||
PositionUtil.removeEntitiesContainedInLarger(documentEntities);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
|
||||
}
|
||||
|
||||
for (Entity entity : documentEntities) {
|
||||
@ -96,8 +98,9 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer, ManualRedactions manualRedactions, Dictionary dictionary,
|
||||
boolean local, Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
|
||||
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
|
||||
@ -119,7 +122,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer,sectionSearchableTextPair.getSection());
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
|
||||
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
@ -152,7 +155,8 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
private List<SectionSearchableTextPair> processTablePerRow(Table table, ManualRedactions manualRedactions,
|
||||
AtomicInteger sectionNumber, Dictionary dictionary, boolean local,
|
||||
AtomicInteger sectionNumber, Dictionary dictionary,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
@ -233,8 +237,8 @@ public class EntityRedactionService {
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream.concat(rowEntities
|
||||
.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
|
||||
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
|
||||
.collect(Collectors.toSet()) : rowEntities)
|
||||
.text(entireTableText.getAsStringWithLinebreaks())
|
||||
.searchText(entireTableText.toString())
|
||||
@ -253,15 +257,16 @@ public class EntityRedactionService {
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
|
||||
SearchableText searchableText = paragraph.getSearchableText();
|
||||
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber.intValue());
|
||||
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber
|
||||
.intValue());
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber.intValue(), dictionary, local);
|
||||
surroundingWordsService.addSurroundingText(entities, searchableText, dictionary);
|
||||
|
||||
return new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream.concat(entities
|
||||
.stream(), hintsPerSectionNumber.get(sectionNumber).stream())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
|
||||
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
|
||||
.collect(Collectors.toSet()) : entities)
|
||||
.text(searchableText.getAsStringWithLinebreaks())
|
||||
.searchText(searchableText.toString())
|
||||
@ -291,7 +296,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
return PositionUtil.clearAndFindPositions(found, searchableText, dictionary);
|
||||
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -3,9 +3,11 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
@ -18,7 +20,40 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class PositionUtil {
|
||||
public class EntitySearchUtils {
|
||||
|
||||
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
|
||||
boolean local) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
for (String value : values) {
|
||||
|
||||
if (value.trim().length() <= 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = inputString.indexOf(value, stopIndex);
|
||||
stopIndex = startIndex + value.length();
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text, Dictionary dictionary) {
|
||||
|
||||
@ -41,7 +76,7 @@ public class PositionUtil {
|
||||
for (int i = 0; i <= orderedEntities.size() - 1; i++) {
|
||||
try {
|
||||
orderedEntities.get(i).setPositionSequences(List.of(positionSequences.get(i)));
|
||||
} catch (Exception e){
|
||||
} catch (Exception e) {
|
||||
log.warn("Mismatch between EntityPositionSequence and found Entity!");
|
||||
}
|
||||
}
|
||||
@ -67,6 +102,4 @@ public class PositionUtil {
|
||||
}
|
||||
entities.removeAll(wordsToRemove);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -1,12 +1,28 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@UtilityClass
|
||||
public class Patterns {
|
||||
|
||||
public static Map<String, Pattern> patternCache = new HashMap<>();
|
||||
|
||||
public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2} )+");
|
||||
|
||||
|
||||
public Pattern getCompiledPattern(String pattern, boolean caseInsensitive) {
|
||||
|
||||
String patternKey = pattern + caseInsensitive;
|
||||
if (patternCache.containsKey(patternKey)) {
|
||||
return patternCache.get(patternKey);
|
||||
}
|
||||
Pattern compiledPattern = Pattern.compile(pattern, caseInsensitive ? Pattern.CASE_INSENSITIVE : 0);
|
||||
patternCache.put(patternKey, compiledPattern);
|
||||
return compiledPattern;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -390,7 +390,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
|
||||
@ -10,7 +10,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.PositionUtil;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
@ -110,7 +110,7 @@ public class EntityRedactionServiceTest {
|
||||
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false);
|
||||
entities.add(nested);
|
||||
entities.add(nesting);
|
||||
PositionUtil.removeEntitiesContainedInLarger(entities);
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
|
||||
|
||||
assertThat(entities.size()).isEqualTo(1);
|
||||
assertThat(entities).contains(nesting);
|
||||
@ -313,7 +313,7 @@ public class EntityRedactionServiceTest {
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 6)
|
||||
.count()).isEqualTo(18);
|
||||
.count()).isEqualTo(13);
|
||||
}
|
||||
|
||||
}
|
||||
@ -515,28 +515,4 @@ public class EntityRedactionServiceTest {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAuthorSplitting() {
|
||||
|
||||
String word = "Porch JR, " + "Kendall TZ, " + "Krueger HO";
|
||||
|
||||
word.replaceAll(",", " ").replaceAll(" ", " ");
|
||||
|
||||
Pattern pattern = Pattern.compile("[A-ZÄÖÜ][\\wäöüéèê]{2,}( [A-ZÄÖÜ]{1,2}\\.)+");
|
||||
Matcher matcher = pattern.matcher(word);
|
||||
|
||||
List<String> allMatches = new ArrayList<>();
|
||||
while (matcher.find()) {
|
||||
allMatches.add(matcher.group());
|
||||
}
|
||||
|
||||
for (String name : allMatches) {
|
||||
if (name.length() >= 3) {
|
||||
System.out.println(name);
|
||||
// dictionaryService.addToLocalDictionary(type, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,95 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class RegExPatternTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmailRegEx(){
|
||||
String text = "Address: Schwarzwaldalle " +
|
||||
"P.O.Box\n" +
|
||||
"CH-4002 Basel\n" +
|
||||
"Switzerland\n" +
|
||||
"Contact: Christian Warmers\n" +
|
||||
"Tel: +41 (61) 323 8044\n" +
|
||||
"christian.warmers@syngenta.com";
|
||||
|
||||
|
||||
Pattern p = Pattern.compile("\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}\\b", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
Matcher matcher = p.matcher(text);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(0);
|
||||
System.out.println(match);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEtAlRegEx() {
|
||||
String text = "To assess the potential of S-metolachlor to cause endocrine disruption (ED) a review (Charlton 2014,\n" +
|
||||
"ASB2016-762) was submitted that summarises results from regulatory and open scientific literature\n" +
|
||||
"studies covering in vitro and in vivo studies (level 2-5 of the OECD Conceptual Framework). According to this information metolachlor increased (1.5-fold) aromatase activity in JEG-3 cells (Laville et al.\n" +
|
||||
"2006, ASB2010-14391) and induced weak anti-androgenic activity in the MDA-kb2 reporter cell line\n" +
|
||||
"with a IC50 of 9.92 µM (IC50 of positive control flutamide: 0.51 µM) (Aït-Aïssa et al. 2010, ASB2015-\n" +
|
||||
"9562). Data from the Tox21 high throughput screening revealed just few postive findings in assays to\n" +
|
||||
"identify antagonists of the androgen receptor. An isolated result of this screening showed agonistic\n" +
|
||||
"activity on the thyroid stimulating hormone receptor, while Dalton et al. (2003, ASB2018-2832)\n" +
|
||||
"demonstrated that metolachlor induced CYP2B1/2 and CYP3A1/2 but did not affect T4, T3 or TSH.\n" +
|
||||
"After prepubertal exposure of male Wistar rats to metolachlor (Mathias et al. 2012, ASB2016-9890) a\n" +
|
||||
"statistically significant increase of serum hormone concentration was observed for testosterone (at the\n" +
|
||||
"dose 50 mg/kg) as well as a statistically significant decrease in the age of preputial separation at a dose\n" +
|
||||
"of 5 and 50 mg/kg. Furthermore a statistically significant increase for estradiol at a dose of 50 mg/kg\n" +
|
||||
"and for FSH at a dose of 5 and 50 mg/kg and morphological alterations of the seminiferous epithelium\n" +
|
||||
"were observed. Relative testicular weight was not altered. A statistically significant increase of relative\n" +
|
||||
"weights was observed in long-term studies with rats (Tisdel et al. 1983, TOX9800328 ). This finding\n" +
|
||||
"was attributed to lower terminal body weight. In mice a statistically significant decrease of the weight\n" +
|
||||
"seminal vesicle (Tisdel et al. 1982, TOX9800327) was shown after 24 month treatment with\n" +
|
||||
"metolachlor. In a mouse preimplantation embryo assay from open literature metolachlor increased the\n" +
|
||||
"percentage of apoptosis significantly and reduced the mean number of cells per embryo significantly\n" +
|
||||
"while the percentage of developing blastocytes was unaltered (Grennlee et al. 2004, ASB2016-9889).\n" +
|
||||
"In reproduvtive toxicity studies a retarded body weight development of the pups was observed, while\n" +
|
||||
"survival and normal morphological and functional development were not altered. No adverse effects\n" +
|
||||
"on male fertility were seen, however important parameters to assess effects on female fertility like\n" +
|
||||
"cyclicity, ovarian follicles as well as developmental landmarks in the offspring have not been investigated.";
|
||||
|
||||
Pattern p = Pattern.compile("([^\\s(]*?( \\w\\.?)?) et al\\.?");
|
||||
|
||||
Matcher matcher = p.matcher(text);
|
||||
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(1);
|
||||
System.out.println(match);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAuthorSplitting(){
|
||||
|
||||
String word = "Porch JR, " + "Kendall TZ, " + "Krueger HO";
|
||||
|
||||
word.replaceAll(",", " ").replaceAll(" ", " ");
|
||||
|
||||
Pattern pattern = Pattern.compile("[A-ZÄÖÜ][\\wäöüéèê]{2,}( [A-ZÄÖÜ]{1,2}\\.)+");
|
||||
Matcher matcher = pattern.matcher(word);
|
||||
|
||||
List<String> allMatches = new ArrayList<>();
|
||||
while (matcher.find()) {
|
||||
allMatches.add(matcher.group());
|
||||
}
|
||||
|
||||
for(String name: allMatches) {
|
||||
if(name.length() >= 3) {
|
||||
System.out.println(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -8,48 +8,48 @@ global Section section
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
|
||||
rule "1: Redacted because Section contains Vertebrate"
|
||||
when
|
||||
Section(matchesType("vertebrate"))
|
||||
then
|
||||
section.redact("CBI_author", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
when
|
||||
Section(matchesType("vertebrate"))
|
||||
then
|
||||
section.redact("CBI_author", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 1, "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
|
||||
|
||||
rule "2: Not Redacted because Section contains no Vertebrate"
|
||||
when
|
||||
Section(!matchesType("vertebrate"))
|
||||
then
|
||||
section.redactNot("CBI_author", 2, "No Vertebrate found");
|
||||
section.redactNot("CBI_address", 2, "No Vertebrate found");
|
||||
end
|
||||
when
|
||||
Section(!matchesType("vertebrate"))
|
||||
then
|
||||
section.redactNot("CBI_author", 2, "No Vertebrate found");
|
||||
section.redactNot("CBI_address", 2, "No Vertebrate found");
|
||||
end
|
||||
|
||||
|
||||
rule "3: Do not redact Names and Addresses if no redaction Indicator is contained"
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"))
|
||||
then
|
||||
section.redactNot("CBI_author", 3, "Vertebrate and No Redaction Indicator found");
|
||||
section.redactNot("CBI_address", 3, "Vertebrate and No Redaction Indicator found");
|
||||
end
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"))
|
||||
then
|
||||
section.redactNot("CBI_author", 3, "Vertebrate and No Redaction Indicator found");
|
||||
section.redactNot("CBI_address", 3, "Vertebrate and No Redaction Indicator found");
|
||||
end
|
||||
|
||||
|
||||
rule "4: Do not redact Names and Addresses if no redaction Indicator is contained"
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("published_information"))
|
||||
then
|
||||
section.redactNot("CBI_author", 4, "Vertebrate and Published Information found");
|
||||
section.redactNot("CBI_address", 4, "Vertebrate and Published Information found");
|
||||
end
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("published_information"))
|
||||
then
|
||||
section.redactNot("CBI_author", 4, "Vertebrate and Published Information found");
|
||||
section.redactNot("CBI_address", 4, "Vertebrate and Published Information found");
|
||||
end
|
||||
|
||||
|
||||
rule "5: Redact Names and Addresses if no_redaction_indicator and redaction_indicator is contained"
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"), matchesType("redaction_indicator"))
|
||||
then
|
||||
section.redact("CBI_author", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
when
|
||||
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"), matchesType("redaction_indicator"))
|
||||
then
|
||||
section.redact("CBI_author", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 5, "Vertebrate and Redaction Indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
|
||||
|
||||
rule "6: Not redacted because Vertebrate Study = N"
|
||||
@ -64,12 +64,12 @@ rule "6: Not redacted because Vertebrate Study = N"
|
||||
|
||||
|
||||
rule "7: Redact if must redact entry is found"
|
||||
when
|
||||
Section(matchesType("must_redact"))
|
||||
then
|
||||
section.redact("CBI_author", 7, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 7, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
when
|
||||
Section(matchesType("must_redact"))
|
||||
then
|
||||
section.redact("CBI_author", 7, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 7, "must_redact entry was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
|
||||
|
||||
rule "8: Redact Authors and Addresses in Reference Table if it is a Vertebrate study"
|
||||
@ -92,55 +92,75 @@ rule "9: Redact sponsor company"
|
||||
|
||||
|
||||
rule "10: Redact determination of residues"
|
||||
when
|
||||
Section(searchText.toLowerCase.contains("determination of residues") && (
|
||||
searchText.toLowerCase.contains("livestock") ||
|
||||
searchText.toLowerCase.contains("live stock") ||
|
||||
searchText.toLowerCase.contains("tissue") ||
|
||||
searchText.toLowerCase.contains("liver") ||
|
||||
searchText.toLowerCase.contains("muscle") ||
|
||||
searchText.toLowerCase.contains("bovine") ||
|
||||
searchText.toLowerCase.contains("ruminant") ||
|
||||
searchText.toLowerCase.contains("ruminants")
|
||||
))
|
||||
then
|
||||
section.redact("CBI_author", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.addHintAnnotation("determination of residues", "must_redact");
|
||||
section.addHintAnnotation("livestock", "must_redact");
|
||||
section.addHintAnnotation("live stock", "must_redact");
|
||||
section.addHintAnnotation("tissue", "must_redact");
|
||||
section.addHintAnnotation("liver", "must_redact");
|
||||
section.addHintAnnotation("muscle", "must_redact");
|
||||
section.addHintAnnotation("bovine", "must_redact");
|
||||
section.addHintAnnotation("ruminant", "must_redact");
|
||||
section.addHintAnnotation("ruminants", "must_redact");
|
||||
end
|
||||
when
|
||||
Section((
|
||||
searchText.toLowerCase.contains("determination of residues") ||
|
||||
searchText.toLowerCase.contains("determination of total residues")
|
||||
) && (
|
||||
searchText.toLowerCase.contains("livestock") ||
|
||||
searchText.toLowerCase.contains("live stock") ||
|
||||
searchText.toLowerCase.contains("tissue") ||
|
||||
searchText.toLowerCase.contains("tissues") ||
|
||||
searchText.toLowerCase.contains("liver") ||
|
||||
searchText.toLowerCase.contains("muscle") ||
|
||||
searchText.toLowerCase.contains("bovine") ||
|
||||
searchText.toLowerCase.contains("ruminant") ||
|
||||
searchText.toLowerCase.contains("ruminants")
|
||||
))
|
||||
then
|
||||
section.redact("CBI_author", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.addHintAnnotation("determination of residues", "must_redact");
|
||||
section.addHintAnnotation("livestock", "must_redact");
|
||||
section.addHintAnnotation("live stock", "must_redact");
|
||||
section.addHintAnnotation("tissue", "must_redact");
|
||||
section.addHintAnnotation("tissues", "must_redact");
|
||||
section.addHintAnnotation("liver", "must_redact");
|
||||
section.addHintAnnotation("muscle", "must_redact");
|
||||
section.addHintAnnotation("bovine", "must_redact");
|
||||
section.addHintAnnotation("ruminant", "must_redact");
|
||||
section.addHintAnnotation("ruminants", "must_redact");
|
||||
end
|
||||
|
||||
|
||||
rule "11: Redact if CTL/* or BL/* was found"
|
||||
when
|
||||
Section(searchText.contains("CTL/") || searchText.contains("BL/"))
|
||||
then
|
||||
section.redact("CBI_author", 11, "Laboraty for vertebrate studies found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 11, "Laboraty for vertebrate studies found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.addHintAnnotation("CTL", "must_redact");
|
||||
section.addHintAnnotation("BL", "must_redact");
|
||||
section.redact("CBI_author", 11, "Laboraty for vertebrate studies found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.redact("CBI_address", 11, "Laboraty for vertebrate studies found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
section.addHintAnnotation("CTL", "must_redact");
|
||||
section.addHintAnnotation("BL", "must_redact");
|
||||
end
|
||||
|
||||
|
||||
// Rule 12: fires for a Section whose searchText contains the literal "et al." and
// asks the section to add "CBI_author" recommendations for text found by the regex.
// Regex group 1 captures the run of characters that are neither whitespace nor "("
// (optionally followed by a space, one word character and an optional dot) that
// stands directly before " et al".
// NOTE(review): `false` and `1` are presumably the case-insensitivity flag and the
// capture group to use - confirm against Section.addRecommendationByRegEx.
// NOTE(review): the regex also accepts "et al" without the trailing dot, but the
// guard only lets the rule fire when "et al." (with dot) occurs in the section.
rule "12: Add recommendation for et al. author"
|
||||
when
|
||||
Section(searchText.contains("et al."))
|
||||
then
|
||||
section.addRecommendationByRegEx("([^\\s(]*?( \\w\\.?)?) et al\\.?", false, 1, "CBI_author");
|
||||
end
|
||||
|
||||
// --------------------------------------- PII rules -------------------------------------------------------------------
|
||||
|
||||
|
||||
rule "12: Redacted PII Personal Identification Information"
|
||||
when
|
||||
Section(matchesType("PII"))
|
||||
then
|
||||
section.redact("PII", 12, "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
// Rule 13: fires for a Section that matches the "PII" type and marks its "PII"
// entities for redaction, recording rule number 13, the reason text and the legal
// basis "Reg (EC) No 1107/2009 Art. 63 (2e)" via section.redact(type, ruleNumber,
// reason, legalBasis).
rule "13: Redacted PII Personal Identification Information"
|
||||
when
|
||||
Section(matchesType("PII"))
|
||||
then
|
||||
section.redact("PII", 13, "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "13: Redact contact information"
|
||||
// Rule 14: fires for a Section whose searchText contains "@" and redacts every
// e-mail address found by the regex as "PII" (rule number 14).
// The top-level-domain part is `[A-Z]{2,}` rather than `{2,4}`: with the trailing
// `\b`, an upper bound of four letters made addresses with longer TLDs
// (e.g. ".museum", ".company") fail to match, so they were left unredacted.
// NOTE(review): the pattern only lists upper-case letters, so `true` is presumably
// the case-insensitive flag and `0` the capture group (whole match) - confirm
// against Section.redactByRegEx.
rule "14: Redact Emails by RegEx"
|
||||
when
|
||||
Section(searchText.contains("@"))
|
||||
then
|
||||
section.redactByRegEx("\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b", true, 0, "PII", 14, "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "15: Redact contact information"
|
||||
when
|
||||
Section(text.contains("Contact point:")
|
||||
|| text.contains("Phone:")
|
||||
@ -158,96 +178,96 @@ rule "13: Redact contact information"
|
||||
|| text.contains("Telephone:")
|
||||
|| text.contains("European contact:"))
|
||||
then
|
||||
section.redactLineAfter("Contact point:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Phone:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel.:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Email:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("e-mail:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail address:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Contact:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Alternative contact:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone number:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone No:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax number:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("No:", "Fax", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("Contact:", "Tel.:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("European contact:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Contact point:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Phone:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel.:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Email:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("e-mail:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail address:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Contact:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Alternative contact:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone number:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone No:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax number:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("No:", "Fax", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("Contact:", "Tel.:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("European contact:", "PII", 15, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "14: Redact contact information if applicant is found"
|
||||
when
|
||||
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact") || text.contains("Contact:") || text.contains("Telephone number:"))
|
||||
then
|
||||
section.redactLineAfter("Contact point:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Phone:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel.:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Email:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("e-mail:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail address:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Contact:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Alternative contact:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone number:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone No:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax number:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("No:", "Fax", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("Contact:", "Tel.:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("European contact:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
// Rule 16: fires for a Section whose headline contains the word "applicant",
// "Primary contact" or "Alternative contact", or whose text contains "Applicant"
// or "Telephone number:". For such a section every contact label listed below is
// passed to section.redactLineAfter / section.redactBetween with type "PII",
// rule number 16, the reason "Applicant information was found" and the legal basis
// "Reg (EC) No 1107/2009 Art. 63 (2e)".
// NOTE(review): redactLineAfter presumably redacts the remainder of the line that
// follows the label, and redactBetween the text between the two markers; the
// meaning of the boolean `true` argument is not visible here - confirm in Section.
rule "16: Redact contact information if applicant is found"
|
||||
when
|
||||
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact") || text.contains("Telephone number:"))
|
||||
then
|
||||
section.redactLineAfter("Contact point:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Phone:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel.:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Email:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("e-mail:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail address:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Contact:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Alternative contact:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone number:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone No:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax number:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
// The two redactBetween calls target the span between a start and an end marker.
section.redactBetween("No:", "Fax", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("Contact:", "Tel.:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("European contact:", "PII", 16, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "15: Redact contact information if Producer is found"
|
||||
when
|
||||
Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance"))
|
||||
then
|
||||
section.redactLineAfter("Contact:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Phone:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Contact:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax number:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone number:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("No:", "Fax", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
// Rule 17: fires for a Section whose text names a producer or manufacturer
// (case-insensitive "producer of the plant protection" / "producer of the active
// substance", or the exact phrases "Manufacturer of the active substance",
// "Manufacturer:", "Producer or producers of the active substance"). For such a
// section each contact label below is passed to section.redactLineAfter /
// section.redactBetween with type "PII", rule number 17, the reason
// "Producer was found" and the legal basis "Reg (EC) No 1107/2009 Art. 63 (2e)".
// The "Contact:" label is handled once; an identical second call with the same
// arguments was redundant and has been dropped.
// NOTE(review): the meaning of the boolean `true` argument is not visible here -
// confirm in Section.
rule "17: Redact contact information if Producer is found"
|
||||
when
|
||||
Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance"))
|
||||
then
|
||||
section.redactLineAfter("Contact:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Phone:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("E-mail:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Fax number:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Telephone number:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLineAfter("Tel:", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("No:", "Fax", "PII", 17, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "16: Redact AUTHOR(S)"
|
||||
rule "18: Redact AUTHOR(S)"
|
||||
when
|
||||
Section(searchText.contains("AUTHOR(S):"))
|
||||
then
|
||||
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 16, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 18, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "17: Redact PERFORMING LABORATORY"
|
||||
rule "19: Redact PERFORMING LABORATORY"
|
||||
when
|
||||
Section(searchText.contains("PERFORMING LABORATORY:"))
|
||||
then
|
||||
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", 17, true, "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", 19, true, "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "18: Redact On behalf of Sequani Ltd.:"
|
||||
rule "20: Redact On behalf of Sequani Ltd.:"
|
||||
when
|
||||
Section(searchText.contains("On behalf of Sequani Ltd.: Name Title"))
|
||||
then
|
||||
section.redactBetween("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", 18, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", 20, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
|
||||
|
||||
rule "19: Redact On behalf of Syngenta Ltd.:"
|
||||
rule "21: Redact On behalf of Syngenta Ltd.:"
|
||||
when
|
||||
Section(searchText.contains("On behalf of Syngenta Ltd.: Name Title"))
|
||||
then
|
||||
section.redactBetween("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", 19, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
section.redactBetween("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", 21, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
|
||||
end
|
||||
Loading…
x
Reference in New Issue
Block a user